Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/v9fs.c                  |  10
-rw-r--r--  fs/9p/v9fs.h                  |   2
-rw-r--r--  fs/9p/vfs_super.c             |   1
-rw-r--r--  fs/afs/internal.h             |   2
-rw-r--r--  fs/afs/mntpt.c                |  24
-rw-r--r--  fs/afs/super.c                |   1
-rw-r--r--  fs/afs/volume.c               |   7
-rw-r--r--  fs/binfmt_elf_fdpic.c         |   7
-rw-r--r--  fs/binfmt_flat.c              |   2
-rw-r--r--  fs/block_dev.c                |  20
-rw-r--r--  fs/btrfs/disk-io.c            |  12
-rw-r--r--  fs/ceph/addr.c                |  62
-rw-r--r--  fs/ceph/caps.c                |  42
-rw-r--r--  fs/ceph/dir.c                 |   7
-rw-r--r--  fs/ceph/inode.c               |  10
-rw-r--r--  fs/ceph/messenger.c           |   9
-rw-r--r--  fs/ceph/osdmap.c              | 180
-rw-r--r--  fs/ceph/osdmap.h              |   1
-rw-r--r--  fs/ceph/rados.h               |   6
-rw-r--r--  fs/ceph/snap.c                |  26
-rw-r--r--  fs/ceph/super.h               |   3
-rw-r--r--  fs/cifs/cifs_fs_sb.h          |   3
-rw-r--r--  fs/cifs/cifsfs.c              |  10
-rw-r--r--  fs/coda/inode.c               |   8
-rw-r--r--  fs/ecryptfs/crypto.c          |  37
-rw-r--r--  fs/ecryptfs/ecryptfs_kernel.h |  15
-rw-r--r--  fs/ecryptfs/inode.c           | 129
-rw-r--r--  fs/ecryptfs/main.c            |  10
-rw-r--r--  fs/ecryptfs/mmap.c            |  38
-rw-r--r--  fs/ecryptfs/super.c           |   2
-rw-r--r--  fs/exofs/exofs.h              |   2
-rw-r--r--  fs/exofs/super.c              |   8
-rw-r--r--  fs/ext4/extents.c             |   1
-rw-r--r--  fs/ext4/inode.c               |   3
-rw-r--r--  fs/ext4/mballoc.c             |  21
-rw-r--r--  fs/ioctl.c                    |  92
-rw-r--r--  fs/jfs/inode.c                |   2
-rw-r--r--  fs/jfs/jfs_dmap.c             |  16
-rw-r--r--  fs/jfs/jfs_dmap.h             |   6
-rw-r--r--  fs/jfs/jfs_inode.h            |   1
-rw-r--r--  fs/jfs/namei.c                |   4
-rw-r--r--  fs/jfs/resize.c               |   6
-rw-r--r--  fs/jfs/symlink.c              |  14
-rw-r--r--  fs/logfs/gc.c                 |   8
-rw-r--r--  fs/logfs/journal.c            |  29
-rw-r--r--  fs/logfs/logfs.h              |  15
-rw-r--r--  fs/logfs/readwrite.c          |  75
-rw-r--r--  fs/logfs/segment.c            |   8
-rw-r--r--  fs/logfs/super.c              |  11
-rw-r--r--  fs/ncpfs/inode.c              |   8
-rw-r--r--  fs/nfs/client.c               |   3
-rw-r--r--  fs/nfs/dir.c                  |   4
-rw-r--r--  fs/nfs/inode.c                |   8
-rw-r--r--  fs/nfs/nfs4proc.c             |   4
-rw-r--r--  fs/nfs/write.c                |  44
-rw-r--r--  fs/nfsd/nfs4xdr.c             |   8
-rw-r--r--  fs/proc/base.c                |   2
-rw-r--r--  fs/quota/Kconfig              |   8
-rw-r--r--  fs/quota/dquot.c              |  16
-rw-r--r--  fs/reiserfs/dir.c             |   2
-rw-r--r--  fs/reiserfs/xattr.c           |  19
-rw-r--r--  fs/smbfs/inode.c              |   8
-rw-r--r--  fs/squashfs/block.c           |   5
-rw-r--r--  fs/squashfs/super.c           |   4
-rw-r--r--  fs/squashfs/zlib_wrapper.c    |   3
-rw-r--r--  fs/super.c                    |   8
-rw-r--r--  fs/sync.c                     |   3
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c   |   4
-rw-r--r--  fs/xfs/xfs_dfrag.c            |  22
-rw-r--r--  fs/xfs/xfs_log.c              |  38
70 files changed, 753 insertions(+), 476 deletions(-)
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 5c5bc8480070..f8b86e92cd66 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -238,6 +238,13 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
238 return ERR_PTR(-ENOMEM); 238 return ERR_PTR(-ENOMEM);
239 } 239 }
240 240
241 rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY);
242 if (rc) {
243 __putname(v9ses->aname);
244 __putname(v9ses->uname);
245 return ERR_PTR(rc);
246 }
247
241 spin_lock(&v9fs_sessionlist_lock); 248 spin_lock(&v9fs_sessionlist_lock);
242 list_add(&v9ses->slist, &v9fs_sessionlist); 249 list_add(&v9ses->slist, &v9fs_sessionlist);
243 spin_unlock(&v9fs_sessionlist_lock); 250 spin_unlock(&v9fs_sessionlist_lock);
@@ -301,6 +308,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
301 return fid; 308 return fid;
302 309
303error: 310error:
311 bdi_destroy(&v9ses->bdi);
304 return ERR_PTR(retval); 312 return ERR_PTR(retval);
305} 313}
306 314
@@ -326,6 +334,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
326 __putname(v9ses->uname); 334 __putname(v9ses->uname);
327 __putname(v9ses->aname); 335 __putname(v9ses->aname);
328 336
337 bdi_destroy(&v9ses->bdi);
338
329 spin_lock(&v9fs_sessionlist_lock); 339 spin_lock(&v9fs_sessionlist_lock);
330 list_del(&v9ses->slist); 340 list_del(&v9ses->slist);
331 spin_unlock(&v9fs_sessionlist_lock); 341 spin_unlock(&v9fs_sessionlist_lock);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index a0a8d3dd1361..bec4d0bcb458 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -20,6 +20,7 @@
20 * Boston, MA 02111-1301 USA 20 * Boston, MA 02111-1301 USA
21 * 21 *
22 */ 22 */
23#include <linux/backing-dev.h>
23 24
24/** 25/**
25 * enum p9_session_flags - option flags for each 9P session 26 * enum p9_session_flags - option flags for each 9P session
@@ -102,6 +103,7 @@ struct v9fs_session_info {
102 u32 uid; /* if ACCESS_SINGLE, the uid that has access */ 103 u32 uid; /* if ACCESS_SINGLE, the uid that has access */
103 struct p9_client *clnt; /* 9p client */ 104 struct p9_client *clnt; /* 9p client */
104 struct list_head slist; /* list of sessions registered with v9fs */ 105 struct list_head slist; /* list of sessions registered with v9fs */
106 struct backing_dev_info bdi;
105}; 107};
106 108
107struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, 109struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 491108bd6e0d..806da5d3b3a0 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -77,6 +77,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
77 sb->s_blocksize = 1 << sb->s_blocksize_bits; 77 sb->s_blocksize = 1 << sb->s_blocksize_bits;
78 sb->s_magic = V9FS_MAGIC; 78 sb->s_magic = V9FS_MAGIC;
79 sb->s_op = &v9fs_super_ops; 79 sb->s_op = &v9fs_super_ops;
80 sb->s_bdi = &v9ses->bdi;
80 81
81 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | 82 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
82 MS_NOATIME; 83 MS_NOATIME;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index c54dad4e6063..a10f2582844f 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -19,6 +19,7 @@
19#include <linux/workqueue.h> 19#include <linux/workqueue.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/fscache.h> 21#include <linux/fscache.h>
22#include <linux/backing-dev.h>
22 23
23#include "afs.h" 24#include "afs.h"
24#include "afs_vl.h" 25#include "afs_vl.h"
@@ -313,6 +314,7 @@ struct afs_volume {
313 unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */ 314 unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */
314 struct afs_server *servers[8]; /* servers on which volume resides (ordered) */ 315 struct afs_server *servers[8]; /* servers on which volume resides (ordered) */
315 struct rw_semaphore server_sem; /* lock for accessing current server */ 316 struct rw_semaphore server_sem; /* lock for accessing current server */
317 struct backing_dev_info bdi;
316}; 318};
317 319
318/* 320/*
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 5e813a816ce4..b3feddc4f7d6 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -138,9 +138,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
138{ 138{
139 struct afs_super_info *super; 139 struct afs_super_info *super;
140 struct vfsmount *mnt; 140 struct vfsmount *mnt;
141 struct page *page = NULL; 141 struct page *page;
142 size_t size; 142 size_t size;
143 char *buf, *devname = NULL, *options = NULL; 143 char *buf, *devname, *options;
144 int ret; 144 int ret;
145 145
146 _enter("{%s}", mntpt->d_name.name); 146 _enter("{%s}", mntpt->d_name.name);
@@ -150,22 +150,22 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
150 ret = -EINVAL; 150 ret = -EINVAL;
151 size = mntpt->d_inode->i_size; 151 size = mntpt->d_inode->i_size;
152 if (size > PAGE_SIZE - 1) 152 if (size > PAGE_SIZE - 1)
153 goto error; 153 goto error_no_devname;
154 154
155 ret = -ENOMEM; 155 ret = -ENOMEM;
156 devname = (char *) get_zeroed_page(GFP_KERNEL); 156 devname = (char *) get_zeroed_page(GFP_KERNEL);
157 if (!devname) 157 if (!devname)
158 goto error; 158 goto error_no_devname;
159 159
160 options = (char *) get_zeroed_page(GFP_KERNEL); 160 options = (char *) get_zeroed_page(GFP_KERNEL);
161 if (!options) 161 if (!options)
162 goto error; 162 goto error_no_options;
163 163
164 /* read the contents of the AFS special symlink */ 164 /* read the contents of the AFS special symlink */
165 page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL); 165 page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL);
166 if (IS_ERR(page)) { 166 if (IS_ERR(page)) {
167 ret = PTR_ERR(page); 167 ret = PTR_ERR(page);
168 goto error; 168 goto error_no_page;
169 } 169 }
170 170
171 ret = -EIO; 171 ret = -EIO;
@@ -196,12 +196,12 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
196 return mnt; 196 return mnt;
197 197
198error: 198error:
199 if (page) 199 page_cache_release(page);
200 page_cache_release(page); 200error_no_page:
201 if (devname) 201 free_page((unsigned long) options);
202 free_page((unsigned long) devname); 202error_no_options:
203 if (options) 203 free_page((unsigned long) devname);
204 free_page((unsigned long) options); 204error_no_devname:
205 _leave(" = %d", ret); 205 _leave(" = %d", ret);
206 return ERR_PTR(ret); 206 return ERR_PTR(ret);
207} 207}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 14f6431598ad..e932e5a3a0c1 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -311,6 +311,7 @@ static int afs_fill_super(struct super_block *sb, void *data)
311 sb->s_magic = AFS_FS_MAGIC; 311 sb->s_magic = AFS_FS_MAGIC;
312 sb->s_op = &afs_super_ops; 312 sb->s_op = &afs_super_ops;
313 sb->s_fs_info = as; 313 sb->s_fs_info = as;
314 sb->s_bdi = &as->volume->bdi;
314 315
315 /* allocate the root inode and dentry */ 316 /* allocate the root inode and dentry */
316 fid.vid = as->volume->vid; 317 fid.vid = as->volume->vid;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index a353e69e2391..401eeb21869f 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -106,6 +106,10 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
106 volume->cell = params->cell; 106 volume->cell = params->cell;
107 volume->vid = vlocation->vldb.vid[params->type]; 107 volume->vid = vlocation->vldb.vid[params->type];
108 108
109 ret = bdi_setup_and_register(&volume->bdi, "afs", BDI_CAP_MAP_COPY);
110 if (ret)
111 goto error_bdi;
112
109 init_rwsem(&volume->server_sem); 113 init_rwsem(&volume->server_sem);
110 114
111 /* look up all the applicable server records */ 115 /* look up all the applicable server records */
@@ -151,6 +155,8 @@ error:
151 return ERR_PTR(ret); 155 return ERR_PTR(ret);
152 156
153error_discard: 157error_discard:
158 bdi_destroy(&volume->bdi);
159error_bdi:
154 up_write(&params->cell->vl_sem); 160 up_write(&params->cell->vl_sem);
155 161
156 for (loop = volume->nservers - 1; loop >= 0; loop--) 162 for (loop = volume->nservers - 1; loop >= 0; loop--)
@@ -200,6 +206,7 @@ void afs_put_volume(struct afs_volume *volume)
200 for (loop = volume->nservers - 1; loop >= 0; loop--) 206 for (loop = volume->nservers - 1; loop >= 0; loop--)
201 afs_put_server(volume->servers[loop]); 207 afs_put_server(volume->servers[loop]);
202 208
209 bdi_destroy(&volume->bdi);
203 kfree(volume); 210 kfree(volume);
204 211
205 _leave(" [destroyed]"); 212 _leave(" [destroyed]");
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 7ab23e006e4c..2c5f9a0e5d72 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1005,15 +1005,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
1005 } 1005 }
1006 } else if (!mm->start_data) { 1006 } else if (!mm->start_data) {
1007 mm->start_data = seg->addr; 1007 mm->start_data = seg->addr;
1008#ifndef CONFIG_MMU
1009 mm->end_data = seg->addr + phdr->p_memsz; 1008 mm->end_data = seg->addr + phdr->p_memsz;
1010#endif
1011 } 1009 }
1012
1013#ifdef CONFIG_MMU
1014 if (seg->addr + phdr->p_memsz > mm->end_data)
1015 mm->end_data = seg->addr + phdr->p_memsz;
1016#endif
1017 } 1010 }
1018 1011
1019 seg++; 1012 seg++;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index e0e769bdca59..49566c1687d8 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -355,7 +355,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp)
355 355
356 if (!flat_reloc_valid(r, start_brk - start_data + text_len)) { 356 if (!flat_reloc_valid(r, start_brk - start_data + text_len)) {
357 printk("BINFMT_FLAT: reloc outside program 0x%x (0 - 0x%x/0x%x)", 357 printk("BINFMT_FLAT: reloc outside program 0x%x (0 - 0x%x/0x%x)",
358 (int) r,(int)(start_brk-start_code),(int)text_len); 358 (int) r,(int)(start_brk-start_data+text_len),(int)text_len);
359 goto failed; 359 goto failed;
360 } 360 }
361 361
diff --git a/fs/block_dev.c b/fs/block_dev.c
index dd769304382e..55dcb7884f4d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -406,17 +406,23 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
406 406
407int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync) 407int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync)
408{ 408{
409 struct block_device *bdev = I_BDEV(filp->f_mapping->host); 409 struct inode *bd_inode = filp->f_mapping->host;
410 struct block_device *bdev = I_BDEV(bd_inode);
410 int error; 411 int error;
411 412
412 error = sync_blockdev(bdev); 413 /*
413 if (error) 414 * There is no need to serialise calls to blkdev_issue_flush with
414 return error; 415 * i_mutex and doing so causes performance issues with concurrent
415 416 * O_SYNC writers to a block device.
416 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, 417 */
417 (BLKDEV_IFL_WAIT)); 418 mutex_unlock(&bd_inode->i_mutex);
419
420 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT);
418 if (error == -EOPNOTSUPP) 421 if (error == -EOPNOTSUPP)
419 error = 0; 422 error = 0;
423
424 mutex_lock(&bd_inode->i_mutex);
425
420 return error; 426 return error;
421} 427}
422EXPORT_SYMBOL(blkdev_fsync); 428EXPORT_SYMBOL(blkdev_fsync);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e7b8f2c89ccb..feca04197d02 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,8 +44,6 @@ static struct extent_io_ops btree_extent_io_ops;
44static void end_workqueue_fn(struct btrfs_work *work); 44static void end_workqueue_fn(struct btrfs_work *work);
45static void free_fs_root(struct btrfs_root *root); 45static void free_fs_root(struct btrfs_root *root);
46 46
47static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
48
49/* 47/*
50 * end_io_wq structs are used to do processing in task context when an IO is 48 * end_io_wq structs are used to do processing in task context when an IO is
51 * complete. This is used during reads to verify checksums, and it is used 49 * complete. This is used during reads to verify checksums, and it is used
@@ -1375,19 +1373,11 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1375{ 1373{
1376 int err; 1374 int err;
1377 1375
1378 bdi->name = "btrfs";
1379 bdi->capabilities = BDI_CAP_MAP_COPY; 1376 bdi->capabilities = BDI_CAP_MAP_COPY;
1380 err = bdi_init(bdi); 1377 err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY);
1381 if (err) 1378 if (err)
1382 return err; 1379 return err;
1383 1380
1384 err = bdi_register(bdi, NULL, "btrfs-%d",
1385 atomic_inc_return(&btrfs_bdi_num));
1386 if (err) {
1387 bdi_destroy(bdi);
1388 return err;
1389 }
1390
1391 bdi->ra_pages = default_backing_dev_info.ra_pages; 1381 bdi->ra_pages = default_backing_dev_info.ra_pages;
1392 bdi->unplug_io_fn = btrfs_unplug_io_fn; 1382 bdi->unplug_io_fn = btrfs_unplug_io_fn;
1393 bdi->unplug_io_data = info; 1383 bdi->unplug_io_data = info;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index aa3cd7cc3e40..412593703d1e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -337,16 +337,15 @@ out:
337/* 337/*
338 * Get ref for the oldest snapc for an inode with dirty data... that is, the 338 * Get ref for the oldest snapc for an inode with dirty data... that is, the
339 * only snap context we are allowed to write back. 339 * only snap context we are allowed to write back.
340 *
341 * Caller holds i_lock.
342 */ 340 */
343static struct ceph_snap_context *__get_oldest_context(struct inode *inode, 341static struct ceph_snap_context *get_oldest_context(struct inode *inode,
344 u64 *snap_size) 342 u64 *snap_size)
345{ 343{
346 struct ceph_inode_info *ci = ceph_inode(inode); 344 struct ceph_inode_info *ci = ceph_inode(inode);
347 struct ceph_snap_context *snapc = NULL; 345 struct ceph_snap_context *snapc = NULL;
348 struct ceph_cap_snap *capsnap = NULL; 346 struct ceph_cap_snap *capsnap = NULL;
349 347
348 spin_lock(&inode->i_lock);
350 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { 349 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
351 dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, 350 dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
352 capsnap->context, capsnap->dirty_pages); 351 capsnap->context, capsnap->dirty_pages);
@@ -357,21 +356,11 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
357 break; 356 break;
358 } 357 }
359 } 358 }
360 if (!snapc && ci->i_snap_realm) { 359 if (!snapc && ci->i_head_snapc) {
361 snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context); 360 snapc = ceph_get_snap_context(ci->i_head_snapc);
362 dout(" head snapc %p has %d dirty pages\n", 361 dout(" head snapc %p has %d dirty pages\n",
363 snapc, ci->i_wrbuffer_ref_head); 362 snapc, ci->i_wrbuffer_ref_head);
364 } 363 }
365 return snapc;
366}
367
368static struct ceph_snap_context *get_oldest_context(struct inode *inode,
369 u64 *snap_size)
370{
371 struct ceph_snap_context *snapc = NULL;
372
373 spin_lock(&inode->i_lock);
374 snapc = __get_oldest_context(inode, snap_size);
375 spin_unlock(&inode->i_lock); 364 spin_unlock(&inode->i_lock);
376 return snapc; 365 return snapc;
377} 366}
@@ -392,7 +381,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
392 int len = PAGE_CACHE_SIZE; 381 int len = PAGE_CACHE_SIZE;
393 loff_t i_size; 382 loff_t i_size;
394 int err = 0; 383 int err = 0;
395 struct ceph_snap_context *snapc; 384 struct ceph_snap_context *snapc, *oldest;
396 u64 snap_size = 0; 385 u64 snap_size = 0;
397 long writeback_stat; 386 long writeback_stat;
398 387
@@ -413,13 +402,16 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
413 dout("writepage %p page %p not dirty?\n", inode, page); 402 dout("writepage %p page %p not dirty?\n", inode, page);
414 goto out; 403 goto out;
415 } 404 }
416 if (snapc != get_oldest_context(inode, &snap_size)) { 405 oldest = get_oldest_context(inode, &snap_size);
406 if (snapc->seq > oldest->seq) {
417 dout("writepage %p page %p snapc %p not writeable - noop\n", 407 dout("writepage %p page %p snapc %p not writeable - noop\n",
418 inode, page, (void *)page->private); 408 inode, page, (void *)page->private);
419 /* we should only noop if called by kswapd */ 409 /* we should only noop if called by kswapd */
420 WARN_ON((current->flags & PF_MEMALLOC) == 0); 410 WARN_ON((current->flags & PF_MEMALLOC) == 0);
411 ceph_put_snap_context(oldest);
421 goto out; 412 goto out;
422 } 413 }
414 ceph_put_snap_context(oldest);
423 415
424 /* is this a partial page at end of file? */ 416 /* is this a partial page at end of file? */
425 if (snap_size) 417 if (snap_size)
@@ -458,7 +450,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
458 ClearPagePrivate(page); 450 ClearPagePrivate(page);
459 end_page_writeback(page); 451 end_page_writeback(page);
460 ceph_put_wrbuffer_cap_refs(ci, 1, snapc); 452 ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
461 ceph_put_snap_context(snapc); 453 ceph_put_snap_context(snapc); /* page's reference */
462out: 454out:
463 return err; 455 return err;
464} 456}
@@ -558,9 +550,9 @@ static void writepages_finish(struct ceph_osd_request *req,
558 dout("inode %p skipping page %p\n", inode, page); 550 dout("inode %p skipping page %p\n", inode, page);
559 wbc->pages_skipped++; 551 wbc->pages_skipped++;
560 } 552 }
553 ceph_put_snap_context((void *)page->private);
561 page->private = 0; 554 page->private = 0;
562 ClearPagePrivate(page); 555 ClearPagePrivate(page);
563 ceph_put_snap_context(snapc);
564 dout("unlocking %d %p\n", i, page); 556 dout("unlocking %d %p\n", i, page);
565 end_page_writeback(page); 557 end_page_writeback(page);
566 558
@@ -618,7 +610,7 @@ static int ceph_writepages_start(struct address_space *mapping,
618 int range_whole = 0; 610 int range_whole = 0;
619 int should_loop = 1; 611 int should_loop = 1;
620 pgoff_t max_pages = 0, max_pages_ever = 0; 612 pgoff_t max_pages = 0, max_pages_ever = 0;
621 struct ceph_snap_context *snapc = NULL, *last_snapc = NULL; 613 struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
622 struct pagevec pvec; 614 struct pagevec pvec;
623 int done = 0; 615 int done = 0;
624 int rc = 0; 616 int rc = 0;
@@ -770,9 +762,10 @@ get_more_pages:
770 } 762 }
771 763
772 /* only if matching snap context */ 764 /* only if matching snap context */
773 if (snapc != (void *)page->private) { 765 pgsnapc = (void *)page->private;
774 dout("page snapc %p != oldest %p\n", 766 if (pgsnapc->seq > snapc->seq) {
775 (void *)page->private, snapc); 767 dout("page snapc %p %lld > oldest %p %lld\n",
768 pgsnapc, pgsnapc->seq, snapc, snapc->seq);
776 unlock_page(page); 769 unlock_page(page);
777 if (!locked_pages) 770 if (!locked_pages)
778 continue; /* keep looking for snap */ 771 continue; /* keep looking for snap */
@@ -914,7 +907,10 @@ static int context_is_writeable_or_written(struct inode *inode,
914 struct ceph_snap_context *snapc) 907 struct ceph_snap_context *snapc)
915{ 908{
916 struct ceph_snap_context *oldest = get_oldest_context(inode, NULL); 909 struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
917 return !oldest || snapc->seq <= oldest->seq; 910 int ret = !oldest || snapc->seq <= oldest->seq;
911
912 ceph_put_snap_context(oldest);
913 return ret;
918} 914}
919 915
920/* 916/*
@@ -936,8 +932,8 @@ static int ceph_update_writeable_page(struct file *file,
936 int pos_in_page = pos & ~PAGE_CACHE_MASK; 932 int pos_in_page = pos & ~PAGE_CACHE_MASK;
937 int end_in_page = pos_in_page + len; 933 int end_in_page = pos_in_page + len;
938 loff_t i_size; 934 loff_t i_size;
939 struct ceph_snap_context *snapc;
940 int r; 935 int r;
936 struct ceph_snap_context *snapc, *oldest;
941 937
942retry_locked: 938retry_locked:
943 /* writepages currently holds page lock, but if we change that later, */ 939 /* writepages currently holds page lock, but if we change that later, */
@@ -947,23 +943,24 @@ retry_locked:
947 BUG_ON(!ci->i_snap_realm); 943 BUG_ON(!ci->i_snap_realm);
948 down_read(&mdsc->snap_rwsem); 944 down_read(&mdsc->snap_rwsem);
949 BUG_ON(!ci->i_snap_realm->cached_context); 945 BUG_ON(!ci->i_snap_realm->cached_context);
950 if (page->private && 946 snapc = (void *)page->private;
951 (void *)page->private != ci->i_snap_realm->cached_context) { 947 if (snapc && snapc != ci->i_head_snapc) {
952 /* 948 /*
953 * this page is already dirty in another (older) snap 949 * this page is already dirty in another (older) snap
954 * context! is it writeable now? 950 * context! is it writeable now?
955 */ 951 */
956 snapc = get_oldest_context(inode, NULL); 952 oldest = get_oldest_context(inode, NULL);
957 up_read(&mdsc->snap_rwsem); 953 up_read(&mdsc->snap_rwsem);
958 954
959 if (snapc != (void *)page->private) { 955 if (snapc->seq > oldest->seq) {
956 ceph_put_snap_context(oldest);
960 dout(" page %p snapc %p not current or oldest\n", 957 dout(" page %p snapc %p not current or oldest\n",
961 page, (void *)page->private); 958 page, snapc);
962 /* 959 /*
963 * queue for writeback, and wait for snapc to 960 * queue for writeback, and wait for snapc to
964 * be writeable or written 961 * be writeable or written
965 */ 962 */
966 snapc = ceph_get_snap_context((void *)page->private); 963 snapc = ceph_get_snap_context(snapc);
967 unlock_page(page); 964 unlock_page(page);
968 ceph_queue_writeback(inode); 965 ceph_queue_writeback(inode);
969 r = wait_event_interruptible(ci->i_cap_wq, 966 r = wait_event_interruptible(ci->i_cap_wq,
@@ -973,6 +970,7 @@ retry_locked:
973 return r; 970 return r;
974 return -EAGAIN; 971 return -EAGAIN;
975 } 972 }
973 ceph_put_snap_context(oldest);
976 974
977 /* yay, writeable, do it now (without dropping page lock) */ 975 /* yay, writeable, do it now (without dropping page lock) */
978 dout(" page %p snapc %p not current, but oldest\n", 976 dout(" page %p snapc %p not current, but oldest\n",
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 3710e077a857..aa2239fa9a3b 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1205,6 +1205,12 @@ retry:
1205 if (capsnap->dirty_pages || capsnap->writing) 1205 if (capsnap->dirty_pages || capsnap->writing)
1206 continue; 1206 continue;
1207 1207
1208 /*
1209 * if cap writeback already occurred, we should have dropped
1210 * the capsnap in ceph_put_wrbuffer_cap_refs.
1211 */
1212 BUG_ON(capsnap->dirty == 0);
1213
1208 /* pick mds, take s_mutex */ 1214 /* pick mds, take s_mutex */
1209 mds = __ceph_get_cap_mds(ci, &mseq); 1215 mds = __ceph_get_cap_mds(ci, &mseq);
1210 if (session && session->s_mds != mds) { 1216 if (session && session->s_mds != mds) {
@@ -2118,8 +2124,8 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
2118 } 2124 }
2119 spin_unlock(&inode->i_lock); 2125 spin_unlock(&inode->i_lock);
2120 2126
2121 dout("put_cap_refs %p had %s %s\n", inode, ceph_cap_string(had), 2127 dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
2122 last ? "last" : ""); 2128 last ? " last" : "", put ? " put" : "");
2123 2129
2124 if (last && !flushsnaps) 2130 if (last && !flushsnaps)
2125 ceph_check_caps(ci, 0, NULL); 2131 ceph_check_caps(ci, 0, NULL);
@@ -2143,7 +2149,8 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2143{ 2149{
2144 struct inode *inode = &ci->vfs_inode; 2150 struct inode *inode = &ci->vfs_inode;
2145 int last = 0; 2151 int last = 0;
2146 int last_snap = 0; 2152 int complete_capsnap = 0;
2153 int drop_capsnap = 0;
2147 int found = 0; 2154 int found = 0;
2148 struct ceph_cap_snap *capsnap = NULL; 2155 struct ceph_cap_snap *capsnap = NULL;
2149 2156
@@ -2166,19 +2173,32 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2166 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { 2173 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
2167 if (capsnap->context == snapc) { 2174 if (capsnap->context == snapc) {
2168 found = 1; 2175 found = 1;
2169 capsnap->dirty_pages -= nr;
2170 last_snap = !capsnap->dirty_pages;
2171 break; 2176 break;
2172 } 2177 }
2173 } 2178 }
2174 BUG_ON(!found); 2179 BUG_ON(!found);
2180 capsnap->dirty_pages -= nr;
2181 if (capsnap->dirty_pages == 0) {
2182 complete_capsnap = 1;
2183 if (capsnap->dirty == 0)
2184 /* cap writeback completed before we created
2185 * the cap_snap; no FLUSHSNAP is needed */
2186 drop_capsnap = 1;
2187 }
2175 dout("put_wrbuffer_cap_refs on %p cap_snap %p " 2188 dout("put_wrbuffer_cap_refs on %p cap_snap %p "
2176 " snap %lld %d/%d -> %d/%d %s%s\n", 2189 " snap %lld %d/%d -> %d/%d %s%s%s\n",
2177 inode, capsnap, capsnap->context->seq, 2190 inode, capsnap, capsnap->context->seq,
2178 ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, 2191 ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
2179 ci->i_wrbuffer_ref, capsnap->dirty_pages, 2192 ci->i_wrbuffer_ref, capsnap->dirty_pages,
2180 last ? " (wrbuffer last)" : "", 2193 last ? " (wrbuffer last)" : "",
2181 last_snap ? " (capsnap last)" : ""); 2194 complete_capsnap ? " (complete capsnap)" : "",
2195 drop_capsnap ? " (drop capsnap)" : "");
2196 if (drop_capsnap) {
2197 ceph_put_snap_context(capsnap->context);
2198 list_del(&capsnap->ci_item);
2199 list_del(&capsnap->flushing_item);
2200 ceph_put_cap_snap(capsnap);
2201 }
2182 } 2202 }
2183 2203
2184 spin_unlock(&inode->i_lock); 2204 spin_unlock(&inode->i_lock);
@@ -2186,10 +2206,12 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2186 if (last) { 2206 if (last) {
2187 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 2207 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
2188 iput(inode); 2208 iput(inode);
2189 } else if (last_snap) { 2209 } else if (complete_capsnap) {
2190 ceph_flush_snaps(ci); 2210 ceph_flush_snaps(ci);
2191 wake_up(&ci->i_cap_wq); 2211 wake_up(&ci->i_cap_wq);
2192 } 2212 }
2213 if (drop_capsnap)
2214 iput(inode);
2193} 2215}
2194 2216
2195/* 2217/*
@@ -2465,8 +2487,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
2465 break; 2487 break;
2466 } 2488 }
2467 WARN_ON(capsnap->dirty_pages || capsnap->writing); 2489 WARN_ON(capsnap->dirty_pages || capsnap->writing);
2468 dout(" removing cap_snap %p follows %lld\n", 2490 dout(" removing %p cap_snap %p follows %lld\n",
2469 capsnap, follows); 2491 inode, capsnap, follows);
2470 ceph_put_snap_context(capsnap->context); 2492 ceph_put_snap_context(capsnap->context);
2471 list_del(&capsnap->ci_item); 2493 list_del(&capsnap->ci_item);
2472 list_del(&capsnap->flushing_item); 2494 list_del(&capsnap->flushing_item);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 7261dc6c2ead..ea8ee2e526aa 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -171,11 +171,11 @@ more:
171 spin_lock(&inode->i_lock); 171 spin_lock(&inode->i_lock);
172 spin_lock(&dcache_lock); 172 spin_lock(&dcache_lock);
173 173
174 last = dentry;
175
174 if (err < 0) 176 if (err < 0)
175 goto out_unlock; 177 goto out_unlock;
176 178
177 last = dentry;
178
179 p = p->prev; 179 p = p->prev;
180 filp->f_pos++; 180 filp->f_pos++;
181 181
@@ -312,7 +312,7 @@ more:
312 req->r_readdir_offset = fi->next_offset; 312 req->r_readdir_offset = fi->next_offset;
313 req->r_args.readdir.frag = cpu_to_le32(frag); 313 req->r_args.readdir.frag = cpu_to_le32(frag);
314 req->r_args.readdir.max_entries = cpu_to_le32(max_entries); 314 req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
315 req->r_num_caps = max_entries; 315 req->r_num_caps = max_entries + 1;
316 err = ceph_mdsc_do_request(mdsc, NULL, req); 316 err = ceph_mdsc_do_request(mdsc, NULL, req);
317 if (err < 0) { 317 if (err < 0) {
318 ceph_mdsc_put_request(req); 318 ceph_mdsc_put_request(req);
@@ -489,6 +489,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
489 struct inode *inode = ceph_get_snapdir(parent); 489 struct inode *inode = ceph_get_snapdir(parent);
490 dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", 490 dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
491 dentry, dentry->d_name.len, dentry->d_name.name, inode); 491 dentry, dentry->d_name.len, dentry->d_name.name, inode);
492 BUG_ON(!d_unhashed(dentry));
492 d_add(dentry, inode); 493 d_add(dentry, inode);
493 err = 0; 494 err = 0;
494 } 495 }
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index aca82d55cc53..26f883c275e8 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -886,6 +886,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
886 struct inode *in = NULL; 886 struct inode *in = NULL;
887 struct ceph_mds_reply_inode *ininfo; 887 struct ceph_mds_reply_inode *ininfo;
888 struct ceph_vino vino; 888 struct ceph_vino vino;
889 struct ceph_client *client = ceph_sb_to_client(sb);
889 int i = 0; 890 int i = 0;
890 int err = 0; 891 int err = 0;
891 892
@@ -949,7 +950,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
949 return err; 950 return err;
950 } 951 }
951 952
952 if (rinfo->head->is_dentry && !req->r_aborted) { 953 /*
954 * ignore null lease/binding on snapdir ENOENT, or else we
955 * will have trouble splicing in the virtual snapdir later
956 */
957 if (rinfo->head->is_dentry && !req->r_aborted &&
958 (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
959 client->mount_args->snapdir_name,
960 req->r_dentry->d_name.len))) {
953 /* 961 /*
954 * lookup link rename : null -> possibly existing inode 962 * lookup link rename : null -> possibly existing inode
955 * mknod symlink mkdir : null -> new inode 963 * mknod symlink mkdir : null -> new inode
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 8f1715ffbe4b..cdaaa131add3 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -30,6 +30,10 @@ static char tag_msg = CEPH_MSGR_TAG_MSG;
30static char tag_ack = CEPH_MSGR_TAG_ACK; 30static char tag_ack = CEPH_MSGR_TAG_ACK;
31static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; 31static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
32 32
33#ifdef CONFIG_LOCKDEP
34static struct lock_class_key socket_class;
35#endif
36
33 37
34static void queue_con(struct ceph_connection *con); 38static void queue_con(struct ceph_connection *con);
35static void con_work(struct work_struct *); 39static void con_work(struct work_struct *);
@@ -228,6 +232,10 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
228 con->sock = sock; 232 con->sock = sock;
229 sock->sk->sk_allocation = GFP_NOFS; 233 sock->sk->sk_allocation = GFP_NOFS;
230 234
235#ifdef CONFIG_LOCKDEP
236 lockdep_set_class(&sock->sk->sk_lock, &socket_class);
237#endif
238
231 set_sock_callbacks(sock, con); 239 set_sock_callbacks(sock, con);
232 240
233 dout("connect %s\n", pr_addr(&con->peer_addr.in_addr)); 241 dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
@@ -333,6 +341,7 @@ static void reset_connection(struct ceph_connection *con)
333 con->out_msg = NULL; 341 con->out_msg = NULL;
334 } 342 }
335 con->in_seq = 0; 343 con->in_seq = 0;
344 con->in_seq_acked = 0;
336} 345}
337 346
338/* 347/*
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index 21c6623c4b07..2e2c15eed82a 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -314,71 +314,6 @@ bad:
314 return ERR_PTR(err); 314 return ERR_PTR(err);
315} 315}
316 316
317
318/*
319 * osd map
320 */
321void ceph_osdmap_destroy(struct ceph_osdmap *map)
322{
323 dout("osdmap_destroy %p\n", map);
324 if (map->crush)
325 crush_destroy(map->crush);
326 while (!RB_EMPTY_ROOT(&map->pg_temp)) {
327 struct ceph_pg_mapping *pg =
328 rb_entry(rb_first(&map->pg_temp),
329 struct ceph_pg_mapping, node);
330 rb_erase(&pg->node, &map->pg_temp);
331 kfree(pg);
332 }
333 while (!RB_EMPTY_ROOT(&map->pg_pools)) {
334 struct ceph_pg_pool_info *pi =
335 rb_entry(rb_first(&map->pg_pools),
336 struct ceph_pg_pool_info, node);
337 rb_erase(&pi->node, &map->pg_pools);
338 kfree(pi);
339 }
340 kfree(map->osd_state);
341 kfree(map->osd_weight);
342 kfree(map->osd_addr);
343 kfree(map);
344}
345
346/*
347 * adjust max osd value. reallocate arrays.
348 */
349static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
350{
351 u8 *state;
352 struct ceph_entity_addr *addr;
353 u32 *weight;
354
355 state = kcalloc(max, sizeof(*state), GFP_NOFS);
356 addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
357 weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
358 if (state == NULL || addr == NULL || weight == NULL) {
359 kfree(state);
360 kfree(addr);
361 kfree(weight);
362 return -ENOMEM;
363 }
364
365 /* copy old? */
366 if (map->osd_state) {
367 memcpy(state, map->osd_state, map->max_osd*sizeof(*state));
368 memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr));
369 memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight));
370 kfree(map->osd_state);
371 kfree(map->osd_addr);
372 kfree(map->osd_weight);
373 }
374
375 map->osd_state = state;
376 map->osd_weight = weight;
377 map->osd_addr = addr;
378 map->max_osd = max;
379 return 0;
380}
381
382/* 317/*
383 * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid 318 * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
384 * to a set of osds) 319 * to a set of osds)
@@ -482,6 +417,13 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
482 return NULL; 417 return NULL;
483} 418}
484 419
420static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
421{
422 rb_erase(&pi->node, root);
423 kfree(pi->name);
424 kfree(pi);
425}
426
485void __decode_pool(void **p, struct ceph_pg_pool_info *pi) 427void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
486{ 428{
487 ceph_decode_copy(p, &pi->v, sizeof(pi->v)); 429 ceph_decode_copy(p, &pi->v, sizeof(pi->v));
@@ -490,6 +432,98 @@ void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
490 *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; 432 *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2;
491} 433}
492 434
435static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
436{
437 struct ceph_pg_pool_info *pi;
438 u32 num, len, pool;
439
440 ceph_decode_32_safe(p, end, num, bad);
441 dout(" %d pool names\n", num);
442 while (num--) {
443 ceph_decode_32_safe(p, end, pool, bad);
444 ceph_decode_32_safe(p, end, len, bad);
445 dout(" pool %d len %d\n", pool, len);
446 pi = __lookup_pg_pool(&map->pg_pools, pool);
447 if (pi) {
448 kfree(pi->name);
449 pi->name = kmalloc(len + 1, GFP_NOFS);
450 if (pi->name) {
451 memcpy(pi->name, *p, len);
452 pi->name[len] = '\0';
453 dout(" name is %s\n", pi->name);
454 }
455 }
456 *p += len;
457 }
458 return 0;
459
460bad:
461 return -EINVAL;
462}
463
464/*
465 * osd map
466 */
467void ceph_osdmap_destroy(struct ceph_osdmap *map)
468{
469 dout("osdmap_destroy %p\n", map);
470 if (map->crush)
471 crush_destroy(map->crush);
472 while (!RB_EMPTY_ROOT(&map->pg_temp)) {
473 struct ceph_pg_mapping *pg =
474 rb_entry(rb_first(&map->pg_temp),
475 struct ceph_pg_mapping, node);
476 rb_erase(&pg->node, &map->pg_temp);
477 kfree(pg);
478 }
479 while (!RB_EMPTY_ROOT(&map->pg_pools)) {
480 struct ceph_pg_pool_info *pi =
481 rb_entry(rb_first(&map->pg_pools),
482 struct ceph_pg_pool_info, node);
483 __remove_pg_pool(&map->pg_pools, pi);
484 }
485 kfree(map->osd_state);
486 kfree(map->osd_weight);
487 kfree(map->osd_addr);
488 kfree(map);
489}
490
491/*
492 * adjust max osd value. reallocate arrays.
493 */
494static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
495{
496 u8 *state;
497 struct ceph_entity_addr *addr;
498 u32 *weight;
499
500 state = kcalloc(max, sizeof(*state), GFP_NOFS);
501 addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
502 weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
503 if (state == NULL || addr == NULL || weight == NULL) {
504 kfree(state);
505 kfree(addr);
506 kfree(weight);
507 return -ENOMEM;
508 }
509
510 /* copy old? */
511 if (map->osd_state) {
512 memcpy(state, map->osd_state, map->max_osd*sizeof(*state));
513 memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr));
514 memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight));
515 kfree(map->osd_state);
516 kfree(map->osd_addr);
517 kfree(map->osd_weight);
518 }
519
520 map->osd_state = state;
521 map->osd_weight = weight;
522 map->osd_addr = addr;
523 map->max_osd = max;
524 return 0;
525}
526
493/* 527/*
494 * decode a full map. 528 * decode a full map.
495 */ 529 */
@@ -526,7 +560,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
526 ceph_decode_32_safe(p, end, max, bad); 560 ceph_decode_32_safe(p, end, max, bad);
527 while (max--) { 561 while (max--) {
528 ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); 562 ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad);
529 pi = kmalloc(sizeof(*pi), GFP_NOFS); 563 pi = kzalloc(sizeof(*pi), GFP_NOFS);
530 if (!pi) 564 if (!pi)
531 goto bad; 565 goto bad;
532 pi->id = ceph_decode_32(p); 566 pi->id = ceph_decode_32(p);
@@ -539,6 +573,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
539 __decode_pool(p, pi); 573 __decode_pool(p, pi);
540 __insert_pg_pool(&map->pg_pools, pi); 574 __insert_pg_pool(&map->pg_pools, pi);
541 } 575 }
576
577 if (version >= 5 && __decode_pool_names(p, end, map) < 0)
578 goto bad;
579
542 ceph_decode_32_safe(p, end, map->pool_max, bad); 580 ceph_decode_32_safe(p, end, map->pool_max, bad);
543 581
544 ceph_decode_32_safe(p, end, map->flags, bad); 582 ceph_decode_32_safe(p, end, map->flags, bad);
@@ -712,7 +750,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
712 } 750 }
713 pi = __lookup_pg_pool(&map->pg_pools, pool); 751 pi = __lookup_pg_pool(&map->pg_pools, pool);
714 if (!pi) { 752 if (!pi) {
715 pi = kmalloc(sizeof(*pi), GFP_NOFS); 753 pi = kzalloc(sizeof(*pi), GFP_NOFS);
716 if (!pi) { 754 if (!pi) {
717 err = -ENOMEM; 755 err = -ENOMEM;
718 goto bad; 756 goto bad;
@@ -722,6 +760,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
722 } 760 }
723 __decode_pool(p, pi); 761 __decode_pool(p, pi);
724 } 762 }
763 if (version >= 5 && __decode_pool_names(p, end, map) < 0)
764 goto bad;
725 765
726 /* old_pool */ 766 /* old_pool */
727 ceph_decode_32_safe(p, end, len, bad); 767 ceph_decode_32_safe(p, end, len, bad);
@@ -730,10 +770,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
730 770
731 ceph_decode_32_safe(p, end, pool, bad); 771 ceph_decode_32_safe(p, end, pool, bad);
732 pi = __lookup_pg_pool(&map->pg_pools, pool); 772 pi = __lookup_pg_pool(&map->pg_pools, pool);
733 if (pi) { 773 if (pi)
734 rb_erase(&pi->node, &map->pg_pools); 774 __remove_pg_pool(&map->pg_pools, pi);
735 kfree(pi);
736 }
737 } 775 }
738 776
739 /* new_up */ 777 /* new_up */
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h
index 1fb55afb2642..8bc9f1e4f562 100644
--- a/fs/ceph/osdmap.h
+++ b/fs/ceph/osdmap.h
@@ -23,6 +23,7 @@ struct ceph_pg_pool_info {
23 int id; 23 int id;
24 struct ceph_pg_pool v; 24 struct ceph_pg_pool v;
25 int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; 25 int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask;
26 char *name;
26}; 27};
27 28
28struct ceph_pg_mapping { 29struct ceph_pg_mapping {
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index 26ac8b89a676..a1fc1d017b58 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -11,8 +11,10 @@
11/* 11/*
12 * osdmap encoding versions 12 * osdmap encoding versions
13 */ 13 */
14#define CEPH_OSDMAP_INC_VERSION 4 14#define CEPH_OSDMAP_INC_VERSION 5
15#define CEPH_OSDMAP_VERSION 4 15#define CEPH_OSDMAP_INC_VERSION_EXT 5
16#define CEPH_OSDMAP_VERSION 5
17#define CEPH_OSDMAP_VERSION_EXT 5
16 18
17/* 19/*
18 * fs id 20 * fs id
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e6f9bc57d472..2b881262ef67 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -431,8 +431,7 @@ static int dup_array(u64 **dst, __le64 *src, int num)
431 * Caller must hold snap_rwsem for read (i.e., the realm topology won't 431 * Caller must hold snap_rwsem for read (i.e., the realm topology won't
432 * change). 432 * change).
433 */ 433 */
434void ceph_queue_cap_snap(struct ceph_inode_info *ci, 434void ceph_queue_cap_snap(struct ceph_inode_info *ci)
435 struct ceph_snap_context *snapc)
436{ 435{
437 struct inode *inode = &ci->vfs_inode; 436 struct inode *inode = &ci->vfs_inode;
438 struct ceph_cap_snap *capsnap; 437 struct ceph_cap_snap *capsnap;
@@ -451,10 +450,11 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
451 as no new writes are allowed to start when pending, so any 450 as no new writes are allowed to start when pending, so any
452 writes in progress now were started before the previous 451 writes in progress now were started before the previous
453 cap_snap. lucky us. */ 452 cap_snap. lucky us. */
454 dout("queue_cap_snap %p snapc %p seq %llu used %d" 453 dout("queue_cap_snap %p already pending\n", inode);
455 " already pending\n", inode, snapc, snapc->seq, used);
456 kfree(capsnap); 454 kfree(capsnap);
457 } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) { 455 } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) {
456 struct ceph_snap_context *snapc = ci->i_head_snapc;
457
458 igrab(inode); 458 igrab(inode);
459 459
460 atomic_set(&capsnap->nref, 1); 460 atomic_set(&capsnap->nref, 1);
@@ -463,7 +463,6 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
463 INIT_LIST_HEAD(&capsnap->flushing_item); 463 INIT_LIST_HEAD(&capsnap->flushing_item);
464 464
465 capsnap->follows = snapc->seq - 1; 465 capsnap->follows = snapc->seq - 1;
466 capsnap->context = ceph_get_snap_context(snapc);
467 capsnap->issued = __ceph_caps_issued(ci, NULL); 466 capsnap->issued = __ceph_caps_issued(ci, NULL);
468 capsnap->dirty = __ceph_caps_dirty(ci); 467 capsnap->dirty = __ceph_caps_dirty(ci);
469 468
@@ -480,7 +479,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
480 snapshot. */ 479 snapshot. */
481 capsnap->dirty_pages = ci->i_wrbuffer_ref_head; 480 capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
482 ci->i_wrbuffer_ref_head = 0; 481 ci->i_wrbuffer_ref_head = 0;
483 ceph_put_snap_context(ci->i_head_snapc); 482 capsnap->context = snapc;
484 ci->i_head_snapc = NULL; 483 ci->i_head_snapc = NULL;
485 list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); 484 list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
486 485
@@ -522,15 +521,17 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
522 capsnap->ctime = inode->i_ctime; 521 capsnap->ctime = inode->i_ctime;
523 capsnap->time_warp_seq = ci->i_time_warp_seq; 522 capsnap->time_warp_seq = ci->i_time_warp_seq;
524 if (capsnap->dirty_pages) { 523 if (capsnap->dirty_pages) {
525 dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu " 524 dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu "
526 "still has %d dirty pages\n", inode, capsnap, 525 "still has %d dirty pages\n", inode, capsnap,
527 capsnap->context, capsnap->context->seq, 526 capsnap->context, capsnap->context->seq,
528 capsnap->size, capsnap->dirty_pages); 527 ceph_cap_string(capsnap->dirty), capsnap->size,
528 capsnap->dirty_pages);
529 return 0; 529 return 0;
530 } 530 }
531 dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu clean\n", 531 dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu\n",
532 inode, capsnap, capsnap->context, 532 inode, capsnap, capsnap->context,
533 capsnap->context->seq, capsnap->size); 533 capsnap->context->seq, ceph_cap_string(capsnap->dirty),
534 capsnap->size);
534 535
535 spin_lock(&mdsc->snap_flush_lock); 536 spin_lock(&mdsc->snap_flush_lock);
536 list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list); 537 list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
@@ -602,7 +603,7 @@ more:
602 if (lastinode) 603 if (lastinode)
603 iput(lastinode); 604 iput(lastinode);
604 lastinode = inode; 605 lastinode = inode;
605 ceph_queue_cap_snap(ci, realm->cached_context); 606 ceph_queue_cap_snap(ci);
606 spin_lock(&realm->inodes_with_caps_lock); 607 spin_lock(&realm->inodes_with_caps_lock);
607 } 608 }
608 spin_unlock(&realm->inodes_with_caps_lock); 609 spin_unlock(&realm->inodes_with_caps_lock);
@@ -824,8 +825,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
824 spin_unlock(&realm->inodes_with_caps_lock); 825 spin_unlock(&realm->inodes_with_caps_lock);
825 spin_unlock(&inode->i_lock); 826 spin_unlock(&inode->i_lock);
826 827
827 ceph_queue_cap_snap(ci, 828 ceph_queue_cap_snap(ci);
828 ci->i_snap_realm->cached_context);
829 829
830 iput(inode); 830 iput(inode);
831 continue; 831 continue;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index ca702c67bc66..e30dfbb056c3 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -715,8 +715,7 @@ extern int ceph_update_snap_trace(struct ceph_mds_client *m,
715extern void ceph_handle_snap(struct ceph_mds_client *mdsc, 715extern void ceph_handle_snap(struct ceph_mds_client *mdsc,
716 struct ceph_mds_session *session, 716 struct ceph_mds_session *session,
717 struct ceph_msg *msg); 717 struct ceph_msg *msg);
718extern void ceph_queue_cap_snap(struct ceph_inode_info *ci, 718extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
719 struct ceph_snap_context *snapc);
720extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci, 719extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
721 struct ceph_cap_snap *capsnap); 720 struct ceph_cap_snap *capsnap);
722extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc); 721extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 4797787c6a44..246a167cb913 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -18,6 +18,8 @@
18#ifndef _CIFS_FS_SB_H 18#ifndef _CIFS_FS_SB_H
19#define _CIFS_FS_SB_H 19#define _CIFS_FS_SB_H
20 20
21#include <linux/backing-dev.h>
22
21#define CIFS_MOUNT_NO_PERM 1 /* do not do client vfs_perm check */ 23#define CIFS_MOUNT_NO_PERM 1 /* do not do client vfs_perm check */
22#define CIFS_MOUNT_SET_UID 2 /* set current's euid in create etc. */ 24#define CIFS_MOUNT_SET_UID 2 /* set current's euid in create etc. */
23#define CIFS_MOUNT_SERVER_INUM 4 /* inode numbers from uniqueid from server */ 25#define CIFS_MOUNT_SERVER_INUM 4 /* inode numbers from uniqueid from server */
@@ -50,5 +52,6 @@ struct cifs_sb_info {
50#ifdef CONFIG_CIFS_DFS_UPCALL 52#ifdef CONFIG_CIFS_DFS_UPCALL
51 char *mountdata; /* mount options received at mount time */ 53 char *mountdata; /* mount options received at mount time */
52#endif 54#endif
55 struct backing_dev_info bdi;
53}; 56};
54#endif /* _CIFS_FS_SB_H */ 57#endif /* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ded66be6597c..ad235d604a0b 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -103,6 +103,12 @@ cifs_read_super(struct super_block *sb, void *data,
103 if (cifs_sb == NULL) 103 if (cifs_sb == NULL)
104 return -ENOMEM; 104 return -ENOMEM;
105 105
106 rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
107 if (rc) {
108 kfree(cifs_sb);
109 return rc;
110 }
111
106#ifdef CONFIG_CIFS_DFS_UPCALL 112#ifdef CONFIG_CIFS_DFS_UPCALL
107 /* copy mount params to sb for use in submounts */ 113 /* copy mount params to sb for use in submounts */
108 /* BB: should we move this after the mount so we 114 /* BB: should we move this after the mount so we
@@ -115,6 +121,7 @@ cifs_read_super(struct super_block *sb, void *data,
115 int len = strlen(data); 121 int len = strlen(data);
116 cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); 122 cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
117 if (cifs_sb->mountdata == NULL) { 123 if (cifs_sb->mountdata == NULL) {
124 bdi_destroy(&cifs_sb->bdi);
118 kfree(sb->s_fs_info); 125 kfree(sb->s_fs_info);
119 sb->s_fs_info = NULL; 126 sb->s_fs_info = NULL;
120 return -ENOMEM; 127 return -ENOMEM;
@@ -135,6 +142,7 @@ cifs_read_super(struct super_block *sb, void *data,
135 142
136 sb->s_magic = CIFS_MAGIC_NUMBER; 143 sb->s_magic = CIFS_MAGIC_NUMBER;
137 sb->s_op = &cifs_super_ops; 144 sb->s_op = &cifs_super_ops;
145 sb->s_bdi = &cifs_sb->bdi;
138/* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512) 146/* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512)
139 sb->s_blocksize = 147 sb->s_blocksize =
140 cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */ 148 cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */
@@ -183,6 +191,7 @@ out_mount_failed:
183 } 191 }
184#endif 192#endif
185 unload_nls(cifs_sb->local_nls); 193 unload_nls(cifs_sb->local_nls);
194 bdi_destroy(&cifs_sb->bdi);
186 kfree(cifs_sb); 195 kfree(cifs_sb);
187 } 196 }
188 return rc; 197 return rc;
@@ -214,6 +223,7 @@ cifs_put_super(struct super_block *sb)
214#endif 223#endif
215 224
216 unload_nls(cifs_sb->local_nls); 225 unload_nls(cifs_sb->local_nls);
226 bdi_destroy(&cifs_sb->bdi);
217 kfree(cifs_sb); 227 kfree(cifs_sb);
218 228
219 unlock_kernel(); 229 unlock_kernel();
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index a1695dcadd99..d97f9935a028 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -167,6 +167,10 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
167 return -EBUSY; 167 return -EBUSY;
168 } 168 }
169 169
170 error = bdi_setup_and_register(&vc->bdi, "coda", BDI_CAP_MAP_COPY);
171 if (error)
172 goto bdi_err;
173
170 vc->vc_sb = sb; 174 vc->vc_sb = sb;
171 175
172 sb->s_fs_info = vc; 176 sb->s_fs_info = vc;
@@ -175,6 +179,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
175 sb->s_blocksize_bits = 12; 179 sb->s_blocksize_bits = 12;
176 sb->s_magic = CODA_SUPER_MAGIC; 180 sb->s_magic = CODA_SUPER_MAGIC;
177 sb->s_op = &coda_super_operations; 181 sb->s_op = &coda_super_operations;
182 sb->s_bdi = &vc->bdi;
178 183
179 /* get root fid from Venus: this needs the root inode */ 184 /* get root fid from Venus: this needs the root inode */
180 error = venus_rootfid(sb, &fid); 185 error = venus_rootfid(sb, &fid);
@@ -200,6 +205,8 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
200 return 0; 205 return 0;
201 206
202 error: 207 error:
208 bdi_destroy(&vc->bdi);
209 bdi_err:
203 if (root) 210 if (root)
204 iput(root); 211 iput(root);
205 if (vc) 212 if (vc)
@@ -210,6 +217,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
210 217
211static void coda_put_super(struct super_block *sb) 218static void coda_put_super(struct super_block *sb)
212{ 219{
220 bdi_destroy(&coda_vcp(sb)->bdi);
213 coda_vcp(sb)->vc_sb = NULL; 221 coda_vcp(sb)->vc_sb = NULL;
214 sb->s_fs_info = NULL; 222 sb->s_fs_info = NULL;
215 223
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index efb2b9400391..1cc087635a5e 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -382,8 +382,8 @@ out:
382static void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, 382static void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num,
383 struct ecryptfs_crypt_stat *crypt_stat) 383 struct ecryptfs_crypt_stat *crypt_stat)
384{ 384{
385 (*offset) = (crypt_stat->num_header_bytes_at_front 385 (*offset) = ecryptfs_lower_header_size(crypt_stat)
386 + (crypt_stat->extent_size * extent_num)); 386 + (crypt_stat->extent_size * extent_num);
387} 387}
388 388
389/** 389/**
@@ -835,13 +835,13 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat)
835 set_extent_mask_and_shift(crypt_stat); 835 set_extent_mask_and_shift(crypt_stat);
836 crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES; 836 crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES;
837 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 837 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
838 crypt_stat->num_header_bytes_at_front = 0; 838 crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
839 else { 839 else {
840 if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) 840 if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)
841 crypt_stat->num_header_bytes_at_front = 841 crypt_stat->metadata_size =
842 ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; 842 ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
843 else 843 else
844 crypt_stat->num_header_bytes_at_front = PAGE_CACHE_SIZE; 844 crypt_stat->metadata_size = PAGE_CACHE_SIZE;
845 } 845 }
846} 846}
847 847
@@ -1108,9 +1108,9 @@ static void write_ecryptfs_marker(char *page_virt, size_t *written)
1108 (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; 1108 (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
1109} 1109}
1110 1110
1111static void 1111void ecryptfs_write_crypt_stat_flags(char *page_virt,
1112write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat, 1112 struct ecryptfs_crypt_stat *crypt_stat,
1113 size_t *written) 1113 size_t *written)
1114{ 1114{
1115 u32 flags = 0; 1115 u32 flags = 0;
1116 int i; 1116 int i;
@@ -1238,8 +1238,7 @@ ecryptfs_write_header_metadata(char *virt,
1238 1238
1239 header_extent_size = (u32)crypt_stat->extent_size; 1239 header_extent_size = (u32)crypt_stat->extent_size;
1240 num_header_extents_at_front = 1240 num_header_extents_at_front =
1241 (u16)(crypt_stat->num_header_bytes_at_front 1241 (u16)(crypt_stat->metadata_size / crypt_stat->extent_size);
1242 / crypt_stat->extent_size);
1243 put_unaligned_be32(header_extent_size, virt); 1242 put_unaligned_be32(header_extent_size, virt);
1244 virt += 4; 1243 virt += 4;
1245 put_unaligned_be16(num_header_extents_at_front, virt); 1244 put_unaligned_be16(num_header_extents_at_front, virt);
@@ -1292,7 +1291,8 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t max,
1292 offset = ECRYPTFS_FILE_SIZE_BYTES; 1291 offset = ECRYPTFS_FILE_SIZE_BYTES;
1293 write_ecryptfs_marker((page_virt + offset), &written); 1292 write_ecryptfs_marker((page_virt + offset), &written);
1294 offset += written; 1293 offset += written;
1295 write_ecryptfs_flags((page_virt + offset), crypt_stat, &written); 1294 ecryptfs_write_crypt_stat_flags((page_virt + offset), crypt_stat,
1295 &written);
1296 offset += written; 1296 offset += written;
1297 ecryptfs_write_header_metadata((page_virt + offset), crypt_stat, 1297 ecryptfs_write_header_metadata((page_virt + offset), crypt_stat,
1298 &written); 1298 &written);
@@ -1382,7 +1382,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1382 rc = -EINVAL; 1382 rc = -EINVAL;
1383 goto out; 1383 goto out;
1384 } 1384 }
1385 virt_len = crypt_stat->num_header_bytes_at_front; 1385 virt_len = crypt_stat->metadata_size;
1386 order = get_order(virt_len); 1386 order = get_order(virt_len);
1387 /* Released in this function */ 1387 /* Released in this function */
1388 virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL, order); 1388 virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL, order);
@@ -1428,16 +1428,15 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
1428 header_extent_size = get_unaligned_be32(virt); 1428 header_extent_size = get_unaligned_be32(virt);
1429 virt += sizeof(__be32); 1429 virt += sizeof(__be32);
1430 num_header_extents_at_front = get_unaligned_be16(virt); 1430 num_header_extents_at_front = get_unaligned_be16(virt);
1431 crypt_stat->num_header_bytes_at_front = 1431 crypt_stat->metadata_size = (((size_t)num_header_extents_at_front
1432 (((size_t)num_header_extents_at_front 1432 * (size_t)header_extent_size));
1433 * (size_t)header_extent_size));
1434 (*bytes_read) = (sizeof(__be32) + sizeof(__be16)); 1433 (*bytes_read) = (sizeof(__be32) + sizeof(__be16));
1435 if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE) 1434 if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE)
1436 && (crypt_stat->num_header_bytes_at_front 1435 && (crypt_stat->metadata_size
1437 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) { 1436 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) {
1438 rc = -EINVAL; 1437 rc = -EINVAL;
1439 printk(KERN_WARNING "Invalid header size: [%zd]\n", 1438 printk(KERN_WARNING "Invalid header size: [%zd]\n",
1440 crypt_stat->num_header_bytes_at_front); 1439 crypt_stat->metadata_size);
1441 } 1440 }
1442 return rc; 1441 return rc;
1443} 1442}
@@ -1452,8 +1451,7 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
1452 */ 1451 */
1453static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat) 1452static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
1454{ 1453{
1455 crypt_stat->num_header_bytes_at_front = 1454 crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
1456 ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
1457} 1455}
1458 1456
1459/** 1457/**
@@ -1607,6 +1605,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
1607 ecryptfs_dentry, 1605 ecryptfs_dentry,
1608 ECRYPTFS_VALIDATE_HEADER_SIZE); 1606 ECRYPTFS_VALIDATE_HEADER_SIZE);
1609 if (rc) { 1607 if (rc) {
1608 memset(page_virt, 0, PAGE_CACHE_SIZE);
1610 rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode); 1609 rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode);
1611 if (rc) { 1610 if (rc) {
1612 printk(KERN_DEBUG "Valid eCryptfs headers not found in " 1611 printk(KERN_DEBUG "Valid eCryptfs headers not found in "
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 542f625312f3..bfc2e0f78f00 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -35,6 +35,7 @@
35#include <linux/scatterlist.h> 35#include <linux/scatterlist.h>
36#include <linux/hash.h> 36#include <linux/hash.h>
37#include <linux/nsproxy.h> 37#include <linux/nsproxy.h>
38#include <linux/backing-dev.h>
38 39
39/* Version verification for shared data structures w/ userspace */ 40/* Version verification for shared data structures w/ userspace */
40#define ECRYPTFS_VERSION_MAJOR 0x00 41#define ECRYPTFS_VERSION_MAJOR 0x00
@@ -273,7 +274,7 @@ struct ecryptfs_crypt_stat {
273 u32 flags; 274 u32 flags;
274 unsigned int file_version; 275 unsigned int file_version;
275 size_t iv_bytes; 276 size_t iv_bytes;
276 size_t num_header_bytes_at_front; 277 size_t metadata_size;
277 size_t extent_size; /* Data extent size; default is 4096 */ 278 size_t extent_size; /* Data extent size; default is 4096 */
278 size_t key_size; 279 size_t key_size;
279 size_t extent_shift; 280 size_t extent_shift;
@@ -393,6 +394,7 @@ struct ecryptfs_mount_crypt_stat {
393struct ecryptfs_sb_info { 394struct ecryptfs_sb_info {
394 struct super_block *wsi_sb; 395 struct super_block *wsi_sb;
395 struct ecryptfs_mount_crypt_stat mount_crypt_stat; 396 struct ecryptfs_mount_crypt_stat mount_crypt_stat;
397 struct backing_dev_info bdi;
396}; 398};
397 399
398/* file private data. */ 400/* file private data. */
@@ -464,6 +466,14 @@ struct ecryptfs_daemon {
464 466
465extern struct mutex ecryptfs_daemon_hash_mux; 467extern struct mutex ecryptfs_daemon_hash_mux;
466 468
469static inline size_t
470ecryptfs_lower_header_size(struct ecryptfs_crypt_stat *crypt_stat)
471{
472 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
473 return 0;
474 return crypt_stat->metadata_size;
475}
476
467static inline struct ecryptfs_file_info * 477static inline struct ecryptfs_file_info *
468ecryptfs_file_to_private(struct file *file) 478ecryptfs_file_to_private(struct file *file)
469{ 479{
@@ -651,6 +661,9 @@ int ecryptfs_decrypt_page(struct page *page);
651int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry); 661int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry);
652int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry); 662int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry);
653int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry); 663int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry);
664void ecryptfs_write_crypt_stat_flags(char *page_virt,
665 struct ecryptfs_crypt_stat *crypt_stat,
666 size_t *written);
654int ecryptfs_read_and_validate_header_region(char *data, 667int ecryptfs_read_and_validate_header_region(char *data,
655 struct inode *ecryptfs_inode); 668 struct inode *ecryptfs_inode);
656int ecryptfs_read_and_validate_xattr_region(char *page_virt, 669int ecryptfs_read_and_validate_xattr_region(char *page_virt,
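Note: the new ecryptfs_lower_header_size() helper makes the lower-file layout explicit — when the metadata is stored in an xattr the lower file carries no header extents, otherwise it begins with metadata_size bytes of header. The sketch below is illustrative only (the real arithmetic is upper_size_to_lower_size() in fs/ecryptfs/inode.c, shown later in this patch); it assumes extent_size is a power of two, as the struct comment says (default 4096).

/* Sketch: map an upper (plaintext) size to the corresponding lower file size. */
static loff_t example_lower_size(struct ecryptfs_crypt_stat *crypt_stat,
				 loff_t upper_size)
{
	/* 0 when metadata lives in an xattr, metadata_size otherwise */
	loff_t lower_size = ecryptfs_lower_header_size(crypt_stat);
	loff_t num_extents;

	if (!upper_size)
		return lower_size;
	num_extents = upper_size >> crypt_stat->extent_shift;
	if (upper_size & (crypt_stat->extent_size - 1))
		num_extents++;
	return lower_size + num_extents * crypt_stat->extent_size;
}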
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index d3362faf3852..e2d4418affac 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -324,6 +324,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
324 rc = ecryptfs_read_and_validate_header_region(page_virt, 324 rc = ecryptfs_read_and_validate_header_region(page_virt,
325 ecryptfs_dentry->d_inode); 325 ecryptfs_dentry->d_inode);
326 if (rc) { 326 if (rc) {
327 memset(page_virt, 0, PAGE_CACHE_SIZE);
327 rc = ecryptfs_read_and_validate_xattr_region(page_virt, 328 rc = ecryptfs_read_and_validate_xattr_region(page_virt,
328 ecryptfs_dentry); 329 ecryptfs_dentry);
329 if (rc) { 330 if (rc) {
@@ -336,7 +337,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
336 ecryptfs_dentry->d_sb)->mount_crypt_stat; 337 ecryptfs_dentry->d_sb)->mount_crypt_stat;
337 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) { 338 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
338 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 339 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
339 file_size = (crypt_stat->num_header_bytes_at_front 340 file_size = (crypt_stat->metadata_size
340 + i_size_read(lower_dentry->d_inode)); 341 + i_size_read(lower_dentry->d_inode));
341 else 342 else
342 file_size = i_size_read(lower_dentry->d_inode); 343 file_size = i_size_read(lower_dentry->d_inode);
@@ -388,9 +389,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
388 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); 389 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
389 if (IS_ERR(lower_dentry)) { 390 if (IS_ERR(lower_dentry)) {
390 rc = PTR_ERR(lower_dentry); 391 rc = PTR_ERR(lower_dentry);
391 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " 392 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
392 "lower_dentry = [%s]\n", __func__, rc, 393 "[%d] on lower_dentry = [%s]\n", __func__, rc,
393 ecryptfs_dentry->d_name.name); 394 encrypted_and_encoded_name);
394 goto out_d_drop; 395 goto out_d_drop;
395 } 396 }
396 if (lower_dentry->d_inode) 397 if (lower_dentry->d_inode)
@@ -417,9 +418,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
417 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); 418 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
418 if (IS_ERR(lower_dentry)) { 419 if (IS_ERR(lower_dentry)) {
419 rc = PTR_ERR(lower_dentry); 420 rc = PTR_ERR(lower_dentry);
420 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " 421 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
421 "lower_dentry = [%s]\n", __func__, rc, 422 "[%d] on lower_dentry = [%s]\n", __func__, rc,
422 encrypted_and_encoded_name); 423 encrypted_and_encoded_name);
423 goto out_d_drop; 424 goto out_d_drop;
424 } 425 }
425lookup_and_interpose: 426lookup_and_interpose:
@@ -456,8 +457,8 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
456 rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0); 457 rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0);
457 if (rc) 458 if (rc)
458 goto out_lock; 459 goto out_lock;
459 fsstack_copy_attr_times(dir, lower_new_dentry->d_inode); 460 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
460 fsstack_copy_inode_size(dir, lower_new_dentry->d_inode); 461 fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
461 old_dentry->d_inode->i_nlink = 462 old_dentry->d_inode->i_nlink =
462 ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink; 463 ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink;
463 i_size_write(new_dentry->d_inode, file_size_save); 464 i_size_write(new_dentry->d_inode, file_size_save);
@@ -648,38 +649,17 @@ out_lock:
648 return rc; 649 return rc;
649} 650}
650 651
651static int 652static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf,
652ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) 653 size_t *bufsiz)
653{ 654{
655 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
654 char *lower_buf; 656 char *lower_buf;
655 size_t lower_bufsiz; 657 size_t lower_bufsiz = PATH_MAX;
656 struct dentry *lower_dentry;
657 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
658 char *plaintext_name;
659 size_t plaintext_name_size;
660 mm_segment_t old_fs; 658 mm_segment_t old_fs;
661 int rc; 659 int rc;
662 660
663 lower_dentry = ecryptfs_dentry_to_lower(dentry);
664 if (!lower_dentry->d_inode->i_op->readlink) {
665 rc = -EINVAL;
666 goto out;
667 }
668 mount_crypt_stat = &ecryptfs_superblock_to_private(
669 dentry->d_sb)->mount_crypt_stat;
670 /*
671 * If the lower filename is encrypted, it will result in a significantly
672 * longer name. If needed, truncate the name after decode and decrypt.
673 */
674 if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
675 lower_bufsiz = PATH_MAX;
676 else
677 lower_bufsiz = bufsiz;
678 /* Released in this function */
679 lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL); 661 lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL);
680 if (lower_buf == NULL) { 662 if (!lower_buf) {
681 printk(KERN_ERR "%s: Out of memory whilst attempting to "
682 "kmalloc [%zd] bytes\n", __func__, lower_bufsiz);
683 rc = -ENOMEM; 663 rc = -ENOMEM;
684 goto out; 664 goto out;
685 } 665 }
@@ -689,29 +669,31 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
689 (char __user *)lower_buf, 669 (char __user *)lower_buf,
690 lower_bufsiz); 670 lower_bufsiz);
691 set_fs(old_fs); 671 set_fs(old_fs);
692 if (rc >= 0) { 672 if (rc < 0)
693 rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name, 673 goto out;
694 &plaintext_name_size, 674 lower_bufsiz = rc;
695 dentry, lower_buf, 675 rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry,
696 rc); 676 lower_buf, lower_bufsiz);
697 if (rc) { 677out:
698 printk(KERN_ERR "%s: Error attempting to decode and "
699 "decrypt filename; rc = [%d]\n", __func__,
700 rc);
701 goto out_free_lower_buf;
702 }
703 /* Check for bufsiz <= 0 done in sys_readlinkat() */
704 rc = copy_to_user(buf, plaintext_name,
705 min((size_t) bufsiz, plaintext_name_size));
706 if (rc)
707 rc = -EFAULT;
708 else
709 rc = plaintext_name_size;
710 kfree(plaintext_name);
711 fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode);
712 }
713out_free_lower_buf:
714 kfree(lower_buf); 678 kfree(lower_buf);
679 return rc;
680}
681
682static int
683ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
684{
685 char *kbuf;
686 size_t kbufsiz, copied;
687 int rc;
688
689 rc = ecryptfs_readlink_lower(dentry, &kbuf, &kbufsiz);
690 if (rc)
691 goto out;
692 copied = min_t(size_t, bufsiz, kbufsiz);
693 rc = copy_to_user(buf, kbuf, copied) ? -EFAULT : copied;
694 kfree(kbuf);
695 fsstack_copy_attr_atime(dentry->d_inode,
696 ecryptfs_dentry_to_lower(dentry)->d_inode);
715out: 697out:
716 return rc; 698 return rc;
717} 699}
@@ -769,7 +751,7 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat,
769{ 751{
770 loff_t lower_size; 752 loff_t lower_size;
771 753
772 lower_size = crypt_stat->num_header_bytes_at_front; 754 lower_size = ecryptfs_lower_header_size(crypt_stat);
773 if (upper_size != 0) { 755 if (upper_size != 0) {
774 loff_t num_extents; 756 loff_t num_extents;
775 757
@@ -1016,6 +998,28 @@ out:
1016 return rc; 998 return rc;
1017} 999}
1018 1000
1001int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry,
1002 struct kstat *stat)
1003{
1004 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
1005 int rc = 0;
1006
1007 mount_crypt_stat = &ecryptfs_superblock_to_private(
1008 dentry->d_sb)->mount_crypt_stat;
1009 generic_fillattr(dentry->d_inode, stat);
1010 if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) {
1011 char *target;
1012 size_t targetsiz;
1013
1014 rc = ecryptfs_readlink_lower(dentry, &target, &targetsiz);
1015 if (!rc) {
1016 kfree(target);
1017 stat->size = targetsiz;
1018 }
1019 }
1020 return rc;
1021}
1022
1019int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, 1023int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1020 struct kstat *stat) 1024 struct kstat *stat)
1021{ 1025{
@@ -1040,7 +1044,7 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
1040 1044
1041 lower_dentry = ecryptfs_dentry_to_lower(dentry); 1045 lower_dentry = ecryptfs_dentry_to_lower(dentry);
1042 if (!lower_dentry->d_inode->i_op->setxattr) { 1046 if (!lower_dentry->d_inode->i_op->setxattr) {
1043 rc = -ENOSYS; 1047 rc = -EOPNOTSUPP;
1044 goto out; 1048 goto out;
1045 } 1049 }
1046 mutex_lock(&lower_dentry->d_inode->i_mutex); 1050 mutex_lock(&lower_dentry->d_inode->i_mutex);
@@ -1058,7 +1062,7 @@ ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name,
1058 int rc = 0; 1062 int rc = 0;
1059 1063
1060 if (!lower_dentry->d_inode->i_op->getxattr) { 1064 if (!lower_dentry->d_inode->i_op->getxattr) {
1061 rc = -ENOSYS; 1065 rc = -EOPNOTSUPP;
1062 goto out; 1066 goto out;
1063 } 1067 }
1064 mutex_lock(&lower_dentry->d_inode->i_mutex); 1068 mutex_lock(&lower_dentry->d_inode->i_mutex);
@@ -1085,7 +1089,7 @@ ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
1085 1089
1086 lower_dentry = ecryptfs_dentry_to_lower(dentry); 1090 lower_dentry = ecryptfs_dentry_to_lower(dentry);
1087 if (!lower_dentry->d_inode->i_op->listxattr) { 1091 if (!lower_dentry->d_inode->i_op->listxattr) {
1088 rc = -ENOSYS; 1092 rc = -EOPNOTSUPP;
1089 goto out; 1093 goto out;
1090 } 1094 }
1091 mutex_lock(&lower_dentry->d_inode->i_mutex); 1095 mutex_lock(&lower_dentry->d_inode->i_mutex);
@@ -1102,7 +1106,7 @@ static int ecryptfs_removexattr(struct dentry *dentry, const char *name)
1102 1106
1103 lower_dentry = ecryptfs_dentry_to_lower(dentry); 1107 lower_dentry = ecryptfs_dentry_to_lower(dentry);
1104 if (!lower_dentry->d_inode->i_op->removexattr) { 1108 if (!lower_dentry->d_inode->i_op->removexattr) {
1105 rc = -ENOSYS; 1109 rc = -EOPNOTSUPP;
1106 goto out; 1110 goto out;
1107 } 1111 }
1108 mutex_lock(&lower_dentry->d_inode->i_mutex); 1112 mutex_lock(&lower_dentry->d_inode->i_mutex);
@@ -1133,6 +1137,7 @@ const struct inode_operations ecryptfs_symlink_iops = {
1133 .put_link = ecryptfs_put_link, 1137 .put_link = ecryptfs_put_link,
1134 .permission = ecryptfs_permission, 1138 .permission = ecryptfs_permission,
1135 .setattr = ecryptfs_setattr, 1139 .setattr = ecryptfs_setattr,
1140 .getattr = ecryptfs_getattr_link,
1136 .setxattr = ecryptfs_setxattr, 1141 .setxattr = ecryptfs_setxattr,
1137 .getxattr = ecryptfs_getxattr, 1142 .getxattr = ecryptfs_getxattr,
1138 .listxattr = ecryptfs_listxattr, 1143 .listxattr = ecryptfs_listxattr,
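Note: switching the xattr wrappers from -ENOSYS to -EOPNOTSUPP matters to userspace — -ENOSYS means "no such system call", while xattr tools expect -EOPNOTSUPP when the lower filesystem simply lacks xattr support. A minimal sketch of the guard pattern each wrapper now uses (locking and the actual lower call are elided):

/* Sketch: only propagate to the lower inode if it implements the op;
 * otherwise report "operation not supported" rather than a missing syscall. */
static int example_stacked_setxattr(struct dentry *dentry, const char *name,
				    const void *value, size_t size, int flags)
{
	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);

	if (!lower_dentry->d_inode->i_op->setxattr)
		return -EOPNOTSUPP;
	/* ... lock the lower inode and call its ->setxattr() ... */
	return 0;
}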
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index af1a8f01ebac..760983d0f25e 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -497,17 +497,25 @@ struct kmem_cache *ecryptfs_sb_info_cache;
497static int 497static int
498ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent) 498ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
499{ 499{
500 struct ecryptfs_sb_info *esi;
500 int rc = 0; 501 int rc = 0;
501 502
502 /* Released in ecryptfs_put_super() */ 503 /* Released in ecryptfs_put_super() */
503 ecryptfs_set_superblock_private(sb, 504 ecryptfs_set_superblock_private(sb,
504 kmem_cache_zalloc(ecryptfs_sb_info_cache, 505 kmem_cache_zalloc(ecryptfs_sb_info_cache,
505 GFP_KERNEL)); 506 GFP_KERNEL));
506 if (!ecryptfs_superblock_to_private(sb)) { 507 esi = ecryptfs_superblock_to_private(sb);
508 if (!esi) {
507 ecryptfs_printk(KERN_WARNING, "Out of memory\n"); 509 ecryptfs_printk(KERN_WARNING, "Out of memory\n");
508 rc = -ENOMEM; 510 rc = -ENOMEM;
509 goto out; 511 goto out;
510 } 512 }
513
514 rc = bdi_setup_and_register(&esi->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
515 if (rc)
516 goto out;
517
518 sb->s_bdi = &esi->bdi;
511 sb->s_op = &ecryptfs_sops; 519 sb->s_op = &ecryptfs_sops;
512 /* Released through deactivate_super(sb) from get_sb_nodev */ 520 /* Released through deactivate_super(sb) from get_sb_nodev */
513 sb->s_root = d_alloc(NULL, &(const struct qstr) { 521 sb->s_root = d_alloc(NULL, &(const struct qstr) {
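Note: like the other filesystems touched in this series, eCryptfs now registers its own backing_dev_info so per-superblock writeback has a real BDI to queue against. A minimal sketch of the setup/teardown pairing, assuming a fill_super/put_super pair and a hypothetical my_sb_info container (error unwinding omitted):

/* Sketch: per-superblock BDI lifetime for a stacked or no-device filesystem.
 * bdi_setup_and_register() registers a uniquely named BDI ("ecryptfs-N");
 * bdi_destroy() must be called exactly once when the superblock goes away. */
static int example_fill_super(struct super_block *sb, struct my_sb_info *si)
{
	int rc;

	rc = bdi_setup_and_register(&si->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
	if (rc)
		return rc;
	sb->s_bdi = &si->bdi;		/* writeback now targets this BDI */
	return 0;
}

static void example_put_super(struct my_sb_info *si)
{
	bdi_destroy(&si->bdi);		/* pairs with bdi_setup_and_register() */
}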
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index d491237c98e7..2ee9a3a7b68c 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -83,6 +83,19 @@ out:
83 return rc; 83 return rc;
84} 84}
85 85
86static void strip_xattr_flag(char *page_virt,
87 struct ecryptfs_crypt_stat *crypt_stat)
88{
89 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
90 size_t written;
91
92 crypt_stat->flags &= ~ECRYPTFS_METADATA_IN_XATTR;
93 ecryptfs_write_crypt_stat_flags(page_virt, crypt_stat,
94 &written);
95 crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
96 }
97}
98
86/** 99/**
87 * Header Extent: 100 * Header Extent:
88 * Octets 0-7: Unencrypted file size (big-endian) 101 * Octets 0-7: Unencrypted file size (big-endian)
@@ -98,19 +111,6 @@ out:
98 * (big-endian) 111 * (big-endian)
99 * Octet 26: Begin RFC 2440 authentication token packet set 112 * Octet 26: Begin RFC 2440 authentication token packet set
100 */ 113 */
101static void set_header_info(char *page_virt,
102 struct ecryptfs_crypt_stat *crypt_stat)
103{
104 size_t written;
105 size_t save_num_header_bytes_at_front =
106 crypt_stat->num_header_bytes_at_front;
107
108 crypt_stat->num_header_bytes_at_front =
109 ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
110 ecryptfs_write_header_metadata(page_virt + 20, crypt_stat, &written);
111 crypt_stat->num_header_bytes_at_front =
112 save_num_header_bytes_at_front;
113}
114 114
115/** 115/**
116 * ecryptfs_copy_up_encrypted_with_header 116 * ecryptfs_copy_up_encrypted_with_header
@@ -136,8 +136,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
136 * num_extents_per_page) 136 * num_extents_per_page)
137 + extent_num_in_page); 137 + extent_num_in_page);
138 size_t num_header_extents_at_front = 138 size_t num_header_extents_at_front =
139 (crypt_stat->num_header_bytes_at_front 139 (crypt_stat->metadata_size / crypt_stat->extent_size);
140 / crypt_stat->extent_size);
141 140
142 if (view_extent_num < num_header_extents_at_front) { 141 if (view_extent_num < num_header_extents_at_front) {
143 /* This is a header extent */ 142 /* This is a header extent */
@@ -147,9 +146,14 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
147 memset(page_virt, 0, PAGE_CACHE_SIZE); 146 memset(page_virt, 0, PAGE_CACHE_SIZE);
148 /* TODO: Support more than one header extent */ 147 /* TODO: Support more than one header extent */
149 if (view_extent_num == 0) { 148 if (view_extent_num == 0) {
149 size_t written;
150
150 rc = ecryptfs_read_xattr_region( 151 rc = ecryptfs_read_xattr_region(
151 page_virt, page->mapping->host); 152 page_virt, page->mapping->host);
152 set_header_info(page_virt, crypt_stat); 153 strip_xattr_flag(page_virt + 16, crypt_stat);
154 ecryptfs_write_header_metadata(page_virt + 20,
155 crypt_stat,
156 &written);
153 } 157 }
154 kunmap_atomic(page_virt, KM_USER0); 158 kunmap_atomic(page_virt, KM_USER0);
155 flush_dcache_page(page); 159 flush_dcache_page(page);
@@ -162,7 +166,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
162 /* This is an encrypted data extent */ 166 /* This is an encrypted data extent */
163 loff_t lower_offset = 167 loff_t lower_offset =
164 ((view_extent_num * crypt_stat->extent_size) 168 ((view_extent_num * crypt_stat->extent_size)
165 - crypt_stat->num_header_bytes_at_front); 169 - crypt_stat->metadata_size);
166 170
167 rc = ecryptfs_read_lower_page_segment( 171 rc = ecryptfs_read_lower_page_segment(
168 page, (lower_offset >> PAGE_CACHE_SHIFT), 172 page, (lower_offset >> PAGE_CACHE_SHIFT),
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index fcef41c1d2cf..0c0ae491d231 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -86,7 +86,6 @@ static void ecryptfs_destroy_inode(struct inode *inode)
86 if (lower_dentry->d_inode) { 86 if (lower_dentry->d_inode) {
87 fput(inode_info->lower_file); 87 fput(inode_info->lower_file);
88 inode_info->lower_file = NULL; 88 inode_info->lower_file = NULL;
89 d_drop(lower_dentry);
90 } 89 }
91 } 90 }
92 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); 91 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
@@ -123,6 +122,7 @@ static void ecryptfs_put_super(struct super_block *sb)
123 lock_kernel(); 122 lock_kernel();
124 123
125 ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); 124 ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
125 bdi_destroy(&sb_info->bdi);
126 kmem_cache_free(ecryptfs_sb_info_cache, sb_info); 126 kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
127 ecryptfs_set_superblock_private(sb, NULL); 127 ecryptfs_set_superblock_private(sb, NULL);
128 128
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 8442e353309f..54373278a353 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -35,6 +35,7 @@
35 35
36#include <linux/fs.h> 36#include <linux/fs.h>
37#include <linux/time.h> 37#include <linux/time.h>
38#include <linux/backing-dev.h>
38#include "common.h" 39#include "common.h"
39 40
40/* FIXME: Remove once pnfs hits mainline 41/* FIXME: Remove once pnfs hits mainline
@@ -92,6 +93,7 @@ struct exofs_sb_info {
92 struct exofs_layout layout; /* Default files layout, 93 struct exofs_layout layout; /* Default files layout,
93 * contains the variable osd_dev 94 * contains the variable osd_dev
94 * array. Keep last */ 95 * array. Keep last */
96 struct backing_dev_info bdi;
95 struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */ 97 struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */
96}; 98};
97 99
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 18e57ea1e5b4..03149b9a5178 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -302,6 +302,7 @@ static void exofs_put_super(struct super_block *sb)
302 _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0], 302 _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0],
303 sbi->layout.s_pid); 303 sbi->layout.s_pid);
304 304
305 bdi_destroy(&sbi->bdi);
305 exofs_free_sbi(sbi); 306 exofs_free_sbi(sbi);
306 sb->s_fs_info = NULL; 307 sb->s_fs_info = NULL;
307} 308}
@@ -546,6 +547,10 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
546 if (!sbi) 547 if (!sbi)
547 return -ENOMEM; 548 return -ENOMEM;
548 549
550 ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
551 if (ret)
552 goto free_bdi;
553
549 /* use mount options to fill superblock */ 554 /* use mount options to fill superblock */
550 od = osduld_path_lookup(opts->dev_name); 555 od = osduld_path_lookup(opts->dev_name);
551 if (IS_ERR(od)) { 556 if (IS_ERR(od)) {
@@ -612,6 +617,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
612 } 617 }
613 618
614 /* set up operation vectors */ 619 /* set up operation vectors */
620 sb->s_bdi = &sbi->bdi;
615 sb->s_fs_info = sbi; 621 sb->s_fs_info = sbi;
616 sb->s_op = &exofs_sops; 622 sb->s_op = &exofs_sops;
617 sb->s_export_op = &exofs_export_ops; 623 sb->s_export_op = &exofs_export_ops;
@@ -643,6 +649,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
643 return 0; 649 return 0;
644 650
645free_sbi: 651free_sbi:
652 bdi_destroy(&sbi->bdi);
653free_bdi:
646 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", 654 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
647 opts->dev_name, sbi->layout.s_pid, ret); 655 opts->dev_name, sbi->layout.s_pid, ret);
648 exofs_free_sbi(sbi); 656 exofs_free_sbi(sbi);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 94c8ee81f5e1..236b834b4ca8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3879,6 +3879,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
3879 physical += offset; 3879 physical += offset;
3880 length = EXT4_SB(inode->i_sb)->s_inode_size - offset; 3880 length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
3881 flags |= FIEMAP_EXTENT_DATA_INLINE; 3881 flags |= FIEMAP_EXTENT_DATA_INLINE;
3882 brelse(iloc.bh);
3882 } else { /* external block */ 3883 } else { /* external block */
3883 physical = EXT4_I(inode)->i_file_acl << blockbits; 3884 physical = EXT4_I(inode)->i_file_acl << blockbits;
3884 length = inode->i_sb->s_blocksize; 3885 length = inode->i_sb->s_blocksize;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5381802d6052..81d605412844 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5375,7 +5375,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
5375 } else { 5375 } else {
5376 struct ext4_iloc iloc; 5376 struct ext4_iloc iloc;
5377 5377
5378 err = ext4_get_inode_loc(inode, &iloc); 5378 err = __ext4_get_inode_loc(inode, &iloc, 0);
5379 if (err) 5379 if (err)
5380 return err; 5380 return err;
5381 if (wbc->sync_mode == WB_SYNC_ALL) 5381 if (wbc->sync_mode == WB_SYNC_ALL)
@@ -5386,6 +5386,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
5386 (unsigned long long)iloc.bh->b_blocknr); 5386 (unsigned long long)iloc.bh->b_blocknr);
5387 err = -EIO; 5387 err = -EIO;
5388 } 5388 }
5389 brelse(iloc.bh);
5389 } 5390 }
5390 return err; 5391 return err;
5391} 5392}
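Note: both ext4 hunks above fix the same class of leak — the inode-location lookup hands back a referenced buffer_head in iloc.bh that the caller owns and must release. A sketch of the balanced pattern (the helper name and check are illustrative):

/* Sketch: every successful ext4_get_inode_loc() must be paired with a
 * brelse() of iloc.bh on every exit path, which is what the hunks add. */
static int example_inode_block_uptodate(struct inode *inode)
{
	struct ext4_iloc iloc;
	int err;

	err = ext4_get_inode_loc(inode, &iloc);
	if (err)
		return err;
	err = buffer_uptodate(iloc.bh) ? 0 : -EIO;
	brelse(iloc.bh);		/* drop the reference we were handed */
	return err;
}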
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bde9d0b170c2..b423a364dca3 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2535,6 +2535,17 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2535 mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2535 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2536 entry->count, entry->group, entry); 2536 entry->count, entry->group, entry);
2537 2537
2538 if (test_opt(sb, DISCARD)) {
2539 ext4_fsblk_t discard_block;
2540
2541 discard_block = entry->start_blk +
2542 ext4_group_first_block_no(sb, entry->group);
2543 trace_ext4_discard_blocks(sb,
2544 (unsigned long long)discard_block,
2545 entry->count);
2546 sb_issue_discard(sb, discard_block, entry->count);
2547 }
2548
2538 err = ext4_mb_load_buddy(sb, entry->group, &e4b); 2549 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2539 /* we expect to find existing buddy because it's pinned */ 2550 /* we expect to find existing buddy because it's pinned */
2540 BUG_ON(err != 0); 2551 BUG_ON(err != 0);
@@ -2556,16 +2567,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2556 page_cache_release(e4b.bd_bitmap_page); 2567 page_cache_release(e4b.bd_bitmap_page);
2557 } 2568 }
2558 ext4_unlock_group(sb, entry->group); 2569 ext4_unlock_group(sb, entry->group);
2559 if (test_opt(sb, DISCARD)) {
2560 ext4_fsblk_t discard_block;
2561
2562 discard_block = entry->start_blk +
2563 ext4_group_first_block_no(sb, entry->group);
2564 trace_ext4_discard_blocks(sb,
2565 (unsigned long long)discard_block,
2566 entry->count);
2567 sb_issue_discard(sb, discard_block, entry->count);
2568 }
2569 kmem_cache_free(ext4_free_ext_cachep, entry); 2570 kmem_cache_free(ext4_free_ext_cachep, entry);
2570 ext4_mb_release_desc(&e4b); 2571 ext4_mb_release_desc(&e4b);
2571 } 2572 }
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 6c751106c2e5..7faefb4da939 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -228,14 +228,23 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
228 228
229#ifdef CONFIG_BLOCK 229#ifdef CONFIG_BLOCK
230 230
231#define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits) 231static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
232#define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits); 232{
233 return (offset >> inode->i_blkbits);
234}
235
236static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
237{
238 return (blk << inode->i_blkbits);
239}
233 240
234/** 241/**
235 * __generic_block_fiemap - FIEMAP for block based inodes (no locking) 242 * __generic_block_fiemap - FIEMAP for block based inodes (no locking)
236 * @inode - the inode to map 243 * @inode: the inode to map
237 * @arg - the pointer to userspace where we copy everything to 244 * @fieinfo: the fiemap info struct that will be passed back to userspace
238 * @get_block - the fs's get_block function 245 * @start: where to start mapping in the inode
246 * @len: how much space to map
247 * @get_block: the fs's get_block function
239 * 248 *
240 * This does FIEMAP for block based inodes. Basically it will just loop 249 * This does FIEMAP for block based inodes. Basically it will just loop
241 * through get_block until we hit the number of extents we want to map, or we 250 * through get_block until we hit the number of extents we want to map, or we
@@ -250,58 +259,63 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
250 */ 259 */
251 260
252int __generic_block_fiemap(struct inode *inode, 261int __generic_block_fiemap(struct inode *inode,
253 struct fiemap_extent_info *fieinfo, u64 start, 262 struct fiemap_extent_info *fieinfo, loff_t start,
254 u64 len, get_block_t *get_block) 263 loff_t len, get_block_t *get_block)
255{ 264{
256 struct buffer_head tmp; 265 struct buffer_head map_bh;
257 unsigned long long start_blk; 266 sector_t start_blk, last_blk;
258 long long length = 0, map_len = 0; 267 loff_t isize = i_size_read(inode);
259 u64 logical = 0, phys = 0, size = 0; 268 u64 logical = 0, phys = 0, size = 0;
260 u32 flags = FIEMAP_EXTENT_MERGED; 269 u32 flags = FIEMAP_EXTENT_MERGED;
261 int ret = 0, past_eof = 0, whole_file = 0; 270 bool past_eof = false, whole_file = false;
271 int ret = 0;
262 272
263 if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) 273 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
274 if (ret)
264 return ret; 275 return ret;
265 276
266 start_blk = logical_to_blk(inode, start); 277 /*
267 278 * Either the i_mutex or other appropriate locking needs to be held
268 length = (long long)min_t(u64, len, i_size_read(inode)); 279 * since we expect isize to not change at all through the duration of
269 if (length < len) 280 * this call.
270 whole_file = 1; 281 */
282 if (len >= isize) {
283 whole_file = true;
284 len = isize;
285 }
271 286
272 map_len = length; 287 start_blk = logical_to_blk(inode, start);
288 last_blk = logical_to_blk(inode, start + len - 1);
273 289
274 do { 290 do {
275 /* 291 /*
276 * we set b_size to the total size we want so it will map as 292 * we set b_size to the total size we want so it will map as
277 * many contiguous blocks as possible at once 293 * many contiguous blocks as possible at once
278 */ 294 */
279 memset(&tmp, 0, sizeof(struct buffer_head)); 295 memset(&map_bh, 0, sizeof(struct buffer_head));
280 tmp.b_size = map_len; 296 map_bh.b_size = len;
281 297
282 ret = get_block(inode, start_blk, &tmp, 0); 298 ret = get_block(inode, start_blk, &map_bh, 0);
283 if (ret) 299 if (ret)
284 break; 300 break;
285 301
286 /* HOLE */ 302 /* HOLE */
287 if (!buffer_mapped(&tmp)) { 303 if (!buffer_mapped(&map_bh)) {
288 length -= blk_to_logical(inode, 1);
289 start_blk++; 304 start_blk++;
290 305
291 /* 306 /*
292 * we want to handle the case where there is an 307 * We want to handle the case where there is an
293 * allocated block at the front of the file, and then 308 * allocated block at the front of the file, and then
294 * nothing but holes up to the end of the file properly, 309 * nothing but holes up to the end of the file properly,
295 * to make sure that extent at the front gets properly 310 * to make sure that extent at the front gets properly
296 * marked with FIEMAP_EXTENT_LAST 311 * marked with FIEMAP_EXTENT_LAST
297 */ 312 */
298 if (!past_eof && 313 if (!past_eof &&
299 blk_to_logical(inode, start_blk) >= 314 blk_to_logical(inode, start_blk) >= isize)
300 blk_to_logical(inode, 0)+i_size_read(inode))
301 past_eof = 1; 315 past_eof = 1;
302 316
303 /* 317 /*
304 * first hole after going past the EOF, this is our 318 * First hole after going past the EOF, this is our
305 * last extent 319 * last extent
306 */ 320 */
307 if (past_eof && size) { 321 if (past_eof && size) {
@@ -309,15 +323,18 @@ int __generic_block_fiemap(struct inode *inode,
309 ret = fiemap_fill_next_extent(fieinfo, logical, 323 ret = fiemap_fill_next_extent(fieinfo, logical,
310 phys, size, 324 phys, size,
311 flags); 325 flags);
312 break; 326 } else if (size) {
327 ret = fiemap_fill_next_extent(fieinfo, logical,
328 phys, size, flags);
329 size = 0;
313 } 330 }
314 331
315 /* if we have holes up to/past EOF then we're done */ 332 /* if we have holes up to/past EOF then we're done */
316 if (length <= 0 || past_eof) 333 if (start_blk > last_blk || past_eof || ret)
317 break; 334 break;
318 } else { 335 } else {
319 /* 336 /*
320 * we have gone over the length of what we wanted to 337 * We have gone over the length of what we wanted to
321 * map, and it wasn't the entire file, so add the extent 338 * map, and it wasn't the entire file, so add the extent
322 * we got last time and exit. 339 * we got last time and exit.
323 * 340 *
@@ -331,7 +348,7 @@ int __generic_block_fiemap(struct inode *inode,
331 * are good to go, just add the extent to the fieinfo 348 * are good to go, just add the extent to the fieinfo
332 * and break 349 * and break
333 */ 350 */
334 if (length <= 0 && !whole_file) { 351 if (start_blk > last_blk && !whole_file) {
335 ret = fiemap_fill_next_extent(fieinfo, logical, 352 ret = fiemap_fill_next_extent(fieinfo, logical,
336 phys, size, 353 phys, size,
337 flags); 354 flags);
@@ -351,11 +368,10 @@ int __generic_block_fiemap(struct inode *inode,
351 } 368 }
352 369
353 logical = blk_to_logical(inode, start_blk); 370 logical = blk_to_logical(inode, start_blk);
354 phys = blk_to_logical(inode, tmp.b_blocknr); 371 phys = blk_to_logical(inode, map_bh.b_blocknr);
355 size = tmp.b_size; 372 size = map_bh.b_size;
356 flags = FIEMAP_EXTENT_MERGED; 373 flags = FIEMAP_EXTENT_MERGED;
357 374
358 length -= tmp.b_size;
359 start_blk += logical_to_blk(inode, size); 375 start_blk += logical_to_blk(inode, size);
360 376
361 /* 377 /*
@@ -363,15 +379,13 @@ int __generic_block_fiemap(struct inode *inode,
363 * soon as we find a hole that the last extent we found 379 * soon as we find a hole that the last extent we found
364 * is marked with FIEMAP_EXTENT_LAST 380 * is marked with FIEMAP_EXTENT_LAST
365 */ 381 */
366 if (!past_eof && 382 if (!past_eof && logical + size >= isize)
367 logical+size >= 383 past_eof = true;
368 blk_to_logical(inode, 0)+i_size_read(inode))
369 past_eof = 1;
370 } 384 }
371 cond_resched(); 385 cond_resched();
372 } while (1); 386 } while (1);
373 387
374 /* if ret is 1 then we just hit the end of the extent array */ 388 /* If ret is 1 then we just hit the end of the extent array */
375 if (ret == 1) 389 if (ret == 1)
376 ret = 0; 390 ret = 0;
377 391
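Note: the rewritten helper expects i_size to stay stable for the duration of the call, so block-based filesystems normally reach it through generic_block_fiemap(), which takes i_mutex around __generic_block_fiemap(). A sketch of the usual wiring, assuming the filesystem already has a get_block_t named myfs_get_block:

/* Sketch: exposing FIEMAP through the generic block helper. */
static int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create);

static int myfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		       u64 start, u64 len)
{
	return generic_block_fiemap(inode, fieinfo, start, len, myfs_get_block);
}

static const struct inode_operations myfs_file_inode_operations = {
	/* ... other operations ... */
	.fiemap = myfs_fiemap,
};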
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 9dd126276c9f..ed9ba6fe04f5 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -61,7 +61,7 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino)
61 inode->i_op = &page_symlink_inode_operations; 61 inode->i_op = &page_symlink_inode_operations;
62 inode->i_mapping->a_ops = &jfs_aops; 62 inode->i_mapping->a_ops = &jfs_aops;
63 } else { 63 } else {
64 inode->i_op = &jfs_symlink_inode_operations; 64 inode->i_op = &jfs_fast_symlink_inode_operations;
65 /* 65 /*
66 * The inline data should be null-terminated, but 66 * The inline data should be null-terminated, but
67 * don't let on-disk corruption crash the kernel 67 * don't let on-disk corruption crash the kernel
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 6c4dfcbf3f55..9e2f6a721668 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -196,7 +196,7 @@ int dbMount(struct inode *ipbmap)
196 bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); 196 bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag);
197 bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); 197 bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref);
198 bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); 198 bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel);
199 bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth); 199 bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight);
200 bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); 200 bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
201 bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); 201 bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
202 bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); 202 bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);
@@ -288,7 +288,7 @@ int dbSync(struct inode *ipbmap)
288 dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); 288 dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag);
289 dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); 289 dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref);
290 dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); 290 dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel);
291 dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth); 291 dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight);
292 dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); 292 dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth);
293 dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); 293 dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart);
294 dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); 294 dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size);
@@ -1441,7 +1441,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
1441 * tree index of this allocation group within the control page. 1441 * tree index of this allocation group within the control page.
1442 */ 1442 */
1443 agperlev = 1443 agperlev =
1444 (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth; 1444 (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth;
1445 ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); 1445 ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1));
1446 1446
1447 /* dmap control page trees fan-out by 4 and a single allocation 1447 /* dmap control page trees fan-out by 4 and a single allocation
@@ -1460,7 +1460,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
1460 * the subtree to find the leftmost leaf that describes this 1460 * the subtree to find the leftmost leaf that describes this
1461 * free space. 1461 * free space.
1462 */ 1462 */
1463 for (k = bmp->db_agheigth; k > 0; k--) { 1463 for (k = bmp->db_agheight; k > 0; k--) {
1464 for (n = 0, m = (ti << 2) + 1; n < 4; n++) { 1464 for (n = 0, m = (ti << 2) + 1; n < 4; n++) {
1465 if (l2nb <= dcp->stree[m + n]) { 1465 if (l2nb <= dcp->stree[m + n]) {
1466 ti = m + n; 1466 ti = m + n;
@@ -3607,7 +3607,7 @@ void dbFinalizeBmap(struct inode *ipbmap)
3607 } 3607 }
3608 3608
3609 /* 3609 /*
3610 * compute db_aglevel, db_agheigth, db_width, db_agstart: 3610 * compute db_aglevel, db_agheight, db_width, db_agstart:
3611 * an ag is covered in aglevel dmapctl summary tree, 3611 * an ag is covered in aglevel dmapctl summary tree,
3612 * at agheight level height (from leaf) with agwidth number of nodes 3612 * at agheight level height (from leaf) with agwidth number of nodes
3613 * each, which starts at agstart index node of the smmary tree node 3613 * each, which starts at agstart index node of the smmary tree node
@@ -3616,9 +3616,9 @@ void dbFinalizeBmap(struct inode *ipbmap)
3616 bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); 3616 bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize);
3617 l2nl = 3617 l2nl =
3618 bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL); 3618 bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL);
3619 bmp->db_agheigth = l2nl >> 1; 3619 bmp->db_agheight = l2nl >> 1;
3620 bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1)); 3620 bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1));
3621 for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0; 3621 for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0;
3622 i--) { 3622 i--) {
3623 bmp->db_agstart += n; 3623 bmp->db_agstart += n;
3624 n <<= 2; 3624 n <<= 2;
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 1a6eb41569bc..6dcb906c55d8 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -210,7 +210,7 @@ struct dbmap_disk {
210 __le32 dn_maxag; /* 4: max active alloc group number */ 210 __le32 dn_maxag; /* 4: max active alloc group number */
211 __le32 dn_agpref; /* 4: preferred alloc group (hint) */ 211 __le32 dn_agpref; /* 4: preferred alloc group (hint) */
212 __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ 212 __le32 dn_aglevel; /* 4: dmapctl level holding the AG */
213 __le32 dn_agheigth; /* 4: height in dmapctl of the AG */ 213 __le32 dn_agheight; /* 4: height in dmapctl of the AG */
214 __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ 214 __le32 dn_agwidth; /* 4: width in dmapctl of the AG */
215 __le32 dn_agstart; /* 4: start tree index at AG height */ 215 __le32 dn_agstart; /* 4: start tree index at AG height */
216 __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ 216 __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */
@@ -229,7 +229,7 @@ struct dbmap {
229 int dn_maxag; /* max active alloc group number */ 229 int dn_maxag; /* max active alloc group number */
230 int dn_agpref; /* preferred alloc group (hint) */ 230 int dn_agpref; /* preferred alloc group (hint) */
231 int dn_aglevel; /* dmapctl level holding the AG */ 231 int dn_aglevel; /* dmapctl level holding the AG */
232 int dn_agheigth; /* height in dmapctl of the AG */ 232 int dn_agheight; /* height in dmapctl of the AG */
233 int dn_agwidth; /* width in dmapctl of the AG */ 233 int dn_agwidth; /* width in dmapctl of the AG */
234 int dn_agstart; /* start tree index at AG height */ 234 int dn_agstart; /* start tree index at AG height */
235 int dn_agl2size; /* l2 num of blks per alloc group */ 235 int dn_agl2size; /* l2 num of blks per alloc group */
@@ -255,7 +255,7 @@ struct bmap {
255#define db_agsize db_bmap.dn_agsize 255#define db_agsize db_bmap.dn_agsize
256#define db_agl2size db_bmap.dn_agl2size 256#define db_agl2size db_bmap.dn_agl2size
257#define db_agwidth db_bmap.dn_agwidth 257#define db_agwidth db_bmap.dn_agwidth
258#define db_agheigth db_bmap.dn_agheigth 258#define db_agheight db_bmap.dn_agheight
259#define db_agstart db_bmap.dn_agstart 259#define db_agstart db_bmap.dn_agstart
260#define db_numag db_bmap.dn_numag 260#define db_numag db_bmap.dn_numag
261#define db_maxlevel db_bmap.dn_maxlevel 261#define db_maxlevel db_bmap.dn_maxlevel
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 79e2c79661df..9e6bda30a6e8 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -48,5 +48,6 @@ extern const struct file_operations jfs_dir_operations;
48extern const struct inode_operations jfs_file_inode_operations; 48extern const struct inode_operations jfs_file_inode_operations;
49extern const struct file_operations jfs_file_operations; 49extern const struct file_operations jfs_file_operations;
50extern const struct inode_operations jfs_symlink_inode_operations; 50extern const struct inode_operations jfs_symlink_inode_operations;
51extern const struct inode_operations jfs_fast_symlink_inode_operations;
51extern const struct dentry_operations jfs_ci_dentry_operations; 52extern const struct dentry_operations jfs_ci_dentry_operations;
52#endif /* _H_JFS_INODE */ 53#endif /* _H_JFS_INODE */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 4a3e9f39c21d..a9cf8e8675be 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -956,7 +956,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
956 */ 956 */
957 957
958 if (ssize <= IDATASIZE) { 958 if (ssize <= IDATASIZE) {
959 ip->i_op = &jfs_symlink_inode_operations; 959 ip->i_op = &jfs_fast_symlink_inode_operations;
960 960
961 i_fastsymlink = JFS_IP(ip)->i_inline; 961 i_fastsymlink = JFS_IP(ip)->i_inline;
962 memcpy(i_fastsymlink, name, ssize); 962 memcpy(i_fastsymlink, name, ssize);
@@ -978,7 +978,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
978 else { 978 else {
979 jfs_info("jfs_symlink: allocate extent ip:0x%p", ip); 979 jfs_info("jfs_symlink: allocate extent ip:0x%p", ip);
980 980
981 ip->i_op = &page_symlink_inode_operations; 981 ip->i_op = &jfs_symlink_inode_operations;
982 ip->i_mapping->a_ops = &jfs_aops; 982 ip->i_mapping->a_ops = &jfs_aops;
983 983
984 /* 984 /*
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 7f24a0bb08ca..1aba0039f1c9 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -81,6 +81,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
81 struct inode *iplist[1]; 81 struct inode *iplist[1];
82 struct jfs_superblock *j_sb, *j_sb2; 82 struct jfs_superblock *j_sb, *j_sb2;
83 uint old_agsize; 83 uint old_agsize;
84 int agsizechanged = 0;
84 struct buffer_head *bh, *bh2; 85 struct buffer_head *bh, *bh2;
85 86
86 /* If the volume hasn't grown, get out now */ 87 /* If the volume hasn't grown, get out now */
@@ -333,6 +334,9 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
333 */ 334 */
334 if ((rc = dbExtendFS(ipbmap, XAddress, nblocks))) 335 if ((rc = dbExtendFS(ipbmap, XAddress, nblocks)))
335 goto error_out; 336 goto error_out;
337
338 agsizechanged |= (bmp->db_agsize != old_agsize);
339
336 /* 340 /*
337 * the map now has extended to cover additional nblocks: 341 * the map now has extended to cover additional nblocks:
338 * dn_mapsize = oldMapsize + nblocks; 342 * dn_mapsize = oldMapsize + nblocks;
@@ -432,7 +436,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
432 * will correctly identify the new ag); 436 * will correctly identify the new ag);
433 */ 437 */
434 /* if new AG size the same as old AG size, done! */ 438 /* if new AG size the same as old AG size, done! */
435 if (bmp->db_agsize != old_agsize) { 439 if (agsizechanged) {
436 if ((rc = diExtendFS(ipimap, ipbmap))) 440 if ((rc = diExtendFS(ipimap, ipbmap)))
437 goto error_out; 441 goto error_out;
438 442
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index 4af1a05aad0a..205b946d8e0d 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -29,9 +29,21 @@ static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
29 return NULL; 29 return NULL;
30} 30}
31 31
32const struct inode_operations jfs_symlink_inode_operations = { 32const struct inode_operations jfs_fast_symlink_inode_operations = {
33 .readlink = generic_readlink, 33 .readlink = generic_readlink,
34 .follow_link = jfs_follow_link, 34 .follow_link = jfs_follow_link,
35 .setattr = jfs_setattr,
36 .setxattr = jfs_setxattr,
37 .getxattr = jfs_getxattr,
38 .listxattr = jfs_listxattr,
39 .removexattr = jfs_removexattr,
40};
41
42const struct inode_operations jfs_symlink_inode_operations = {
43 .readlink = generic_readlink,
44 .follow_link = page_follow_link_light,
45 .put_link = page_put_link,
46 .setattr = jfs_setattr,
35 .setxattr = jfs_setxattr, 47 .setxattr = jfs_setxattr,
36 .getxattr = jfs_getxattr, 48 .getxattr = jfs_getxattr,
37 .listxattr = jfs_listxattr, 49 .listxattr = jfs_listxattr,
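Note: the split gives JFS the same two symlink flavours as ext2 — short targets stored inline in the inode and served directly, long targets stored in the page cache and served by page_follow_link_light()/page_put_link(). A sketch of what the fast path amounts to (jfs_follow_link() itself is only partially visible in the hunk above):

/* Sketch: a "fast" symlink hands the VFS a pointer to the inline target
 * kept in the inode, so no page has to be read or released afterwards. */
static void *example_fast_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	nd_set_link(nd, (char *)JFS_IP(dentry->d_inode)->i_inline);
	return NULL;			/* nothing for ->put_link() to drop */
}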
diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c
index 84e36f52fe95..76c242fbe1b0 100644
--- a/fs/logfs/gc.c
+++ b/fs/logfs/gc.c
@@ -459,6 +459,14 @@ static void __logfs_gc_pass(struct super_block *sb, int target)
459 struct logfs_block *block; 459 struct logfs_block *block;
460 int round, progress, last_progress = 0; 460 int round, progress, last_progress = 0;
461 461
462 /*
463 * Doing too many changes to the segfile at once would result
464 * in a large number of aliases. Write the journal before
465 * things get out of hand.
466 */
467 if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES)
468 logfs_write_anchor(sb);
469
462 if (no_free_segments(sb) >= target && 470 if (no_free_segments(sb) >= target &&
463 super->s_no_object_aliases < MAX_OBJ_ALIASES) 471 super->s_no_object_aliases < MAX_OBJ_ALIASES)
464 return; 472 return;
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 33bd260b8309..fb0a613f885b 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -389,7 +389,10 @@ static void journal_get_erase_count(struct logfs_area *area)
389static int journal_erase_segment(struct logfs_area *area) 389static int journal_erase_segment(struct logfs_area *area)
390{ 390{
391 struct super_block *sb = area->a_sb; 391 struct super_block *sb = area->a_sb;
392 struct logfs_segment_header sh; 392 union {
393 struct logfs_segment_header sh;
394 unsigned char c[ALIGN(sizeof(struct logfs_segment_header), 16)];
395 } u;
393 u64 ofs; 396 u64 ofs;
394 int err; 397 int err;
395 398
@@ -397,20 +400,21 @@ static int journal_erase_segment(struct logfs_area *area)
397 if (err) 400 if (err)
398 return err; 401 return err;
399 402
400 sh.pad = 0; 403 memset(&u, 0, sizeof(u));
401 sh.type = SEG_JOURNAL; 404 u.sh.pad = 0;
402 sh.level = 0; 405 u.sh.type = SEG_JOURNAL;
403 sh.segno = cpu_to_be32(area->a_segno); 406 u.sh.level = 0;
404 sh.ec = cpu_to_be32(area->a_erase_count); 407 u.sh.segno = cpu_to_be32(area->a_segno);
405 sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); 408 u.sh.ec = cpu_to_be32(area->a_erase_count);
406 sh.crc = logfs_crc32(&sh, sizeof(sh), 4); 409 u.sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
410 u.sh.crc = logfs_crc32(&u.sh, sizeof(u.sh), 4);
407 411
408 /* This causes a bug in segment.c. Not yet. */ 412 /* This causes a bug in segment.c. Not yet. */
409 //logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0); 413 //logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0);
410 414
411 ofs = dev_ofs(sb, area->a_segno, 0); 415 ofs = dev_ofs(sb, area->a_segno, 0);
412 area->a_used_bytes = ALIGN(sizeof(sh), 16); 416 area->a_used_bytes = sizeof(u);
413 logfs_buf_write(area, ofs, &sh, sizeof(sh)); 417 logfs_buf_write(area, ofs, &u, sizeof(u));
414 return 0; 418 return 0;
415} 419}
416 420
@@ -494,6 +498,8 @@ static void account_shadows(struct super_block *sb)
494 498
495 btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow); 499 btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow);
496 btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow); 500 btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow);
501 btree_grim_visitor32(&tree->segment_map, 0, NULL);
502 tree->no_shadowed_segments = 0;
497 503
498 if (li->li_block) { 504 if (li->li_block) {
499 /* 505 /*
@@ -607,9 +613,9 @@ static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type,
607 if (len == 0) 613 if (len == 0)
608 return logfs_write_header(super, header, 0, type); 614 return logfs_write_header(super, header, 0, type);
609 615
616 BUG_ON(len > sb->s_blocksize);
610 compr_len = logfs_compress(buf, data, len, sb->s_blocksize); 617 compr_len = logfs_compress(buf, data, len, sb->s_blocksize);
611 if (compr_len < 0 || type == JE_ANCHOR) { 618 if (compr_len < 0 || type == JE_ANCHOR) {
612 BUG_ON(len > sb->s_blocksize);
613 memcpy(data, buf, len); 619 memcpy(data, buf, len);
614 compr_len = len; 620 compr_len = len;
615 compr = COMPR_NONE; 621 compr = COMPR_NONE;
@@ -661,6 +667,7 @@ static int logfs_write_je_buf(struct super_block *sb, void *buf, u16 type,
661 if (ofs < 0) 667 if (ofs < 0)
662 return ofs; 668 return ofs;
663 logfs_buf_write(area, ofs, super->s_compressed_je, len); 669 logfs_buf_write(area, ofs, super->s_compressed_je, len);
670 BUG_ON(super->s_no_je >= MAX_JOURNAL_ENTRIES);
664 super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs); 671 super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs);
665 return 0; 672 return 0;
666} 673}
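Note: the union in journal_erase_segment() exists because the area accounting advances in 16-byte aligned units; by padding the header to ALIGN(sizeof(sh), 16) and zeroing the whole object, the bytes actually written now match a_used_bytes and the pad bytes are deterministic rather than stack garbage. The same trick in isolation, with illustrative names:

/* Sketch: pad an on-media structure to a write-granularity boundary
 * without a bounce buffer; the union's size is the padded size. */
#define WRITE_ALIGN 16

struct on_media_header {
	__be32 magic;
	__be32 crc;
};

union padded_header {
	struct on_media_header hdr;
	unsigned char raw[ALIGN(sizeof(struct on_media_header), WRITE_ALIGN)];
};

/* memset(&u, 0, sizeof(u)), fill u.hdr, then write sizeof(u) bytes. */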
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index b84b0eec6024..0a3df1a0c936 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -257,10 +257,14 @@ struct logfs_shadow {
257 * struct shadow_tree 257 * struct shadow_tree
258 * @new: shadows where old_ofs==0, indexed by new_ofs 258 * @new: shadows where old_ofs==0, indexed by new_ofs
259 * @old: shadows where old_ofs!=0, indexed by old_ofs 259 * @old: shadows where old_ofs!=0, indexed by old_ofs
260 * @segment_map: bitfield of segments containing shadows
261 * @no_shadowed_segment: number of segments containing shadows
260 */ 262 */
261struct shadow_tree { 263struct shadow_tree {
262 struct btree_head64 new; 264 struct btree_head64 new;
263 struct btree_head64 old; 265 struct btree_head64 old;
266 struct btree_head32 segment_map;
267 int no_shadowed_segments;
264}; 268};
265 269
266struct object_alias_item { 270struct object_alias_item {
@@ -305,13 +309,14 @@ typedef int write_alias_t(struct super_block *sb, u64 ino, u64 bix,
305 level_t level, int child_no, __be64 val); 309 level_t level, int child_no, __be64 val);
306struct logfs_block_ops { 310struct logfs_block_ops {
307 void (*write_block)(struct logfs_block *block); 311 void (*write_block)(struct logfs_block *block);
308 gc_level_t (*block_level)(struct logfs_block *block);
309 void (*free_block)(struct super_block *sb, struct logfs_block*block); 312 void (*free_block)(struct super_block *sb, struct logfs_block*block);
310 int (*write_alias)(struct super_block *sb, 313 int (*write_alias)(struct super_block *sb,
311 struct logfs_block *block, 314 struct logfs_block *block,
312 write_alias_t *write_one_alias); 315 write_alias_t *write_one_alias);
313}; 316};
314 317
318#define MAX_JOURNAL_ENTRIES 256
319
315struct logfs_super { 320struct logfs_super {
316 struct mtd_info *s_mtd; /* underlying device */ 321 struct mtd_info *s_mtd; /* underlying device */
317 struct block_device *s_bdev; /* underlying device */ 322 struct block_device *s_bdev; /* underlying device */
@@ -378,7 +383,7 @@ struct logfs_super {
378 u32 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */ 383 u32 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */
379 u64 s_last_version; 384 u64 s_last_version;
380 struct logfs_area *s_journal_area; /* open journal segment */ 385 struct logfs_area *s_journal_area; /* open journal segment */
381 __be64 s_je_array[64]; 386 __be64 s_je_array[MAX_JOURNAL_ENTRIES];
382 int s_no_je; 387 int s_no_je;
383 388
384 int s_sum_index; /* for the 12 summaries */ 389 int s_sum_index; /* for the 12 summaries */
@@ -722,4 +727,10 @@ static inline struct logfs_area *get_area(struct super_block *sb,
722 return logfs_super(sb)->s_area[(__force u8)gc_level]; 727 return logfs_super(sb)->s_area[(__force u8)gc_level];
723} 728}
724 729
730static inline void logfs_mempool_destroy(mempool_t *pool)
731{
732 if (pool)
733 mempool_destroy(pool);
734}
735
725#endif 736#endif
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index bff40253dfb2..3159db6958e5 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -430,25 +430,6 @@ static void inode_write_block(struct logfs_block *block)
430 } 430 }
431} 431}
432 432
433static gc_level_t inode_block_level(struct logfs_block *block)
434{
435 BUG_ON(block->inode->i_ino == LOGFS_INO_MASTER);
436 return GC_LEVEL(LOGFS_MAX_LEVELS);
437}
438
439static gc_level_t indirect_block_level(struct logfs_block *block)
440{
441 struct page *page;
442 struct inode *inode;
443 u64 bix;
444 level_t level;
445
446 page = block->page;
447 inode = page->mapping->host;
448 logfs_unpack_index(page->index, &bix, &level);
449 return expand_level(inode->i_ino, level);
450}
451
452/* 433/*
453 * This silences a false, yet annoying gcc warning. I hate it when my editor 434 * This silences a false, yet annoying gcc warning. I hate it when my editor
454 * jumps into bitops.h each time I recompile this file. 435 * jumps into bitops.h each time I recompile this file.
@@ -587,14 +568,12 @@ static void indirect_free_block(struct super_block *sb,
587 568
588static struct logfs_block_ops inode_block_ops = { 569static struct logfs_block_ops inode_block_ops = {
589 .write_block = inode_write_block, 570 .write_block = inode_write_block,
590 .block_level = inode_block_level,
591 .free_block = inode_free_block, 571 .free_block = inode_free_block,
592 .write_alias = inode_write_alias, 572 .write_alias = inode_write_alias,
593}; 573};
594 574
595struct logfs_block_ops indirect_block_ops = { 575struct logfs_block_ops indirect_block_ops = {
596 .write_block = indirect_write_block, 576 .write_block = indirect_write_block,
597 .block_level = indirect_block_level,
598 .free_block = indirect_free_block, 577 .free_block = indirect_free_block,
599 .write_alias = indirect_write_alias, 578 .write_alias = indirect_write_alias,
600}; 579};
@@ -1241,6 +1220,18 @@ static void free_shadow(struct inode *inode, struct logfs_shadow *shadow)
1241 mempool_free(shadow, super->s_shadow_pool); 1220 mempool_free(shadow, super->s_shadow_pool);
1242} 1221}
1243 1222
1223static void mark_segment(struct shadow_tree *tree, u32 segno)
1224{
1225 int err;
1226
1227 if (!btree_lookup32(&tree->segment_map, segno)) {
1228 err = btree_insert32(&tree->segment_map, segno, (void *)1,
1229 GFP_NOFS);
1230 BUG_ON(err);
1231 tree->no_shadowed_segments++;
1232 }
1233}
1234
1244/** 1235/**
1245 * fill_shadow_tree - Propagate shadow tree changes due to a write 1236 * fill_shadow_tree - Propagate shadow tree changes due to a write
1246 * @inode: Inode owning the page 1237 * @inode: Inode owning the page
@@ -1288,6 +1279,8 @@ static void fill_shadow_tree(struct inode *inode, struct page *page,
1288 1279
1289 super->s_dirty_used_bytes += shadow->new_len; 1280 super->s_dirty_used_bytes += shadow->new_len;
1290 super->s_dirty_free_bytes += shadow->old_len; 1281 super->s_dirty_free_bytes += shadow->old_len;
1282 mark_segment(tree, shadow->old_ofs >> super->s_segshift);
1283 mark_segment(tree, shadow->new_ofs >> super->s_segshift);
1291 } 1284 }
1292} 1285}
1293 1286
@@ -1845,19 +1838,37 @@ static int __logfs_truncate(struct inode *inode, u64 size)
1845 return logfs_truncate_direct(inode, size); 1838 return logfs_truncate_direct(inode, size);
1846} 1839}
1847 1840
1848int logfs_truncate(struct inode *inode, u64 size) 1841/*
1842 * Truncate, by changing the segment file, can consume a fair amount
1843 * of resources. So back off from time to time and do some GC.
1844 * 8 or 2048 blocks should be well within safety limits even if
1845 * every single block resided in a different segment.
1846 */
1847#define TRUNCATE_STEP (8 * 1024 * 1024)
1848int logfs_truncate(struct inode *inode, u64 target)
1849{ 1849{
1850 struct super_block *sb = inode->i_sb; 1850 struct super_block *sb = inode->i_sb;
1851 int err; 1851 u64 size = i_size_read(inode);
1852 int err = 0;
1852 1853
1853 logfs_get_wblocks(sb, NULL, 1); 1854 size = ALIGN(size, TRUNCATE_STEP);
1854 err = __logfs_truncate(inode, size); 1855 while (size > target) {
1855 if (!err) 1856 if (size > TRUNCATE_STEP)
1856 err = __logfs_write_inode(inode, 0); 1857 size -= TRUNCATE_STEP;
1857 logfs_put_wblocks(sb, NULL, 1); 1858 else
1859 size = 0;
1860 if (size < target)
1861 size = target;
1862
1863 logfs_get_wblocks(sb, NULL, 1);
 1864 err = __logfs_truncate(inode, size);
1865 if (!err)
1866 err = __logfs_write_inode(inode, 0);
1867 logfs_put_wblocks(sb, NULL, 1);
1868 }
1858 1869
1859 if (!err) 1870 if (!err)
1860 err = vmtruncate(inode, size); 1871 err = vmtruncate(inode, target);
1861 1872
1862 /* I don't trust error recovery yet. */ 1873 /* I don't trust error recovery yet. */
1863 WARN_ON(err); 1874 WARN_ON(err);
@@ -2251,8 +2262,6 @@ void logfs_cleanup_rw(struct super_block *sb)
2251 struct logfs_super *super = logfs_super(sb); 2262 struct logfs_super *super = logfs_super(sb);
2252 2263
2253 destroy_meta_inode(super->s_segfile_inode); 2264 destroy_meta_inode(super->s_segfile_inode);
2254 if (super->s_block_pool) 2265 logfs_mempool_destroy(super->s_block_pool);
2255 mempool_destroy(super->s_block_pool); 2266 logfs_mempool_destroy(super->s_shadow_pool);
2256 if (super->s_shadow_pool)
2257 mempool_destroy(super->s_shadow_pool);
2258} 2267}
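
Two behavioural changes in the readwrite.c hunks are worth spelling out: fill_shadow_tree() now records every segment touched by a write via mark_segment(), and logfs_truncate() walks the file size down towards the target in bounded TRUNCATE_STEP chunks rather than in one pass, so each pass holds the write blocks for a limited amount of work. A stand-alone sketch of the stepping arithmetic only, with hypothetical names and without the per-step locking and inode writeback the real loop performs:

/* Illustrative sketch of the stepped-truncate arithmetic. */
#include <stdint.h>
#include <stdio.h>

#define TRUNCATE_STEP	(8 * 1024 * 1024)
#define ALIGN_UP(x, a)	((((x) + (a) - 1) / (a)) * (a))

static void truncate_in_steps(uint64_t size, uint64_t target)
{
	size = ALIGN_UP(size, TRUNCATE_STEP);
	while (size > target) {
		if (size > TRUNCATE_STEP)
			size -= TRUNCATE_STEP;
		else
			size = 0;
		if (size < target)
			size = target;
		/* one bounded truncate pass down to 'size' would run here */
		printf("truncate pass down to %llu\n", (unsigned long long)size);
	}
}

int main(void)
{
	truncate_in_steps(20 * 1024 * 1024 + 123, 3 * 1024 * 1024);
	return 0;
}
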
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 801a3a141625..f77ce2b470ba 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -183,14 +183,8 @@ static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
183 return 0; 183 return 0;
184} 184}
185 185
186static gc_level_t btree_block_level(struct logfs_block *block)
187{
188 return expand_level(block->ino, block->level);
189}
190
191static struct logfs_block_ops btree_block_ops = { 186static struct logfs_block_ops btree_block_ops = {
192 .write_block = btree_write_block, 187 .write_block = btree_write_block,
193 .block_level = btree_block_level,
194 .free_block = __free_block, 188 .free_block = __free_block,
195 .write_alias = btree_write_alias, 189 .write_alias = btree_write_alias,
196}; 190};
@@ -919,7 +913,7 @@ err:
919 for (i--; i >= 0; i--) 913 for (i--; i >= 0; i--)
920 free_area(super->s_area[i]); 914 free_area(super->s_area[i]);
921 free_area(super->s_journal_area); 915 free_area(super->s_journal_area);
922 mempool_destroy(super->s_alias_pool); 916 logfs_mempool_destroy(super->s_alias_pool);
923 return -ENOMEM; 917 return -ENOMEM;
924} 918}
925 919
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index b60bfac3263c..5866ee6e1327 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -12,6 +12,7 @@
12#include "logfs.h" 12#include "logfs.h"
13#include <linux/bio.h> 13#include <linux/bio.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/blkdev.h>
15#include <linux/mtd/mtd.h> 16#include <linux/mtd/mtd.h>
16#include <linux/statfs.h> 17#include <linux/statfs.h>
17#include <linux/buffer_head.h> 18#include <linux/buffer_head.h>
@@ -137,6 +138,10 @@ static int logfs_sb_set(struct super_block *sb, void *_super)
137 sb->s_fs_info = super; 138 sb->s_fs_info = super;
138 sb->s_mtd = super->s_mtd; 139 sb->s_mtd = super->s_mtd;
139 sb->s_bdev = super->s_bdev; 140 sb->s_bdev = super->s_bdev;
141 if (sb->s_bdev)
142 sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
143 if (sb->s_mtd)
144 sb->s_bdi = sb->s_mtd->backing_dev_info;
140 return 0; 145 return 0;
141} 146}
142 147
@@ -452,6 +457,8 @@ static int logfs_read_sb(struct super_block *sb, int read_only)
452 457
453 btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool); 458 btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool);
454 btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool); 459 btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool);
460 btree_init_mempool32(&super->s_shadow_tree.segment_map,
461 super->s_btree_pool);
455 462
456 ret = logfs_init_mapping(sb); 463 ret = logfs_init_mapping(sb);
457 if (ret) 464 if (ret)
@@ -516,8 +523,8 @@ static void logfs_kill_sb(struct super_block *sb)
516 if (super->s_erase_page) 523 if (super->s_erase_page)
517 __free_page(super->s_erase_page); 524 __free_page(super->s_erase_page);
518 super->s_devops->put_device(sb); 525 super->s_devops->put_device(sb);
519 mempool_destroy(super->s_btree_pool); 526 logfs_mempool_destroy(super->s_btree_pool);
520 mempool_destroy(super->s_alias_pool); 527 logfs_mempool_destroy(super->s_alias_pool);
521 kfree(super); 528 kfree(super);
522 log_super("LogFS: Finished unmounting\n"); 529 log_super("LogFS: Finished unmounting\n");
523} 530}
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index cf98da1be23e..fa3385154023 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -526,10 +526,15 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
526 sb->s_blocksize_bits = 10; 526 sb->s_blocksize_bits = 10;
527 sb->s_magic = NCP_SUPER_MAGIC; 527 sb->s_magic = NCP_SUPER_MAGIC;
528 sb->s_op = &ncp_sops; 528 sb->s_op = &ncp_sops;
529 sb->s_bdi = &server->bdi;
529 530
530 server = NCP_SBP(sb); 531 server = NCP_SBP(sb);
531 memset(server, 0, sizeof(*server)); 532 memset(server, 0, sizeof(*server));
532 533
534 error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY);
535 if (error)
536 goto out_bdi;
537
533 server->ncp_filp = ncp_filp; 538 server->ncp_filp = ncp_filp;
534 server->ncp_sock = sock; 539 server->ncp_sock = sock;
535 540
@@ -719,6 +724,8 @@ out_fput2:
719 if (server->info_filp) 724 if (server->info_filp)
720 fput(server->info_filp); 725 fput(server->info_filp);
721out_fput: 726out_fput:
727 bdi_destroy(&server->bdi);
728out_bdi:
722 /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>: 729 /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
723 * 730 *
724 * The previously used put_filp(ncp_filp); was bogous, since 731 * The previously used put_filp(ncp_filp); was bogous, since
@@ -756,6 +763,7 @@ static void ncp_put_super(struct super_block *sb)
756 kill_pid(server->m.wdog_pid, SIGTERM, 1); 763 kill_pid(server->m.wdog_pid, SIGTERM, 1);
757 put_pid(server->m.wdog_pid); 764 put_pid(server->m.wdog_pid);
758 765
766 bdi_destroy(&server->bdi);
759 kfree(server->priv.data); 767 kfree(server->priv.data);
760 kfree(server->auth.object_name); 768 kfree(server->auth.object_name);
761 vfree(server->rxbuf); 769 vfree(server->rxbuf);
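
The ncpfs hunks follow the per-mount BDI pattern used elsewhere in this series: register a backing_dev_info with bdi_setup_and_register(), point sb->s_bdi at it, and destroy it both on the fill_super error path (the new out_bdi label) and in ncp_put_super(). A small user-space sketch of the goto-based unwind ordering this relies on, with stand-in resources rather than ncpfs calls:

/* Sketch: acquire resources in order, release in reverse order via gotos.
 * Each label undoes only what was successfully set up before the failure. */
#include <stdio.h>
#include <stdlib.h>

static int fill_super_sketch(void)
{
	void *bdi = NULL, *sock = NULL;
	int err = -1;

	bdi = malloc(64);		/* stands in for bdi_setup_and_register() */
	if (!bdi)
		goto out;
	sock = malloc(64);		/* stands in for setting up the connection */
	if (!sock)
		goto out_bdi;

	printf("mount set up\n");
	free(sock);			/* in the real code these live until    */
	free(bdi);			/* put_super; freed here for brevity    */
	return 0;

out_bdi:
	free(bdi);			/* undo only what was already set up */
out:
	return err;
}

int main(void)
{
	return fill_super_sketch() ? EXIT_FAILURE : EXIT_SUCCESS;
}
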
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 2a3d352c0bff..a8766c4ef2e0 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1294,7 +1294,8 @@ static int nfs4_init_server(struct nfs_server *server,
1294 1294
1295 /* Initialise the client representation from the mount data */ 1295 /* Initialise the client representation from the mount data */
1296 server->flags = data->flags; 1296 server->flags = data->flags;
1297 server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; 1297 server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|
1298 NFS_CAP_POSIX_LOCK;
1298 server->options = data->options; 1299 server->options = data->options;
1299 1300
1300 /* Get a client record */ 1301 /* Get a client record */
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c6f2750648f4..db3ad849a289 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -837,6 +837,8 @@ out_zap_parent:
837 /* If we have submounts, don't unhash ! */ 837 /* If we have submounts, don't unhash ! */
838 if (have_submounts(dentry)) 838 if (have_submounts(dentry))
839 goto out_valid; 839 goto out_valid;
840 if (dentry->d_flags & DCACHE_DISCONNECTED)
841 goto out_valid;
840 shrink_dcache_parent(dentry); 842 shrink_dcache_parent(dentry);
841 } 843 }
842 d_drop(dentry); 844 d_drop(dentry);
@@ -1025,12 +1027,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1025 res = NULL; 1027 res = NULL;
1026 goto out; 1028 goto out;
1027 /* This turned out not to be a regular file */ 1029 /* This turned out not to be a regular file */
1030 case -EISDIR:
1028 case -ENOTDIR: 1031 case -ENOTDIR:
1029 goto no_open; 1032 goto no_open;
1030 case -ELOOP: 1033 case -ELOOP:
1031 if (!(nd->intent.open.flags & O_NOFOLLOW)) 1034 if (!(nd->intent.open.flags & O_NOFOLLOW))
1032 goto no_open; 1035 goto no_open;
1033 /* case -EISDIR: */
1034 /* case -EINVAL: */ 1036 /* case -EINVAL: */
1035 default: 1037 default:
1036 goto out; 1038 goto out;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 737128f777f3..50a56edca0b5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -623,10 +623,10 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
623 list_for_each_entry(pos, &nfsi->open_files, list) { 623 list_for_each_entry(pos, &nfsi->open_files, list) {
624 if (cred != NULL && pos->cred != cred) 624 if (cred != NULL && pos->cred != cred)
625 continue; 625 continue;
626 if ((pos->mode & mode) == mode) { 626 if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode)
627 ctx = get_nfs_open_context(pos); 627 continue;
628 break; 628 ctx = get_nfs_open_context(pos);
629 } 629 break;
630 } 630 }
631 spin_unlock(&inode->i_lock); 631 spin_unlock(&inode->i_lock);
632 return ctx; 632 return ctx;
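
The nfs_find_open_context() hunk turns a subset test into an exact match on the open mode: a cached context is reused only if its FMODE_READ/FMODE_WRITE combination equals the one requested, not merely includes it. A tiny sketch of the difference between the two tests (plain C; flag values chosen for illustration):

#include <stdio.h>

#define FMODE_READ	0x1
#define FMODE_WRITE	0x2

int main(void)
{
	unsigned int ctx_mode = FMODE_READ | FMODE_WRITE;	/* existing context */
	unsigned int want = FMODE_READ;				/* read-only request */

	/* Old test: "does the context include everything asked for?"
	 * A read/write context satisfies a read-only request. */
	printf("subset test matches: %d\n", (ctx_mode & want) == want);

	/* New test: "do the read/write bits match exactly?"
	 * The read/write context is now skipped for a read-only request. */
	printf("exact test matches:  %d\n",
	       (ctx_mode & (FMODE_READ | FMODE_WRITE)) == want);
	return 0;
}
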
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index fe0cd9eb1d4d..638067007c65 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1523,6 +1523,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1523 nfs_post_op_update_inode(dir, o_res->dir_attr); 1523 nfs_post_op_update_inode(dir, o_res->dir_attr);
1524 } else 1524 } else
1525 nfs_refresh_inode(dir, o_res->dir_attr); 1525 nfs_refresh_inode(dir, o_res->dir_attr);
1526 if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
1527 server->caps &= ~NFS_CAP_POSIX_LOCK;
1526 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { 1528 if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
1527 status = _nfs4_proc_open_confirm(data); 1529 status = _nfs4_proc_open_confirm(data);
1528 if (status != 0) 1530 if (status != 0)
@@ -1664,7 +1666,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
1664 status = PTR_ERR(state); 1666 status = PTR_ERR(state);
1665 if (IS_ERR(state)) 1667 if (IS_ERR(state))
1666 goto err_opendata_put; 1668 goto err_opendata_put;
1667 if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0) 1669 if (server->caps & NFS_CAP_POSIX_LOCK)
1668 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); 1670 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
1669 nfs4_opendata_put(opendata); 1671 nfs4_opendata_put(opendata);
1670 nfs4_put_state_owner(sp); 1672 nfs4_put_state_owner(sp);
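
The client.c and nfs4proc.c hunks together implement a capability back-off: NFS_CAP_POSIX_LOCK is assumed at mount time, cleared the first time an OPEN reply lacks NFS4_OPEN_RESULT_LOCKTYPE_POSIX, and the cached bit is then consulted when setting NFS_STATE_POSIX_LOCKS. A sketch of that flag handling with illustrative names, not the kernel's structures:

#include <stdio.h>

#define CAP_POSIX_LOCK		0x1
#define OPEN_RESULT_POSIX	0x2

struct server_sketch { unsigned long caps; };

static void handle_open_reply(struct server_sketch *s, unsigned int rflags)
{
	if ((rflags & OPEN_RESULT_POSIX) == 0)
		s->caps &= ~CAP_POSIX_LOCK;	/* server cannot do POSIX locks */
}

int main(void)
{
	struct server_sketch srv = { .caps = CAP_POSIX_LOCK };	/* optimistic default */

	handle_open_reply(&srv, 0);	/* reply without the POSIX lock flag */
	printf("posix locking usable: %d\n", !!(srv.caps & CAP_POSIX_LOCK));
	return 0;
}
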
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53ff70e23993..de38d63aa920 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -201,6 +201,7 @@ static int nfs_set_page_writeback(struct page *page)
201 struct inode *inode = page->mapping->host; 201 struct inode *inode = page->mapping->host;
202 struct nfs_server *nfss = NFS_SERVER(inode); 202 struct nfs_server *nfss = NFS_SERVER(inode);
203 203
204 page_cache_get(page);
204 if (atomic_long_inc_return(&nfss->writeback) > 205 if (atomic_long_inc_return(&nfss->writeback) >
205 NFS_CONGESTION_ON_THRESH) { 206 NFS_CONGESTION_ON_THRESH) {
206 set_bdi_congested(&nfss->backing_dev_info, 207 set_bdi_congested(&nfss->backing_dev_info,
@@ -216,6 +217,7 @@ static void nfs_end_page_writeback(struct page *page)
216 struct nfs_server *nfss = NFS_SERVER(inode); 217 struct nfs_server *nfss = NFS_SERVER(inode);
217 218
218 end_page_writeback(page); 219 end_page_writeback(page);
220 page_cache_release(page);
219 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) 221 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
220 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 222 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
221} 223}
@@ -421,6 +423,7 @@ static void
421nfs_mark_request_dirty(struct nfs_page *req) 423nfs_mark_request_dirty(struct nfs_page *req)
422{ 424{
423 __set_page_dirty_nobuffers(req->wb_page); 425 __set_page_dirty_nobuffers(req->wb_page);
426 __mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC);
424} 427}
425 428
426#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 429#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -660,9 +663,11 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
660 req = nfs_setup_write_request(ctx, page, offset, count); 663 req = nfs_setup_write_request(ctx, page, offset, count);
661 if (IS_ERR(req)) 664 if (IS_ERR(req))
662 return PTR_ERR(req); 665 return PTR_ERR(req);
666 nfs_mark_request_dirty(req);
663 /* Update file length */ 667 /* Update file length */
664 nfs_grow_file(page, offset, count); 668 nfs_grow_file(page, offset, count);
665 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); 669 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
670 nfs_mark_request_dirty(req);
666 nfs_clear_page_tag_locked(req); 671 nfs_clear_page_tag_locked(req);
667 return 0; 672 return 0;
668} 673}
@@ -739,8 +744,6 @@ int nfs_updatepage(struct file *file, struct page *page,
739 status = nfs_writepage_setup(ctx, page, offset, count); 744 status = nfs_writepage_setup(ctx, page, offset, count);
740 if (status < 0) 745 if (status < 0)
741 nfs_set_pageerror(page); 746 nfs_set_pageerror(page);
742 else
743 __set_page_dirty_nobuffers(page);
744 747
745 dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", 748 dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
746 status, (long long)i_size_read(inode)); 749 status, (long long)i_size_read(inode));
@@ -749,13 +752,12 @@ int nfs_updatepage(struct file *file, struct page *page,
749 752
750static void nfs_writepage_release(struct nfs_page *req) 753static void nfs_writepage_release(struct nfs_page *req)
751{ 754{
755 struct page *page = req->wb_page;
752 756
753 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { 757 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req))
754 nfs_end_page_writeback(req->wb_page);
755 nfs_inode_remove_request(req); 758 nfs_inode_remove_request(req);
756 } else
757 nfs_end_page_writeback(req->wb_page);
758 nfs_clear_page_tag_locked(req); 759 nfs_clear_page_tag_locked(req);
760 nfs_end_page_writeback(page);
759} 761}
760 762
761static int flush_task_priority(int how) 763static int flush_task_priority(int how)
@@ -779,7 +781,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
779 int how) 781 int how)
780{ 782{
781 struct inode *inode = req->wb_context->path.dentry->d_inode; 783 struct inode *inode = req->wb_context->path.dentry->d_inode;
782 int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
783 int priority = flush_task_priority(how); 784 int priority = flush_task_priority(how);
784 struct rpc_task *task; 785 struct rpc_task *task;
785 struct rpc_message msg = { 786 struct rpc_message msg = {
@@ -794,9 +795,10 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
794 .callback_ops = call_ops, 795 .callback_ops = call_ops,
795 .callback_data = data, 796 .callback_data = data,
796 .workqueue = nfsiod_workqueue, 797 .workqueue = nfsiod_workqueue,
797 .flags = flags, 798 .flags = RPC_TASK_ASYNC,
798 .priority = priority, 799 .priority = priority,
799 }; 800 };
801 int ret = 0;
800 802
801 /* Set up the RPC argument and reply structs 803 /* Set up the RPC argument and reply structs
802 * NB: take care not to mess about with data->commit et al. */ 804 * NB: take care not to mess about with data->commit et al. */
@@ -835,10 +837,18 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
835 (unsigned long long)data->args.offset); 837 (unsigned long long)data->args.offset);
836 838
837 task = rpc_run_task(&task_setup_data); 839 task = rpc_run_task(&task_setup_data);
838 if (IS_ERR(task)) 840 if (IS_ERR(task)) {
839 return PTR_ERR(task); 841 ret = PTR_ERR(task);
842 goto out;
843 }
844 if (how & FLUSH_SYNC) {
845 ret = rpc_wait_for_completion_task(task);
846 if (ret == 0)
847 ret = task->tk_status;
848 }
840 rpc_put_task(task); 849 rpc_put_task(task);
841 return 0; 850out:
851 return ret;
842} 852}
843 853
844/* If a nfs_flush_* function fails, it should remove reqs from @head and 854/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -847,9 +857,11 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
847 */ 857 */
848static void nfs_redirty_request(struct nfs_page *req) 858static void nfs_redirty_request(struct nfs_page *req)
849{ 859{
860 struct page *page = req->wb_page;
861
850 nfs_mark_request_dirty(req); 862 nfs_mark_request_dirty(req);
851 nfs_end_page_writeback(req->wb_page);
852 nfs_clear_page_tag_locked(req); 863 nfs_clear_page_tag_locked(req);
864 nfs_end_page_writeback(page);
853} 865}
854 866
855/* 867/*
@@ -1084,16 +1096,15 @@ static void nfs_writeback_release_full(void *calldata)
1084 if (nfs_write_need_commit(data)) { 1096 if (nfs_write_need_commit(data)) {
1085 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); 1097 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1086 nfs_mark_request_commit(req); 1098 nfs_mark_request_commit(req);
1087 nfs_end_page_writeback(page);
1088 dprintk(" marked for commit\n"); 1099 dprintk(" marked for commit\n");
1089 goto next; 1100 goto next;
1090 } 1101 }
1091 dprintk(" OK\n"); 1102 dprintk(" OK\n");
1092remove_request: 1103remove_request:
1093 nfs_end_page_writeback(page);
1094 nfs_inode_remove_request(req); 1104 nfs_inode_remove_request(req);
1095 next: 1105 next:
1096 nfs_clear_page_tag_locked(req); 1106 nfs_clear_page_tag_locked(req);
1107 nfs_end_page_writeback(page);
1097 } 1108 }
1098 nfs_writedata_release(calldata); 1109 nfs_writedata_release(calldata);
1099} 1110}
@@ -1207,7 +1218,6 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1207{ 1218{
1208 struct nfs_page *first = nfs_list_entry(head->next); 1219 struct nfs_page *first = nfs_list_entry(head->next);
1209 struct inode *inode = first->wb_context->path.dentry->d_inode; 1220 struct inode *inode = first->wb_context->path.dentry->d_inode;
1210 int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
1211 int priority = flush_task_priority(how); 1221 int priority = flush_task_priority(how);
1212 struct rpc_task *task; 1222 struct rpc_task *task;
1213 struct rpc_message msg = { 1223 struct rpc_message msg = {
@@ -1222,7 +1232,7 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1222 .callback_ops = &nfs_commit_ops, 1232 .callback_ops = &nfs_commit_ops,
1223 .callback_data = data, 1233 .callback_data = data,
1224 .workqueue = nfsiod_workqueue, 1234 .workqueue = nfsiod_workqueue,
1225 .flags = flags, 1235 .flags = RPC_TASK_ASYNC,
1226 .priority = priority, 1236 .priority = priority,
1227 }; 1237 };
1228 1238
@@ -1252,6 +1262,8 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1252 task = rpc_run_task(&task_setup_data); 1262 task = rpc_run_task(&task_setup_data);
1253 if (IS_ERR(task)) 1263 if (IS_ERR(task))
1254 return PTR_ERR(task); 1264 return PTR_ERR(task);
1265 if (how & FLUSH_SYNC)
1266 rpc_wait_for_completion_task(task);
1255 rpc_put_task(task); 1267 rpc_put_task(task);
1256 return 0; 1268 return 0;
1257} 1269}
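
The write.c changes do two things: a page reference is now held from nfs_set_page_writeback() to nfs_end_page_writeback(), and write/commit RPCs are always dispatched asynchronously, with FLUSH_SYNC callers waiting for task completion afterwards instead of passing a synchronous flag down. A pthread-based sketch of that dispatch-then-optionally-wait shape (illustrative only, not the RPC client API):

#include <pthread.h>
#include <stdio.h>

#define FLUSH_SYNC	0x1

static void *do_write(void *arg)
{
	printf("write RPC running\n");
	return NULL;
}

static int write_rpcsetup_sketch(int how)
{
	pthread_t task;
	int ret;

	ret = pthread_create(&task, NULL, do_write, NULL);	/* always async */
	if (ret)
		return ret;

	if (how & FLUSH_SYNC)
		ret = pthread_join(task, NULL);		/* wait for completion */
	else
		ret = pthread_detach(task);		/* fire and forget; may not
							 * finish before main() exits */
	return ret;
}

int main(void)
{
	write_rpcsetup_sketch(FLUSH_SYNC);
	write_rpcsetup_sketch(0);
	return 0;
}
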
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e1703175ee28..34ccf815ea8a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -161,10 +161,10 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
161 argp->p = page_address(argp->pagelist[0]); 161 argp->p = page_address(argp->pagelist[0]);
162 argp->pagelist++; 162 argp->pagelist++;
163 if (argp->pagelen < PAGE_SIZE) { 163 if (argp->pagelen < PAGE_SIZE) {
164 argp->end = p + (argp->pagelen>>2); 164 argp->end = argp->p + (argp->pagelen>>2);
165 argp->pagelen = 0; 165 argp->pagelen = 0;
166 } else { 166 } else {
167 argp->end = p + (PAGE_SIZE>>2); 167 argp->end = argp->p + (PAGE_SIZE>>2);
168 argp->pagelen -= PAGE_SIZE; 168 argp->pagelen -= PAGE_SIZE;
169 } 169 }
170 memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); 170 memcpy(((char*)p)+avail, argp->p, (nbytes - avail));
@@ -1426,10 +1426,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1426 argp->p = page_address(argp->pagelist[0]); 1426 argp->p = page_address(argp->pagelist[0]);
1427 argp->pagelist++; 1427 argp->pagelist++;
1428 if (argp->pagelen < PAGE_SIZE) { 1428 if (argp->pagelen < PAGE_SIZE) {
1429 argp->end = p + (argp->pagelen>>2); 1429 argp->end = argp->p + (argp->pagelen>>2);
1430 argp->pagelen = 0; 1430 argp->pagelen = 0;
1431 } else { 1431 } else {
1432 argp->end = p + (PAGE_SIZE>>2); 1432 argp->end = argp->p + (PAGE_SIZE>>2);
1433 argp->pagelen -= PAGE_SIZE; 1433 argp->pagelen -= PAGE_SIZE;
1434 } 1434 }
1435 } 1435 }
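
The nfsd4 XDR fix is a stale-base bug: once argp->p has been switched to the next page, the end pointer has to be computed from argp->p, not from the local p that still points into the previous buffer. A toy demonstration of deriving the bound from the right base (hypothetical names):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t oldbuf[1024], newpage[1024];
	uint32_t *p = oldbuf;			/* cursor into the old buffer */
	uint32_t *cur, *end;
	size_t pagelen = 64;			/* bytes available in the new page */

	cur = newpage;				/* switch to the next page */

	end = p + (pagelen >> 2);		/* wrong: bounds the old buffer */
	printf("stale end:   %p\n", (void *)end);

	end = cur + (pagelen >> 2);		/* right: bounds the new page */
	printf("correct end: %p\n", (void *)end);
	return 0;
}
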
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 7621db800a74..8418fcc0a6ab 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2909,7 +2909,7 @@ out_no_task:
2909 */ 2909 */
2910static const struct pid_entry tid_base_stuff[] = { 2910static const struct pid_entry tid_base_stuff[] = {
2911 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2911 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
2912 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fd_operations), 2912 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
2913 REG("environ", S_IRUSR, proc_environ_operations), 2913 REG("environ", S_IRUSR, proc_environ_operations),
2914 INF("auxv", S_IRUSR, proc_pid_auxv), 2914 INF("auxv", S_IRUSR, proc_pid_auxv),
2915 ONE("status", S_IRUGO, proc_pid_status), 2915 ONE("status", S_IRUGO, proc_pid_status),
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index dad7fb247ddc..3e21b1e2ad3a 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -33,6 +33,14 @@ config PRINT_QUOTA_WARNING
33 Note that this behavior is currently deprecated and may go away in 33 Note that this behavior is currently deprecated and may go away in
34 future. Please use notification via netlink socket instead. 34 future. Please use notification via netlink socket instead.
35 35
36config QUOTA_DEBUG
37 bool "Additional quota sanity checks"
38 depends on QUOTA
39 default n
40 help
41 If you say Y here, quota subsystem will perform some additional
42 sanity checks of quota internal structures. If unsure, say N.
43
36# Generic support for tree structured quota files. Selected when needed. 44# Generic support for tree structured quota files. Selected when needed.
37config QUOTA_TREE 45config QUOTA_TREE
38 tristate 46 tristate
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index a0a9405b202a..788b5802a7ce 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -80,8 +80,6 @@
80 80
81#include <asm/uaccess.h> 81#include <asm/uaccess.h>
82 82
83#define __DQUOT_PARANOIA
84
85/* 83/*
86 * There are three quota SMP locks. dq_list_lock protects all lists with quotas 84 * There are three quota SMP locks. dq_list_lock protects all lists with quotas
87 * and quota formats, dqstats structure containing statistics about the lists 85 * and quota formats, dqstats structure containing statistics about the lists
@@ -695,7 +693,7 @@ void dqput(struct dquot *dquot)
695 693
696 if (!dquot) 694 if (!dquot)
697 return; 695 return;
698#ifdef __DQUOT_PARANOIA 696#ifdef CONFIG_QUOTA_DEBUG
699 if (!atomic_read(&dquot->dq_count)) { 697 if (!atomic_read(&dquot->dq_count)) {
700 printk("VFS: dqput: trying to free free dquot\n"); 698 printk("VFS: dqput: trying to free free dquot\n");
701 printk("VFS: device %s, dquot of %s %d\n", 699 printk("VFS: device %s, dquot of %s %d\n",
@@ -748,7 +746,7 @@ we_slept:
748 goto we_slept; 746 goto we_slept;
749 } 747 }
750 atomic_dec(&dquot->dq_count); 748 atomic_dec(&dquot->dq_count);
751#ifdef __DQUOT_PARANOIA 749#ifdef CONFIG_QUOTA_DEBUG
752 /* sanity check */ 750 /* sanity check */
753 BUG_ON(!list_empty(&dquot->dq_free)); 751 BUG_ON(!list_empty(&dquot->dq_free));
754#endif 752#endif
@@ -845,7 +843,7 @@ we_slept:
845 dquot = NULL; 843 dquot = NULL;
846 goto out; 844 goto out;
847 } 845 }
848#ifdef __DQUOT_PARANOIA 846#ifdef CONFIG_QUOTA_DEBUG
849 BUG_ON(!dquot->dq_sb); /* Has somebody invalidated entry under us? */ 847 BUG_ON(!dquot->dq_sb); /* Has somebody invalidated entry under us? */
850#endif 848#endif
851out: 849out:
@@ -874,7 +872,7 @@ static int dqinit_needed(struct inode *inode, int type)
874static void add_dquot_ref(struct super_block *sb, int type) 872static void add_dquot_ref(struct super_block *sb, int type)
875{ 873{
876 struct inode *inode, *old_inode = NULL; 874 struct inode *inode, *old_inode = NULL;
877#ifdef __DQUOT_PARANOIA 875#ifdef CONFIG_QUOTA_DEBUG
878 int reserved = 0; 876 int reserved = 0;
879#endif 877#endif
880 878
@@ -882,7 +880,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
882 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 880 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
883 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) 881 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
884 continue; 882 continue;
885#ifdef __DQUOT_PARANOIA 883#ifdef CONFIG_QUOTA_DEBUG
886 if (unlikely(inode_get_rsv_space(inode) > 0)) 884 if (unlikely(inode_get_rsv_space(inode) > 0))
887 reserved = 1; 885 reserved = 1;
888#endif 886#endif
@@ -907,7 +905,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
907 spin_unlock(&inode_lock); 905 spin_unlock(&inode_lock);
908 iput(old_inode); 906 iput(old_inode);
909 907
910#ifdef __DQUOT_PARANOIA 908#ifdef CONFIG_QUOTA_DEBUG
911 if (reserved) { 909 if (reserved) {
912 printk(KERN_WARNING "VFS (%s): Writes happened before quota" 910 printk(KERN_WARNING "VFS (%s): Writes happened before quota"
913 " was turned on thus quota information is probably " 911 " was turned on thus quota information is probably "
@@ -940,7 +938,7 @@ static int remove_inode_dquot_ref(struct inode *inode, int type,
940 inode->i_dquot[type] = NULL; 938 inode->i_dquot[type] = NULL;
941 if (dquot) { 939 if (dquot) {
942 if (dqput_blocks(dquot)) { 940 if (dqput_blocks(dquot)) {
943#ifdef __DQUOT_PARANOIA 941#ifdef CONFIG_QUOTA_DEBUG
944 if (atomic_read(&dquot->dq_count) != 1) 942 if (atomic_read(&dquot->dq_count) != 1)
945 printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", atomic_read(&dquot->dq_count)); 943 printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", atomic_read(&dquot->dq_count));
946#endif 944#endif
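
The quota hunks replace the always-defined local __DQUOT_PARANOIA macro with a user-selectable CONFIG_QUOTA_DEBUG Kconfig option guarding the same sanity checks. A minimal sketch of that compile-time debug-check pattern, using a stand-in macro name:

/* Compile with -DQUOTA_DEBUG_SKETCH to enable the extra checks, mirroring
 * how CONFIG_QUOTA_DEBUG is selected in Kconfig. */
#include <assert.h>
#include <stdio.h>

struct dquot_sketch { int count; };

static void dqput_sketch(struct dquot_sketch *dq)
{
#ifdef QUOTA_DEBUG_SKETCH
	/* extra sanity check, compiled in only for debug builds */
	assert(dq->count > 0);
#endif
	dq->count--;
}

int main(void)
{
	struct dquot_sketch dq = { .count = 1 };

	dqput_sketch(&dq);
	printf("count now %d\n", dq.count);
	return 0;
}
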
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index f8a6075abf50..07930449a958 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -46,8 +46,6 @@ static inline bool is_privroot_deh(struct dentry *dir,
46 struct reiserfs_de_head *deh) 46 struct reiserfs_de_head *deh)
47{ 47{
48 struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; 48 struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root;
49 if (reiserfs_expose_privroot(dir->d_sb))
50 return 0;
51 return (dir == dir->d_parent && privroot->d_inode && 49 return (dir == dir->d_parent && privroot->d_inode &&
52 deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); 50 deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
53} 51}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 4f9586bb7631..e7cc00e636dc 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -554,7 +554,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
554 if (!err && new_size < i_size_read(dentry->d_inode)) { 554 if (!err && new_size < i_size_read(dentry->d_inode)) {
555 struct iattr newattrs = { 555 struct iattr newattrs = {
556 .ia_ctime = current_fs_time(inode->i_sb), 556 .ia_ctime = current_fs_time(inode->i_sb),
557 .ia_size = buffer_size, 557 .ia_size = new_size,
558 .ia_valid = ATTR_SIZE | ATTR_CTIME, 558 .ia_valid = ATTR_SIZE | ATTR_CTIME,
559 }; 559 };
560 560
@@ -973,21 +973,13 @@ int reiserfs_permission(struct inode *inode, int mask)
973 return generic_permission(inode, mask, NULL); 973 return generic_permission(inode, mask, NULL);
974} 974}
975 975
976/* This will catch lookups from the fs root to .reiserfs_priv */ 976static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
977static int
978xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name)
979{ 977{
980 struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root; 978 return -EPERM;
981 if (container_of(q1, struct dentry, d_name) == priv_root)
982 return -ENOENT;
983 if (q1->len == name->len &&
984 !memcmp(q1->name, name->name, name->len))
985 return 0;
986 return 1;
987} 979}
988 980
989static const struct dentry_operations xattr_lookup_poison_ops = { 981static const struct dentry_operations xattr_lookup_poison_ops = {
990 .d_compare = xattr_lookup_poison, 982 .d_revalidate = xattr_hide_revalidate,
991}; 983};
992 984
993int reiserfs_lookup_privroot(struct super_block *s) 985int reiserfs_lookup_privroot(struct super_block *s)
@@ -1001,8 +993,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
1001 strlen(PRIVROOT_NAME)); 993 strlen(PRIVROOT_NAME));
1002 if (!IS_ERR(dentry)) { 994 if (!IS_ERR(dentry)) {
1003 REISERFS_SB(s)->priv_root = dentry; 995 REISERFS_SB(s)->priv_root = dentry;
1004 if (!reiserfs_expose_privroot(s)) 996 dentry->d_op = &xattr_lookup_poison_ops;
1005 s->s_root->d_op = &xattr_lookup_poison_ops;
1006 if (dentry->d_inode) 997 if (dentry->d_inode)
1007 dentry->d_inode->i_flags |= S_PRIVATE; 998 dentry->d_inode->i_flags |= S_PRIVATE;
1008 } else 999 } else
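
The reiserfs change swaps the old d_compare poisoning on the fs root for a d_revalidate hook attached to the private root itself, which simply returns -EPERM so lookups of .reiserfs_priv always fail; the dir.c hunk above drops the now-unneeded expose_privroot special case. A plain-C sketch of the "hide an entry through its own ops table" idea, with hypothetical types:

#include <errno.h>
#include <stdio.h>

struct entry;
struct entry_ops {
	int (*revalidate)(struct entry *e);
};

struct entry {
	const char *name;
	const struct entry_ops *ops;
};

static int hide_revalidate(struct entry *e)
{
	return -EPERM;			/* lookups of this entry always fail */
}

static const struct entry_ops hidden_ops = { .revalidate = hide_revalidate };

static int lookup(struct entry *e)
{
	if (e->ops && e->ops->revalidate)
		return e->ops->revalidate(e);
	return 0;			/* ordinary entries validate fine */
}

int main(void)
{
	struct entry priv = { ".reiserfs_priv", &hidden_ops };
	struct entry normal = { "file", NULL };

	printf("lookup(priv)   -> %d\n", lookup(&priv));
	printf("lookup(normal) -> %d\n", lookup(&normal));
	return 0;
}
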
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 1c4c8f089970..dfa1d67f8fca 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -479,6 +479,7 @@ smb_put_super(struct super_block *sb)
479 if (server->conn_pid) 479 if (server->conn_pid)
480 kill_pid(server->conn_pid, SIGTERM, 1); 480 kill_pid(server->conn_pid, SIGTERM, 1);
481 481
482 bdi_destroy(&server->bdi);
482 kfree(server->ops); 483 kfree(server->ops);
483 smb_unload_nls(server); 484 smb_unload_nls(server);
484 sb->s_fs_info = NULL; 485 sb->s_fs_info = NULL;
@@ -525,6 +526,11 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
525 if (!server) 526 if (!server)
526 goto out_no_server; 527 goto out_no_server;
527 sb->s_fs_info = server; 528 sb->s_fs_info = server;
529
530 if (bdi_setup_and_register(&server->bdi, "smbfs", BDI_CAP_MAP_COPY))
531 goto out_bdi;
532
533 sb->s_bdi = &server->bdi;
528 534
529 server->super_block = sb; 535 server->super_block = sb;
530 server->mnt = NULL; 536 server->mnt = NULL;
@@ -624,6 +630,8 @@ out_no_smbiod:
624out_bad_option: 630out_bad_option:
625 kfree(mem); 631 kfree(mem);
626out_no_mem: 632out_no_mem:
633 bdi_destroy(&server->bdi);
634out_bdi:
627 if (!server->mnt) 635 if (!server->mnt)
628 printk(KERN_ERR "smb_fill_super: allocation failure\n"); 636 printk(KERN_ERR "smb_fill_super: allocation failure\n");
629 sb->s_fs_info = NULL; 637 sb->s_fs_info = NULL;
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 1cb0d81b164b..653c030eb840 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -87,9 +87,8 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
87 u64 cur_index = index >> msblk->devblksize_log2; 87 u64 cur_index = index >> msblk->devblksize_log2;
88 int bytes, compressed, b = 0, k = 0, page = 0, avail; 88 int bytes, compressed, b = 0, k = 0, page = 0, avail;
89 89
90 90 bh = kcalloc(((srclength + msblk->devblksize - 1)
91 bh = kcalloc((msblk->block_size >> msblk->devblksize_log2) + 1, 91 >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
92 sizeof(*bh), GFP_KERNEL);
93 if (bh == NULL) 92 if (bh == NULL)
94 return -ENOMEM; 93 return -ENOMEM;
95 94
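
The squashfs block.c hunk sizes the buffer_head array from the actual request length (srclength) instead of the filesystem block size, i.e. a round-up division by the device block size plus one spare slot. The same arithmetic worked out in a stand-alone sketch:

#include <stdio.h>

int main(void)
{
	unsigned int devblksize_log2 = 10;		/* 1 KiB device blocks */
	unsigned int devblksize = 1u << devblksize_log2;
	unsigned int srclength = 3000;			/* bytes requested */
	unsigned int nbh;

	/* ceil(srclength / devblksize) + 1 spare for a straddling request */
	nbh = ((srclength + devblksize - 1) >> devblksize_log2) + 1;
	printf("buffer heads to allocate: %u\n", nbh);	/* 3 blocks + 1 spare */
	return 0;
}
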
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 3550aec2f655..48b6f4a385a6 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -275,7 +275,8 @@ allocate_root:
275 275
276 err = squashfs_read_inode(root, root_inode); 276 err = squashfs_read_inode(root, root_inode);
277 if (err) { 277 if (err) {
278 iget_failed(root); 278 make_bad_inode(root);
279 iput(root);
279 goto failed_mount; 280 goto failed_mount;
280 } 281 }
281 insert_inode_hash(root); 282 insert_inode_hash(root);
@@ -353,6 +354,7 @@ static void squashfs_put_super(struct super_block *sb)
353 kfree(sbi->id_table); 354 kfree(sbi->id_table);
354 kfree(sbi->fragment_index); 355 kfree(sbi->fragment_index);
355 kfree(sbi->meta_index); 356 kfree(sbi->meta_index);
357 kfree(sbi->inode_lookup_table);
356 kfree(sb->s_fs_info); 358 kfree(sb->s_fs_info);
357 sb->s_fs_info = NULL; 359 sb->s_fs_info = NULL;
358 } 360 }
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 15a03d0fb9f3..7a603874e483 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -128,8 +128,9 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
128 goto release_mutex; 128 goto release_mutex;
129 } 129 }
130 130
131 length = stream->total_out;
131 mutex_unlock(&msblk->read_data_mutex); 132 mutex_unlock(&msblk->read_data_mutex);
132 return stream->total_out; 133 return length;
133 134
134release_mutex: 135release_mutex:
135 mutex_unlock(&msblk->read_data_mutex); 136 mutex_unlock(&msblk->read_data_mutex);
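
The zlib_wrapper fix snapshots stream->total_out into a local before dropping read_data_mutex, since the shared zlib stream may be reused by the next caller as soon as the lock is released. The same rule in a small pthread sketch:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static long shared_total_out;		/* stands in for stream->total_out */

static long decompress_sketch(void)
{
	long length;

	pthread_mutex_lock(&lock);
	shared_total_out = 4096;	/* result produced under the lock */
	length = shared_total_out;	/* snapshot before unlocking */
	pthread_mutex_unlock(&lock);

	/* returning shared_total_out here instead of length would race with
	 * whoever grabs the lock next and rewrites it */
	return length;
}

int main(void)
{
	printf("decompressed %ld bytes\n", decompress_sketch());
	return 0;
}
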
diff --git a/fs/super.c b/fs/super.c
index f35ac6022109..dc72491a19f9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -693,6 +693,7 @@ int set_anon_super(struct super_block *s, void *data)
693 return -EMFILE; 693 return -EMFILE;
694 } 694 }
695 s->s_dev = MKDEV(0, dev & MINORMASK); 695 s->s_dev = MKDEV(0, dev & MINORMASK);
696 s->s_bdi = &noop_backing_dev_info;
696 return 0; 697 return 0;
697} 698}
698 699
@@ -954,10 +955,11 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
954 if (error < 0) 955 if (error < 0)
955 goto out_free_secdata; 956 goto out_free_secdata;
956 BUG_ON(!mnt->mnt_sb); 957 BUG_ON(!mnt->mnt_sb);
958 WARN_ON(!mnt->mnt_sb->s_bdi);
957 959
958 error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); 960 error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
959 if (error) 961 if (error)
960 goto out_sb; 962 goto out_sb;
961 963
962 /* 964 /*
963 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE 965 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
diff --git a/fs/sync.c b/fs/sync.c
index fc5c3d75cf3c..92b228176f7c 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -14,6 +14,7 @@
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/quotaops.h> 15#include <linux/quotaops.h>
16#include <linux/buffer_head.h> 16#include <linux/buffer_head.h>
17#include <linux/backing-dev.h>
17#include "internal.h" 18#include "internal.h"
18 19
19#define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ 20#define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
@@ -32,7 +33,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
32 * This should be safe, as we require bdi backing to actually 33 * This should be safe, as we require bdi backing to actually
33 * write out data in the first place 34 * write out data in the first place
34 */ 35 */
35 if (!sb->s_bdi) 36 if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info)
36 return 0; 37 return 0;
37 38
38 if (sb->s_qcop && sb->s_qcop->quota_sync) 39 if (sb->s_qcop && sb->s_qcop->quota_sync)
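
The super.c and sync.c hunks give anonymous superblocks a default noop_backing_dev_info, warn in vfs_kern_mount() when a filesystem forgets to set s_bdi, and skip writeback when the bdi is the no-op sentinel. A sketch of the sentinel-object idea, with hypothetical types rather than the kernel's bdi structures:

#include <stdio.h>

struct backing_sketch { const char *name; };

static struct backing_sketch noop_backing = { "noop" };

static void sync_fs_sketch(struct backing_sketch *bdi)
{
	if (!bdi || bdi == &noop_backing)
		return;			/* nothing to write back */
	printf("flushing %s\n", bdi->name);
}

int main(void)
{
	struct backing_sketch disk = { "sda1" };

	sync_fs_sketch(&noop_backing);	/* anonymous/virtual fs: skipped */
	sync_fs_sketch(&disk);		/* real device: flushed */
	return 0;
}
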
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 05cd85317f6f..fd9698215759 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -820,10 +820,10 @@ xfs_reclaim_inode(
820 * call into reclaim to find it in a clean state instead of waiting for 820 * call into reclaim to find it in a clean state instead of waiting for
821 * it now. We also don't return errors here - if the error is transient 821 * it now. We also don't return errors here - if the error is transient
822 * then the next reclaim pass will flush the inode, and if the error 822 * then the next reclaim pass will flush the inode, and if the error
823 * is permanent then the next sync reclaim will relcaim the inode and 823 * is permanent then the next sync reclaim will reclaim the inode and
824 * pass on the error. 824 * pass on the error.
825 */ 825 */
826 if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { 826 if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
827 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 827 xfs_fs_cmn_err(CE_WARN, ip->i_mount,
828 "inode 0x%llx background reclaim flush failed with %d", 828 "inode 0x%llx background reclaim flush failed with %d",
829 (long long)ip->i_ino, error); 829 (long long)ip->i_ino, error);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index cd27c9d6c71f..5bba29a07812 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -177,16 +177,26 @@ xfs_swap_extents_check_format(
177 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max) 177 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
178 return EINVAL; 178 return EINVAL;
179 179
180 /* Check root block of temp in btree form to max in target */ 180 /*
181 * If we are in a btree format, check that the temp root block will fit
182 * in the target and that it has enough extents to be in btree format
183 * in the target.
184 *
185 * Note that we have to be careful to allow btree->extent conversions
186 * (a common defrag case) which will occur when the temp inode is in
187 * extent format...
188 */
181 if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE && 189 if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
182 XFS_IFORK_BOFF(ip) && 190 ((XFS_IFORK_BOFF(ip) &&
183 tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) 191 tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) ||
192 XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max))
184 return EINVAL; 193 return EINVAL;
185 194
186 /* Check root block of target in btree form to max in temp */ 195 /* Reciprocal target->temp btree format checks */
187 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && 196 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
188 XFS_IFORK_BOFF(tip) && 197 ((XFS_IFORK_BOFF(tip) &&
189 ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) 198 ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) ||
199 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max))
190 return EINVAL; 200 return EINVAL;
191 201
192 return 0; 202 return 0;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index e8fba92d7cd9..2be019136287 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -745,9 +745,16 @@ xfs_log_move_tail(xfs_mount_t *mp,
745 745
746/* 746/*
747 * Determine if we have a transaction that has gone to disk 747 * Determine if we have a transaction that has gone to disk
748 * that needs to be covered. Log activity needs to be idle (no AIL and 748 * that needs to be covered. To begin the transition to the idle state
749 * nothing in the iclogs). And, we need to be in the right state indicating 749 * firstly the log needs to be idle (no AIL and nothing in the iclogs).
750 * something has gone out. 750 * If we are then in a state where covering is needed, the caller is informed
751 * that dummy transactions are required to move the log into the idle state.
752 *
753 * Because this is called as part of the sync process, we should also indicate
754 * that dummy transactions should be issued in anything but the covered or
755 * idle states. This ensures that the log tail is accurately reflected in
756 * the log at the end of the sync, hence if a crash occurrs avoids replay
757 * of transactions where the metadata is already on disk.
751 */ 758 */
752int 759int
753xfs_log_need_covered(xfs_mount_t *mp) 760xfs_log_need_covered(xfs_mount_t *mp)
@@ -759,17 +766,24 @@ xfs_log_need_covered(xfs_mount_t *mp)
759 return 0; 766 return 0;
760 767
761 spin_lock(&log->l_icloglock); 768 spin_lock(&log->l_icloglock);
762 if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || 769 switch (log->l_covered_state) {
763 (log->l_covered_state == XLOG_STATE_COVER_NEED2)) 770 case XLOG_STATE_COVER_DONE:
764 && !xfs_trans_ail_tail(log->l_ailp) 771 case XLOG_STATE_COVER_DONE2:
765 && xlog_iclogs_empty(log)) { 772 case XLOG_STATE_COVER_IDLE:
766 if (log->l_covered_state == XLOG_STATE_COVER_NEED) 773 break;
767 log->l_covered_state = XLOG_STATE_COVER_DONE; 774 case XLOG_STATE_COVER_NEED:
768 else { 775 case XLOG_STATE_COVER_NEED2:
769 ASSERT(log->l_covered_state == XLOG_STATE_COVER_NEED2); 776 if (!xfs_trans_ail_tail(log->l_ailp) &&
770 log->l_covered_state = XLOG_STATE_COVER_DONE2; 777 xlog_iclogs_empty(log)) {
778 if (log->l_covered_state == XLOG_STATE_COVER_NEED)
779 log->l_covered_state = XLOG_STATE_COVER_DONE;
780 else
781 log->l_covered_state = XLOG_STATE_COVER_DONE2;
771 } 782 }
783 /* FALLTHRU */
784 default:
772 needed = 1; 785 needed = 1;
786 break;
773 } 787 }
774 spin_unlock(&log->l_icloglock); 788 spin_unlock(&log->l_icloglock);
775 return needed; 789 return needed;
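
The xfs_log_need_covered() rewrite expresses the covering logic as a switch over the state machine: the covered/idle states need nothing, the NEED states may advance to DONE and then deliberately fall through to the default, which reports that dummy transactions are still required. An illustrative state-machine sketch with made-up states and conditions, not the XFS definitions:

#include <stdio.h>

enum cover_state { COVER_IDLE, COVER_NEED, COVER_DONE, COVER_NEED2, COVER_DONE2 };

static int need_covered_sketch(enum cover_state *state, int log_is_idle)
{
	int needed = 0;

	switch (*state) {
	case COVER_DONE:
	case COVER_DONE2:
	case COVER_IDLE:
		break;				/* nothing more to cover */
	case COVER_NEED:
	case COVER_NEED2:
		if (log_is_idle)
			*state = (*state == COVER_NEED) ? COVER_DONE : COVER_DONE2;
		/* fall through: covering work is still in progress */
	default:
		needed = 1;
		break;
	}
	return needed;
}

int main(void)
{
	enum cover_state s = COVER_NEED;

	printf("needed=%d, state now %d\n", need_covered_sketch(&s, 1), s);
	printf("needed=%d, state now %d\n", need_covered_sketch(&s, 1), s);
	return 0;
}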