diff options
author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2010-05-19 13:12:41 -0400 |
---|---|---|
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2010-05-19 13:12:41 -0400 |
commit | 8d0bc2b456103a34c11e01305cd1aed1cde579e5 (patch) | |
tree | 5e1e6ad55cc9e2b5c5617f6f320114b8cff9e3f3 /fs | |
parent | 30ba3ead05763b172acaa65ae1be71af2a878940 (diff) | |
parent | e40152ee1e1c7a63f4777791863215e3faa37a86 (diff) |
Merge commit 'v2.6.34' into next
Diffstat (limited to 'fs')
115 files changed, 1433 insertions, 780 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 5c5bc8480070..f8b86e92cd66 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c | |||
@@ -238,6 +238,13 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, | |||
238 | return ERR_PTR(-ENOMEM); | 238 | return ERR_PTR(-ENOMEM); |
239 | } | 239 | } |
240 | 240 | ||
241 | rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY); | ||
242 | if (rc) { | ||
243 | __putname(v9ses->aname); | ||
244 | __putname(v9ses->uname); | ||
245 | return ERR_PTR(rc); | ||
246 | } | ||
247 | |||
241 | spin_lock(&v9fs_sessionlist_lock); | 248 | spin_lock(&v9fs_sessionlist_lock); |
242 | list_add(&v9ses->slist, &v9fs_sessionlist); | 249 | list_add(&v9ses->slist, &v9fs_sessionlist); |
243 | spin_unlock(&v9fs_sessionlist_lock); | 250 | spin_unlock(&v9fs_sessionlist_lock); |
@@ -301,6 +308,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, | |||
301 | return fid; | 308 | return fid; |
302 | 309 | ||
303 | error: | 310 | error: |
311 | bdi_destroy(&v9ses->bdi); | ||
304 | return ERR_PTR(retval); | 312 | return ERR_PTR(retval); |
305 | } | 313 | } |
306 | 314 | ||
@@ -326,6 +334,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) | |||
326 | __putname(v9ses->uname); | 334 | __putname(v9ses->uname); |
327 | __putname(v9ses->aname); | 335 | __putname(v9ses->aname); |
328 | 336 | ||
337 | bdi_destroy(&v9ses->bdi); | ||
338 | |||
329 | spin_lock(&v9fs_sessionlist_lock); | 339 | spin_lock(&v9fs_sessionlist_lock); |
330 | list_del(&v9ses->slist); | 340 | list_del(&v9ses->slist); |
331 | spin_unlock(&v9fs_sessionlist_lock); | 341 | spin_unlock(&v9fs_sessionlist_lock); |
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index a0a8d3dd1361..bec4d0bcb458 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h | |||
@@ -20,6 +20,7 @@ | |||
20 | * Boston, MA 02111-1301 USA | 20 | * Boston, MA 02111-1301 USA |
21 | * | 21 | * |
22 | */ | 22 | */ |
23 | #include <linux/backing-dev.h> | ||
23 | 24 | ||
24 | /** | 25 | /** |
25 | * enum p9_session_flags - option flags for each 9P session | 26 | * enum p9_session_flags - option flags for each 9P session |
@@ -102,6 +103,7 @@ struct v9fs_session_info { | |||
102 | u32 uid; /* if ACCESS_SINGLE, the uid that has access */ | 103 | u32 uid; /* if ACCESS_SINGLE, the uid that has access */ |
103 | struct p9_client *clnt; /* 9p client */ | 104 | struct p9_client *clnt; /* 9p client */ |
104 | struct list_head slist; /* list of sessions registered with v9fs */ | 105 | struct list_head slist; /* list of sessions registered with v9fs */ |
106 | struct backing_dev_info bdi; | ||
105 | }; | 107 | }; |
106 | 108 | ||
107 | struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, | 109 | struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, |
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 491108bd6e0d..806da5d3b3a0 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c | |||
@@ -77,6 +77,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, | |||
77 | sb->s_blocksize = 1 << sb->s_blocksize_bits; | 77 | sb->s_blocksize = 1 << sb->s_blocksize_bits; |
78 | sb->s_magic = V9FS_MAGIC; | 78 | sb->s_magic = V9FS_MAGIC; |
79 | sb->s_op = &v9fs_super_ops; | 79 | sb->s_op = &v9fs_super_ops; |
80 | sb->s_bdi = &v9ses->bdi; | ||
80 | 81 | ||
81 | sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | | 82 | sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | |
82 | MS_NOATIME; | 83 | MS_NOATIME; |
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index c54dad4e6063..a10f2582844f 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/workqueue.h> | 19 | #include <linux/workqueue.h> |
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/fscache.h> | 21 | #include <linux/fscache.h> |
22 | #include <linux/backing-dev.h> | ||
22 | 23 | ||
23 | #include "afs.h" | 24 | #include "afs.h" |
24 | #include "afs_vl.h" | 25 | #include "afs_vl.h" |
@@ -313,6 +314,7 @@ struct afs_volume { | |||
313 | unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */ | 314 | unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */ |
314 | struct afs_server *servers[8]; /* servers on which volume resides (ordered) */ | 315 | struct afs_server *servers[8]; /* servers on which volume resides (ordered) */ |
315 | struct rw_semaphore server_sem; /* lock for accessing current server */ | 316 | struct rw_semaphore server_sem; /* lock for accessing current server */ |
317 | struct backing_dev_info bdi; | ||
316 | }; | 318 | }; |
317 | 319 | ||
318 | /* | 320 | /* |
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 5e813a816ce4..b3feddc4f7d6 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c | |||
@@ -138,9 +138,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) | |||
138 | { | 138 | { |
139 | struct afs_super_info *super; | 139 | struct afs_super_info *super; |
140 | struct vfsmount *mnt; | 140 | struct vfsmount *mnt; |
141 | struct page *page = NULL; | 141 | struct page *page; |
142 | size_t size; | 142 | size_t size; |
143 | char *buf, *devname = NULL, *options = NULL; | 143 | char *buf, *devname, *options; |
144 | int ret; | 144 | int ret; |
145 | 145 | ||
146 | _enter("{%s}", mntpt->d_name.name); | 146 | _enter("{%s}", mntpt->d_name.name); |
@@ -150,22 +150,22 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) | |||
150 | ret = -EINVAL; | 150 | ret = -EINVAL; |
151 | size = mntpt->d_inode->i_size; | 151 | size = mntpt->d_inode->i_size; |
152 | if (size > PAGE_SIZE - 1) | 152 | if (size > PAGE_SIZE - 1) |
153 | goto error; | 153 | goto error_no_devname; |
154 | 154 | ||
155 | ret = -ENOMEM; | 155 | ret = -ENOMEM; |
156 | devname = (char *) get_zeroed_page(GFP_KERNEL); | 156 | devname = (char *) get_zeroed_page(GFP_KERNEL); |
157 | if (!devname) | 157 | if (!devname) |
158 | goto error; | 158 | goto error_no_devname; |
159 | 159 | ||
160 | options = (char *) get_zeroed_page(GFP_KERNEL); | 160 | options = (char *) get_zeroed_page(GFP_KERNEL); |
161 | if (!options) | 161 | if (!options) |
162 | goto error; | 162 | goto error_no_options; |
163 | 163 | ||
164 | /* read the contents of the AFS special symlink */ | 164 | /* read the contents of the AFS special symlink */ |
165 | page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL); | 165 | page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL); |
166 | if (IS_ERR(page)) { | 166 | if (IS_ERR(page)) { |
167 | ret = PTR_ERR(page); | 167 | ret = PTR_ERR(page); |
168 | goto error; | 168 | goto error_no_page; |
169 | } | 169 | } |
170 | 170 | ||
171 | ret = -EIO; | 171 | ret = -EIO; |
@@ -196,12 +196,12 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) | |||
196 | return mnt; | 196 | return mnt; |
197 | 197 | ||
198 | error: | 198 | error: |
199 | if (page) | 199 | page_cache_release(page); |
200 | page_cache_release(page); | 200 | error_no_page: |
201 | if (devname) | 201 | free_page((unsigned long) options); |
202 | free_page((unsigned long) devname); | 202 | error_no_options: |
203 | if (options) | 203 | free_page((unsigned long) devname); |
204 | free_page((unsigned long) options); | 204 | error_no_devname: |
205 | _leave(" = %d", ret); | 205 | _leave(" = %d", ret); |
206 | return ERR_PTR(ret); | 206 | return ERR_PTR(ret); |
207 | } | 207 | } |
diff --git a/fs/afs/super.c b/fs/afs/super.c index 14f6431598ad..e932e5a3a0c1 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
@@ -311,6 +311,7 @@ static int afs_fill_super(struct super_block *sb, void *data) | |||
311 | sb->s_magic = AFS_FS_MAGIC; | 311 | sb->s_magic = AFS_FS_MAGIC; |
312 | sb->s_op = &afs_super_ops; | 312 | sb->s_op = &afs_super_ops; |
313 | sb->s_fs_info = as; | 313 | sb->s_fs_info = as; |
314 | sb->s_bdi = &as->volume->bdi; | ||
314 | 315 | ||
315 | /* allocate the root inode and dentry */ | 316 | /* allocate the root inode and dentry */ |
316 | fid.vid = as->volume->vid; | 317 | fid.vid = as->volume->vid; |
diff --git a/fs/afs/volume.c b/fs/afs/volume.c index a353e69e2391..401eeb21869f 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c | |||
@@ -106,6 +106,10 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) | |||
106 | volume->cell = params->cell; | 106 | volume->cell = params->cell; |
107 | volume->vid = vlocation->vldb.vid[params->type]; | 107 | volume->vid = vlocation->vldb.vid[params->type]; |
108 | 108 | ||
109 | ret = bdi_setup_and_register(&volume->bdi, "afs", BDI_CAP_MAP_COPY); | ||
110 | if (ret) | ||
111 | goto error_bdi; | ||
112 | |||
109 | init_rwsem(&volume->server_sem); | 113 | init_rwsem(&volume->server_sem); |
110 | 114 | ||
111 | /* look up all the applicable server records */ | 115 | /* look up all the applicable server records */ |
@@ -151,6 +155,8 @@ error: | |||
151 | return ERR_PTR(ret); | 155 | return ERR_PTR(ret); |
152 | 156 | ||
153 | error_discard: | 157 | error_discard: |
158 | bdi_destroy(&volume->bdi); | ||
159 | error_bdi: | ||
154 | up_write(&params->cell->vl_sem); | 160 | up_write(&params->cell->vl_sem); |
155 | 161 | ||
156 | for (loop = volume->nservers - 1; loop >= 0; loop--) | 162 | for (loop = volume->nservers - 1; loop >= 0; loop--) |
@@ -200,6 +206,7 @@ void afs_put_volume(struct afs_volume *volume) | |||
200 | for (loop = volume->nservers - 1; loop >= 0; loop--) | 206 | for (loop = volume->nservers - 1; loop >= 0; loop--) |
201 | afs_put_server(volume->servers[loop]); | 207 | afs_put_server(volume->servers[loop]); |
202 | 208 | ||
209 | bdi_destroy(&volume->bdi); | ||
203 | kfree(volume); | 210 | kfree(volume); |
204 | 211 | ||
205 | _leave(" [destroyed]"); | 212 | _leave(" [destroyed]"); |
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 109a6c606d92..e8e5e63ac950 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
@@ -177,8 +177,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) | |||
177 | } | 177 | } |
178 | /* Trigger mount for path component or follow link */ | 178 | /* Trigger mount for path component or follow link */ |
179 | } else if (ino->flags & AUTOFS_INF_PENDING || | 179 | } else if (ino->flags & AUTOFS_INF_PENDING || |
180 | autofs4_need_mount(flags) || | 180 | autofs4_need_mount(flags)) { |
181 | current->link_count) { | ||
182 | DPRINTK("waiting for mount name=%.*s", | 181 | DPRINTK("waiting for mount name=%.*s", |
183 | dentry->d_name.len, dentry->d_name.name); | 182 | dentry->d_name.len, dentry->d_name.name); |
184 | 183 | ||
@@ -262,7 +261,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
262 | spin_unlock(&dcache_lock); | 261 | spin_unlock(&dcache_lock); |
263 | spin_unlock(&sbi->fs_lock); | 262 | spin_unlock(&sbi->fs_lock); |
264 | 263 | ||
265 | status = try_to_fill_dentry(dentry, 0); | 264 | status = try_to_fill_dentry(dentry, nd->flags); |
266 | if (status) | 265 | if (status) |
267 | goto out_error; | 266 | goto out_error; |
268 | 267 | ||
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 7ab23e006e4c..2c5f9a0e5d72 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c | |||
@@ -1005,15 +1005,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux( | |||
1005 | } | 1005 | } |
1006 | } else if (!mm->start_data) { | 1006 | } else if (!mm->start_data) { |
1007 | mm->start_data = seg->addr; | 1007 | mm->start_data = seg->addr; |
1008 | #ifndef CONFIG_MMU | ||
1009 | mm->end_data = seg->addr + phdr->p_memsz; | 1008 | mm->end_data = seg->addr + phdr->p_memsz; |
1010 | #endif | ||
1011 | } | 1009 | } |
1012 | |||
1013 | #ifdef CONFIG_MMU | ||
1014 | if (seg->addr + phdr->p_memsz > mm->end_data) | ||
1015 | mm->end_data = seg->addr + phdr->p_memsz; | ||
1016 | #endif | ||
1017 | } | 1010 | } |
1018 | 1011 | ||
1019 | seg++; | 1012 | seg++; |
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index e0e769bdca59..49566c1687d8 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c | |||
@@ -355,7 +355,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp) | |||
355 | 355 | ||
356 | if (!flat_reloc_valid(r, start_brk - start_data + text_len)) { | 356 | if (!flat_reloc_valid(r, start_brk - start_data + text_len)) { |
357 | printk("BINFMT_FLAT: reloc outside program 0x%x (0 - 0x%x/0x%x)", | 357 | printk("BINFMT_FLAT: reloc outside program 0x%x (0 - 0x%x/0x%x)", |
358 | (int) r,(int)(start_brk-start_code),(int)text_len); | 358 | (int) r,(int)(start_brk-start_data+text_len),(int)text_len); |
359 | goto failed; | 359 | goto failed; |
360 | } | 360 | } |
361 | 361 | ||
diff --git a/fs/block_dev.c b/fs/block_dev.c index 2a6d0193f139..6dcee88c2e5d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -406,16 +406,23 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin) | |||
406 | 406 | ||
407 | int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync) | 407 | int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync) |
408 | { | 408 | { |
409 | struct block_device *bdev = I_BDEV(filp->f_mapping->host); | 409 | struct inode *bd_inode = filp->f_mapping->host; |
410 | struct block_device *bdev = I_BDEV(bd_inode); | ||
410 | int error; | 411 | int error; |
411 | 412 | ||
412 | error = sync_blockdev(bdev); | 413 | /* |
413 | if (error) | 414 | * There is no need to serialise calls to blkdev_issue_flush with |
414 | return error; | 415 | * i_mutex and doing so causes performance issues with concurrent |
415 | 416 | * O_SYNC writers to a block device. | |
417 | */ | ||
418 | mutex_unlock(&bd_inode->i_mutex); | ||
419 | |||
416 | error = blkdev_issue_flush(bdev, NULL); | 420 | error = blkdev_issue_flush(bdev, NULL); |
417 | if (error == -EOPNOTSUPP) | 421 | if (error == -EOPNOTSUPP) |
418 | error = 0; | 422 | error = 0; |
423 | |||
424 | mutex_lock(&bd_inode->i_mutex); | ||
425 | |||
419 | return error; | 426 | return error; |
420 | } | 427 | } |
421 | EXPORT_SYMBOL(blkdev_fsync); | 428 | EXPORT_SYMBOL(blkdev_fsync); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e7b8f2c89ccb..feca04197d02 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -44,8 +44,6 @@ static struct extent_io_ops btree_extent_io_ops; | |||
44 | static void end_workqueue_fn(struct btrfs_work *work); | 44 | static void end_workqueue_fn(struct btrfs_work *work); |
45 | static void free_fs_root(struct btrfs_root *root); | 45 | static void free_fs_root(struct btrfs_root *root); |
46 | 46 | ||
47 | static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); | ||
48 | |||
49 | /* | 47 | /* |
50 | * end_io_wq structs are used to do processing in task context when an IO is | 48 | * end_io_wq structs are used to do processing in task context when an IO is |
51 | * complete. This is used during reads to verify checksums, and it is used | 49 | * complete. This is used during reads to verify checksums, and it is used |
@@ -1375,19 +1373,11 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
1375 | { | 1373 | { |
1376 | int err; | 1374 | int err; |
1377 | 1375 | ||
1378 | bdi->name = "btrfs"; | ||
1379 | bdi->capabilities = BDI_CAP_MAP_COPY; | 1376 | bdi->capabilities = BDI_CAP_MAP_COPY; |
1380 | err = bdi_init(bdi); | 1377 | err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY); |
1381 | if (err) | 1378 | if (err) |
1382 | return err; | 1379 | return err; |
1383 | 1380 | ||
1384 | err = bdi_register(bdi, NULL, "btrfs-%d", | ||
1385 | atomic_inc_return(&btrfs_bdi_num)); | ||
1386 | if (err) { | ||
1387 | bdi_destroy(bdi); | ||
1388 | return err; | ||
1389 | } | ||
1390 | |||
1391 | bdi->ra_pages = default_backing_dev_info.ra_pages; | 1381 | bdi->ra_pages = default_backing_dev_info.ra_pages; |
1392 | bdi->unplug_io_fn = btrfs_unplug_io_fn; | 1382 | bdi->unplug_io_fn = btrfs_unplug_io_fn; |
1393 | bdi->unplug_io_data = info; | 1383 | bdi->unplug_io_data = info; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e84ef60ffe35..97a97839a867 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -1481,12 +1481,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1481 | ret = -EBADF; | 1481 | ret = -EBADF; |
1482 | goto out_drop_write; | 1482 | goto out_drop_write; |
1483 | } | 1483 | } |
1484 | |||
1484 | src = src_file->f_dentry->d_inode; | 1485 | src = src_file->f_dentry->d_inode; |
1485 | 1486 | ||
1486 | ret = -EINVAL; | 1487 | ret = -EINVAL; |
1487 | if (src == inode) | 1488 | if (src == inode) |
1488 | goto out_fput; | 1489 | goto out_fput; |
1489 | 1490 | ||
1491 | /* the src must be open for reading */ | ||
1492 | if (!(src_file->f_mode & FMODE_READ)) | ||
1493 | goto out_fput; | ||
1494 | |||
1490 | ret = -EISDIR; | 1495 | ret = -EISDIR; |
1491 | if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) | 1496 | if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) |
1492 | goto out_fput; | 1497 | goto out_fput; |
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index f7c255f9c624..a8cd821226da 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h | |||
@@ -34,6 +34,7 @@ struct cachefiles_object { | |||
34 | loff_t i_size; /* object size */ | 34 | loff_t i_size; /* object size */ |
35 | unsigned long flags; | 35 | unsigned long flags; |
36 | #define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */ | 36 | #define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */ |
37 | #define CACHEFILES_OBJECT_BURIED 1 /* T if preemptively buried */ | ||
37 | atomic_t usage; /* object usage count */ | 38 | atomic_t usage; /* object usage count */ |
38 | uint8_t type; /* object type */ | 39 | uint8_t type; /* object type */ |
39 | uint8_t new; /* T if object new */ | 40 | uint8_t new; /* T if object new */ |
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index d5db84a1ee0d..f4a7840bf42c 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c | |||
@@ -93,6 +93,59 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object, | |||
93 | } | 93 | } |
94 | 94 | ||
95 | /* | 95 | /* |
96 | * mark the owner of a dentry, if there is one, to indicate that that dentry | ||
97 | * has been preemptively deleted | ||
98 | * - the caller must hold the i_mutex on the dentry's parent as required to | ||
99 | * call vfs_unlink(), vfs_rmdir() or vfs_rename() | ||
100 | */ | ||
101 | static void cachefiles_mark_object_buried(struct cachefiles_cache *cache, | ||
102 | struct dentry *dentry) | ||
103 | { | ||
104 | struct cachefiles_object *object; | ||
105 | struct rb_node *p; | ||
106 | |||
107 | _enter(",'%*.*s'", | ||
108 | dentry->d_name.len, dentry->d_name.len, dentry->d_name.name); | ||
109 | |||
110 | write_lock(&cache->active_lock); | ||
111 | |||
112 | p = cache->active_nodes.rb_node; | ||
113 | while (p) { | ||
114 | object = rb_entry(p, struct cachefiles_object, active_node); | ||
115 | if (object->dentry > dentry) | ||
116 | p = p->rb_left; | ||
117 | else if (object->dentry < dentry) | ||
118 | p = p->rb_right; | ||
119 | else | ||
120 | goto found_dentry; | ||
121 | } | ||
122 | |||
123 | write_unlock(&cache->active_lock); | ||
124 | _leave(" [no owner]"); | ||
125 | return; | ||
126 | |||
127 | /* found the dentry for */ | ||
128 | found_dentry: | ||
129 | kdebug("preemptive burial: OBJ%x [%s] %p", | ||
130 | object->fscache.debug_id, | ||
131 | fscache_object_states[object->fscache.state], | ||
132 | dentry); | ||
133 | |||
134 | if (object->fscache.state < FSCACHE_OBJECT_DYING) { | ||
135 | printk(KERN_ERR "\n"); | ||
136 | printk(KERN_ERR "CacheFiles: Error:" | ||
137 | " Can't preemptively bury live object\n"); | ||
138 | cachefiles_printk_object(object, NULL); | ||
139 | } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { | ||
140 | printk(KERN_ERR "CacheFiles: Error:" | ||
141 | " Object already preemptively buried\n"); | ||
142 | } | ||
143 | |||
144 | write_unlock(&cache->active_lock); | ||
145 | _leave(" [owner marked]"); | ||
146 | } | ||
147 | |||
148 | /* | ||
96 | * record the fact that an object is now active | 149 | * record the fact that an object is now active |
97 | */ | 150 | */ |
98 | static int cachefiles_mark_object_active(struct cachefiles_cache *cache, | 151 | static int cachefiles_mark_object_active(struct cachefiles_cache *cache, |
@@ -219,7 +272,8 @@ requeue: | |||
219 | */ | 272 | */ |
220 | static int cachefiles_bury_object(struct cachefiles_cache *cache, | 273 | static int cachefiles_bury_object(struct cachefiles_cache *cache, |
221 | struct dentry *dir, | 274 | struct dentry *dir, |
222 | struct dentry *rep) | 275 | struct dentry *rep, |
276 | bool preemptive) | ||
223 | { | 277 | { |
224 | struct dentry *grave, *trap; | 278 | struct dentry *grave, *trap; |
225 | char nbuffer[8 + 8 + 1]; | 279 | char nbuffer[8 + 8 + 1]; |
@@ -229,11 +283,16 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, | |||
229 | dir->d_name.len, dir->d_name.len, dir->d_name.name, | 283 | dir->d_name.len, dir->d_name.len, dir->d_name.name, |
230 | rep->d_name.len, rep->d_name.len, rep->d_name.name); | 284 | rep->d_name.len, rep->d_name.len, rep->d_name.name); |
231 | 285 | ||
286 | _debug("remove %p from %p", rep, dir); | ||
287 | |||
232 | /* non-directories can just be unlinked */ | 288 | /* non-directories can just be unlinked */ |
233 | if (!S_ISDIR(rep->d_inode->i_mode)) { | 289 | if (!S_ISDIR(rep->d_inode->i_mode)) { |
234 | _debug("unlink stale object"); | 290 | _debug("unlink stale object"); |
235 | ret = vfs_unlink(dir->d_inode, rep); | 291 | ret = vfs_unlink(dir->d_inode, rep); |
236 | 292 | ||
293 | if (preemptive) | ||
294 | cachefiles_mark_object_buried(cache, rep); | ||
295 | |||
237 | mutex_unlock(&dir->d_inode->i_mutex); | 296 | mutex_unlock(&dir->d_inode->i_mutex); |
238 | 297 | ||
239 | if (ret == -EIO) | 298 | if (ret == -EIO) |
@@ -325,6 +384,9 @@ try_again: | |||
325 | if (ret != 0 && ret != -ENOMEM) | 384 | if (ret != 0 && ret != -ENOMEM) |
326 | cachefiles_io_error(cache, "Rename failed with error %d", ret); | 385 | cachefiles_io_error(cache, "Rename failed with error %d", ret); |
327 | 386 | ||
387 | if (preemptive) | ||
388 | cachefiles_mark_object_buried(cache, rep); | ||
389 | |||
328 | unlock_rename(cache->graveyard, dir); | 390 | unlock_rename(cache->graveyard, dir); |
329 | dput(grave); | 391 | dput(grave); |
330 | _leave(" = 0"); | 392 | _leave(" = 0"); |
@@ -340,7 +402,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache, | |||
340 | struct dentry *dir; | 402 | struct dentry *dir; |
341 | int ret; | 403 | int ret; |
342 | 404 | ||
343 | _enter(",{%p}", object->dentry); | 405 | _enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry); |
344 | 406 | ||
345 | ASSERT(object->dentry); | 407 | ASSERT(object->dentry); |
346 | ASSERT(object->dentry->d_inode); | 408 | ASSERT(object->dentry->d_inode); |
@@ -350,15 +412,25 @@ int cachefiles_delete_object(struct cachefiles_cache *cache, | |||
350 | 412 | ||
351 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); | 413 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); |
352 | 414 | ||
353 | /* we need to check that our parent is _still_ our parent - it may have | 415 | if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { |
354 | * been renamed */ | 416 | /* object allocation for the same key preemptively deleted this |
355 | if (dir == object->dentry->d_parent) { | 417 | * object's file so that it could create its own file */ |
356 | ret = cachefiles_bury_object(cache, dir, object->dentry); | 418 | _debug("object preemptively buried"); |
357 | } else { | ||
358 | /* it got moved, presumably by cachefilesd culling it, so it's | ||
359 | * no longer in the key path and we can ignore it */ | ||
360 | mutex_unlock(&dir->d_inode->i_mutex); | 419 | mutex_unlock(&dir->d_inode->i_mutex); |
361 | ret = 0; | 420 | ret = 0; |
421 | } else { | ||
422 | /* we need to check that our parent is _still_ our parent - it | ||
423 | * may have been renamed */ | ||
424 | if (dir == object->dentry->d_parent) { | ||
425 | ret = cachefiles_bury_object(cache, dir, | ||
426 | object->dentry, false); | ||
427 | } else { | ||
428 | /* it got moved, presumably by cachefilesd culling it, | ||
429 | * so it's no longer in the key path and we can ignore | ||
430 | * it */ | ||
431 | mutex_unlock(&dir->d_inode->i_mutex); | ||
432 | ret = 0; | ||
433 | } | ||
362 | } | 434 | } |
363 | 435 | ||
364 | dput(dir); | 436 | dput(dir); |
@@ -381,7 +453,9 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, | |||
381 | const char *name; | 453 | const char *name; |
382 | int ret, nlen; | 454 | int ret, nlen; |
383 | 455 | ||
384 | _enter("{%p},,%s,", parent->dentry, key); | 456 | _enter("OBJ%x{%p},OBJ%x,%s,", |
457 | parent->fscache.debug_id, parent->dentry, | ||
458 | object->fscache.debug_id, key); | ||
385 | 459 | ||
386 | cache = container_of(parent->fscache.cache, | 460 | cache = container_of(parent->fscache.cache, |
387 | struct cachefiles_cache, cache); | 461 | struct cachefiles_cache, cache); |
@@ -509,7 +583,7 @@ lookup_again: | |||
509 | * mutex) */ | 583 | * mutex) */ |
510 | object->dentry = NULL; | 584 | object->dentry = NULL; |
511 | 585 | ||
512 | ret = cachefiles_bury_object(cache, dir, next); | 586 | ret = cachefiles_bury_object(cache, dir, next, true); |
513 | dput(next); | 587 | dput(next); |
514 | next = NULL; | 588 | next = NULL; |
515 | 589 | ||
@@ -828,7 +902,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, | |||
828 | /* actually remove the victim (drops the dir mutex) */ | 902 | /* actually remove the victim (drops the dir mutex) */ |
829 | _debug("bury"); | 903 | _debug("bury"); |
830 | 904 | ||
831 | ret = cachefiles_bury_object(cache, dir, victim); | 905 | ret = cachefiles_bury_object(cache, dir, victim, false); |
832 | if (ret < 0) | 906 | if (ret < 0) |
833 | goto error; | 907 | goto error; |
834 | 908 | ||
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c index b5808cdb2232..039b5011d83b 100644 --- a/fs/cachefiles/security.c +++ b/fs/cachefiles/security.c | |||
@@ -77,6 +77,8 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache, | |||
77 | /* | 77 | /* |
78 | * check the security details of the on-disk cache | 78 | * check the security details of the on-disk cache |
79 | * - must be called with security override in force | 79 | * - must be called with security override in force |
80 | * - must return with a security override in force - even in the case of an | ||
81 | * error | ||
80 | */ | 82 | */ |
81 | int cachefiles_determine_cache_security(struct cachefiles_cache *cache, | 83 | int cachefiles_determine_cache_security(struct cachefiles_cache *cache, |
82 | struct dentry *root, | 84 | struct dentry *root, |
@@ -99,6 +101,8 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache, | |||
99 | * which create files */ | 101 | * which create files */ |
100 | ret = set_create_files_as(new, root->d_inode); | 102 | ret = set_create_files_as(new, root->d_inode); |
101 | if (ret < 0) { | 103 | if (ret < 0) { |
104 | abort_creds(new); | ||
105 | cachefiles_begin_secure(cache, _saved_cred); | ||
102 | _leave(" = %d [cfa]", ret); | 106 | _leave(" = %d [cfa]", ret); |
103 | return ret; | 107 | return ret; |
104 | } | 108 | } |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index aa3cd7cc3e40..a9005d862ed4 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -337,16 +337,15 @@ out: | |||
337 | /* | 337 | /* |
338 | * Get ref for the oldest snapc for an inode with dirty data... that is, the | 338 | * Get ref for the oldest snapc for an inode with dirty data... that is, the |
339 | * only snap context we are allowed to write back. | 339 | * only snap context we are allowed to write back. |
340 | * | ||
341 | * Caller holds i_lock. | ||
342 | */ | 340 | */ |
343 | static struct ceph_snap_context *__get_oldest_context(struct inode *inode, | 341 | static struct ceph_snap_context *get_oldest_context(struct inode *inode, |
344 | u64 *snap_size) | 342 | u64 *snap_size) |
345 | { | 343 | { |
346 | struct ceph_inode_info *ci = ceph_inode(inode); | 344 | struct ceph_inode_info *ci = ceph_inode(inode); |
347 | struct ceph_snap_context *snapc = NULL; | 345 | struct ceph_snap_context *snapc = NULL; |
348 | struct ceph_cap_snap *capsnap = NULL; | 346 | struct ceph_cap_snap *capsnap = NULL; |
349 | 347 | ||
348 | spin_lock(&inode->i_lock); | ||
350 | list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { | 349 | list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { |
351 | dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, | 350 | dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, |
352 | capsnap->context, capsnap->dirty_pages); | 351 | capsnap->context, capsnap->dirty_pages); |
@@ -357,21 +356,11 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode, | |||
357 | break; | 356 | break; |
358 | } | 357 | } |
359 | } | 358 | } |
360 | if (!snapc && ci->i_snap_realm) { | 359 | if (!snapc && ci->i_head_snapc) { |
361 | snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context); | 360 | snapc = ceph_get_snap_context(ci->i_head_snapc); |
362 | dout(" head snapc %p has %d dirty pages\n", | 361 | dout(" head snapc %p has %d dirty pages\n", |
363 | snapc, ci->i_wrbuffer_ref_head); | 362 | snapc, ci->i_wrbuffer_ref_head); |
364 | } | 363 | } |
365 | return snapc; | ||
366 | } | ||
367 | |||
368 | static struct ceph_snap_context *get_oldest_context(struct inode *inode, | ||
369 | u64 *snap_size) | ||
370 | { | ||
371 | struct ceph_snap_context *snapc = NULL; | ||
372 | |||
373 | spin_lock(&inode->i_lock); | ||
374 | snapc = __get_oldest_context(inode, snap_size); | ||
375 | spin_unlock(&inode->i_lock); | 364 | spin_unlock(&inode->i_lock); |
376 | return snapc; | 365 | return snapc; |
377 | } | 366 | } |
@@ -392,7 +381,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
392 | int len = PAGE_CACHE_SIZE; | 381 | int len = PAGE_CACHE_SIZE; |
393 | loff_t i_size; | 382 | loff_t i_size; |
394 | int err = 0; | 383 | int err = 0; |
395 | struct ceph_snap_context *snapc; | 384 | struct ceph_snap_context *snapc, *oldest; |
396 | u64 snap_size = 0; | 385 | u64 snap_size = 0; |
397 | long writeback_stat; | 386 | long writeback_stat; |
398 | 387 | ||
@@ -413,13 +402,16 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
413 | dout("writepage %p page %p not dirty?\n", inode, page); | 402 | dout("writepage %p page %p not dirty?\n", inode, page); |
414 | goto out; | 403 | goto out; |
415 | } | 404 | } |
416 | if (snapc != get_oldest_context(inode, &snap_size)) { | 405 | oldest = get_oldest_context(inode, &snap_size); |
406 | if (snapc->seq > oldest->seq) { | ||
417 | dout("writepage %p page %p snapc %p not writeable - noop\n", | 407 | dout("writepage %p page %p snapc %p not writeable - noop\n", |
418 | inode, page, (void *)page->private); | 408 | inode, page, (void *)page->private); |
419 | /* we should only noop if called by kswapd */ | 409 | /* we should only noop if called by kswapd */ |
420 | WARN_ON((current->flags & PF_MEMALLOC) == 0); | 410 | WARN_ON((current->flags & PF_MEMALLOC) == 0); |
411 | ceph_put_snap_context(oldest); | ||
421 | goto out; | 412 | goto out; |
422 | } | 413 | } |
414 | ceph_put_snap_context(oldest); | ||
423 | 415 | ||
424 | /* is this a partial page at end of file? */ | 416 | /* is this a partial page at end of file? */ |
425 | if (snap_size) | 417 | if (snap_size) |
@@ -458,7 +450,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
458 | ClearPagePrivate(page); | 450 | ClearPagePrivate(page); |
459 | end_page_writeback(page); | 451 | end_page_writeback(page); |
460 | ceph_put_wrbuffer_cap_refs(ci, 1, snapc); | 452 | ceph_put_wrbuffer_cap_refs(ci, 1, snapc); |
461 | ceph_put_snap_context(snapc); | 453 | ceph_put_snap_context(snapc); /* page's reference */ |
462 | out: | 454 | out: |
463 | return err; | 455 | return err; |
464 | } | 456 | } |
@@ -512,12 +504,11 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
512 | int i; | 504 | int i; |
513 | struct ceph_snap_context *snapc = req->r_snapc; | 505 | struct ceph_snap_context *snapc = req->r_snapc; |
514 | struct address_space *mapping = inode->i_mapping; | 506 | struct address_space *mapping = inode->i_mapping; |
515 | struct writeback_control *wbc = req->r_wbc; | ||
516 | __s32 rc = -EIO; | 507 | __s32 rc = -EIO; |
517 | u64 bytes = 0; | 508 | u64 bytes = 0; |
518 | struct ceph_client *client = ceph_inode_to_client(inode); | 509 | struct ceph_client *client = ceph_inode_to_client(inode); |
519 | long writeback_stat; | 510 | long writeback_stat; |
520 | unsigned issued = __ceph_caps_issued(ci, NULL); | 511 | unsigned issued = ceph_caps_issued(ci); |
521 | 512 | ||
522 | /* parse reply */ | 513 | /* parse reply */ |
523 | replyhead = msg->front.iov_base; | 514 | replyhead = msg->front.iov_base; |
@@ -554,13 +545,9 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
554 | clear_bdi_congested(&client->backing_dev_info, | 545 | clear_bdi_congested(&client->backing_dev_info, |
555 | BLK_RW_ASYNC); | 546 | BLK_RW_ASYNC); |
556 | 547 | ||
557 | if (i >= wrote) { | 548 | ceph_put_snap_context((void *)page->private); |
558 | dout("inode %p skipping page %p\n", inode, page); | ||
559 | wbc->pages_skipped++; | ||
560 | } | ||
561 | page->private = 0; | 549 | page->private = 0; |
562 | ClearPagePrivate(page); | 550 | ClearPagePrivate(page); |
563 | ceph_put_snap_context(snapc); | ||
564 | dout("unlocking %d %p\n", i, page); | 551 | dout("unlocking %d %p\n", i, page); |
565 | end_page_writeback(page); | 552 | end_page_writeback(page); |
566 | 553 | ||
@@ -618,7 +605,7 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
618 | int range_whole = 0; | 605 | int range_whole = 0; |
619 | int should_loop = 1; | 606 | int should_loop = 1; |
620 | pgoff_t max_pages = 0, max_pages_ever = 0; | 607 | pgoff_t max_pages = 0, max_pages_ever = 0; |
621 | struct ceph_snap_context *snapc = NULL, *last_snapc = NULL; | 608 | struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc; |
622 | struct pagevec pvec; | 609 | struct pagevec pvec; |
623 | int done = 0; | 610 | int done = 0; |
624 | int rc = 0; | 611 | int rc = 0; |
@@ -770,9 +757,10 @@ get_more_pages: | |||
770 | } | 757 | } |
771 | 758 | ||
772 | /* only if matching snap context */ | 759 | /* only if matching snap context */ |
773 | if (snapc != (void *)page->private) { | 760 | pgsnapc = (void *)page->private; |
774 | dout("page snapc %p != oldest %p\n", | 761 | if (pgsnapc->seq > snapc->seq) { |
775 | (void *)page->private, snapc); | 762 | dout("page snapc %p %lld > oldest %p %lld\n", |
763 | pgsnapc, pgsnapc->seq, snapc, snapc->seq); | ||
776 | unlock_page(page); | 764 | unlock_page(page); |
777 | if (!locked_pages) | 765 | if (!locked_pages) |
778 | continue; /* keep looking for snap */ | 766 | continue; /* keep looking for snap */ |
@@ -806,7 +794,6 @@ get_more_pages: | |||
806 | alloc_page_vec(client, req); | 794 | alloc_page_vec(client, req); |
807 | req->r_callback = writepages_finish; | 795 | req->r_callback = writepages_finish; |
808 | req->r_inode = inode; | 796 | req->r_inode = inode; |
809 | req->r_wbc = wbc; | ||
810 | } | 797 | } |
811 | 798 | ||
812 | /* note position of first page in pvec */ | 799 | /* note position of first page in pvec */ |
@@ -914,7 +901,10 @@ static int context_is_writeable_or_written(struct inode *inode, | |||
914 | struct ceph_snap_context *snapc) | 901 | struct ceph_snap_context *snapc) |
915 | { | 902 | { |
916 | struct ceph_snap_context *oldest = get_oldest_context(inode, NULL); | 903 | struct ceph_snap_context *oldest = get_oldest_context(inode, NULL); |
917 | return !oldest || snapc->seq <= oldest->seq; | 904 | int ret = !oldest || snapc->seq <= oldest->seq; |
905 | |||
906 | ceph_put_snap_context(oldest); | ||
907 | return ret; | ||
918 | } | 908 | } |
919 | 909 | ||
920 | /* | 910 | /* |
@@ -936,8 +926,8 @@ static int ceph_update_writeable_page(struct file *file, | |||
936 | int pos_in_page = pos & ~PAGE_CACHE_MASK; | 926 | int pos_in_page = pos & ~PAGE_CACHE_MASK; |
937 | int end_in_page = pos_in_page + len; | 927 | int end_in_page = pos_in_page + len; |
938 | loff_t i_size; | 928 | loff_t i_size; |
939 | struct ceph_snap_context *snapc; | ||
940 | int r; | 929 | int r; |
930 | struct ceph_snap_context *snapc, *oldest; | ||
941 | 931 | ||
942 | retry_locked: | 932 | retry_locked: |
943 | /* writepages currently holds page lock, but if we change that later, */ | 933 | /* writepages currently holds page lock, but if we change that later, */ |
@@ -947,23 +937,24 @@ retry_locked: | |||
947 | BUG_ON(!ci->i_snap_realm); | 937 | BUG_ON(!ci->i_snap_realm); |
948 | down_read(&mdsc->snap_rwsem); | 938 | down_read(&mdsc->snap_rwsem); |
949 | BUG_ON(!ci->i_snap_realm->cached_context); | 939 | BUG_ON(!ci->i_snap_realm->cached_context); |
950 | if (page->private && | 940 | snapc = (void *)page->private; |
951 | (void *)page->private != ci->i_snap_realm->cached_context) { | 941 | if (snapc && snapc != ci->i_head_snapc) { |
952 | /* | 942 | /* |
953 | * this page is already dirty in another (older) snap | 943 | * this page is already dirty in another (older) snap |
954 | * context! is it writeable now? | 944 | * context! is it writeable now? |
955 | */ | 945 | */ |
956 | snapc = get_oldest_context(inode, NULL); | 946 | oldest = get_oldest_context(inode, NULL); |
957 | up_read(&mdsc->snap_rwsem); | 947 | up_read(&mdsc->snap_rwsem); |
958 | 948 | ||
959 | if (snapc != (void *)page->private) { | 949 | if (snapc->seq > oldest->seq) { |
950 | ceph_put_snap_context(oldest); | ||
960 | dout(" page %p snapc %p not current or oldest\n", | 951 | dout(" page %p snapc %p not current or oldest\n", |
961 | page, (void *)page->private); | 952 | page, snapc); |
962 | /* | 953 | /* |
963 | * queue for writeback, and wait for snapc to | 954 | * queue for writeback, and wait for snapc to |
964 | * be writeable or written | 955 | * be writeable or written |
965 | */ | 956 | */ |
966 | snapc = ceph_get_snap_context((void *)page->private); | 957 | snapc = ceph_get_snap_context(snapc); |
967 | unlock_page(page); | 958 | unlock_page(page); |
968 | ceph_queue_writeback(inode); | 959 | ceph_queue_writeback(inode); |
969 | r = wait_event_interruptible(ci->i_cap_wq, | 960 | r = wait_event_interruptible(ci->i_cap_wq, |
@@ -973,6 +964,7 @@ retry_locked: | |||
973 | return r; | 964 | return r; |
974 | return -EAGAIN; | 965 | return -EAGAIN; |
975 | } | 966 | } |
967 | ceph_put_snap_context(oldest); | ||
976 | 968 | ||
977 | /* yay, writeable, do it now (without dropping page lock) */ | 969 | /* yay, writeable, do it now (without dropping page lock) */ |
978 | dout(" page %p snapc %p not current, but oldest\n", | 970 | dout(" page %p snapc %p not current, but oldest\n", |
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index f6394b94b866..818afe72e6c7 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
5 | #include <linux/err.h> | 5 | #include <linux/err.h> |
6 | #include <linux/slab.h> | ||
6 | 7 | ||
7 | #include "types.h" | 8 | #include "types.h" |
8 | #include "auth_none.h" | 9 | #include "auth_none.h" |
diff --git a/fs/ceph/auth_none.h b/fs/ceph/auth_none.h index 56c05533a31c..8164df1a08be 100644 --- a/fs/ceph/auth_none.h +++ b/fs/ceph/auth_none.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _FS_CEPH_AUTH_NONE_H | 1 | #ifndef _FS_CEPH_AUTH_NONE_H |
2 | #define _FS_CEPH_AUTH_NONE_H | 2 | #define _FS_CEPH_AUTH_NONE_H |
3 | 3 | ||
4 | #include <linux/slab.h> | ||
5 | |||
4 | #include "auth.h" | 6 | #include "auth.h" |
5 | 7 | ||
6 | /* | 8 | /* |
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index d9001a4dc8cc..fee5a08da881 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c | |||
@@ -12,8 +12,6 @@ | |||
12 | #include "auth.h" | 12 | #include "auth.h" |
13 | #include "decode.h" | 13 | #include "decode.h" |
14 | 14 | ||
15 | struct kmem_cache *ceph_x_ticketbuf_cachep; | ||
16 | |||
17 | #define TEMP_TICKET_BUF_LEN 256 | 15 | #define TEMP_TICKET_BUF_LEN 256 |
18 | 16 | ||
19 | static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed); | 17 | static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed); |
@@ -131,13 +129,12 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
131 | char *ticket_buf; | 129 | char *ticket_buf; |
132 | u8 struct_v; | 130 | u8 struct_v; |
133 | 131 | ||
134 | dbuf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, GFP_NOFS | GFP_ATOMIC); | 132 | dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); |
135 | if (!dbuf) | 133 | if (!dbuf) |
136 | return -ENOMEM; | 134 | return -ENOMEM; |
137 | 135 | ||
138 | ret = -ENOMEM; | 136 | ret = -ENOMEM; |
139 | ticket_buf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, | 137 | ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); |
140 | GFP_NOFS | GFP_ATOMIC); | ||
141 | if (!ticket_buf) | 138 | if (!ticket_buf) |
142 | goto out_dbuf; | 139 | goto out_dbuf; |
143 | 140 | ||
@@ -251,9 +248,9 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
251 | 248 | ||
252 | ret = 0; | 249 | ret = 0; |
253 | out: | 250 | out: |
254 | kmem_cache_free(ceph_x_ticketbuf_cachep, ticket_buf); | 251 | kfree(ticket_buf); |
255 | out_dbuf: | 252 | out_dbuf: |
256 | kmem_cache_free(ceph_x_ticketbuf_cachep, dbuf); | 253 | kfree(dbuf); |
257 | return ret; | 254 | return ret; |
258 | 255 | ||
259 | bad: | 256 | bad: |
@@ -605,8 +602,6 @@ static void ceph_x_destroy(struct ceph_auth_client *ac) | |||
605 | remove_ticket_handler(ac, th); | 602 | remove_ticket_handler(ac, th); |
606 | } | 603 | } |
607 | 604 | ||
608 | kmem_cache_destroy(ceph_x_ticketbuf_cachep); | ||
609 | |||
610 | kfree(ac->private); | 605 | kfree(ac->private); |
611 | ac->private = NULL; | 606 | ac->private = NULL; |
612 | } | 607 | } |
@@ -641,26 +636,20 @@ int ceph_x_init(struct ceph_auth_client *ac) | |||
641 | int ret; | 636 | int ret; |
642 | 637 | ||
643 | dout("ceph_x_init %p\n", ac); | 638 | dout("ceph_x_init %p\n", ac); |
639 | ret = -ENOMEM; | ||
644 | xi = kzalloc(sizeof(*xi), GFP_NOFS); | 640 | xi = kzalloc(sizeof(*xi), GFP_NOFS); |
645 | if (!xi) | 641 | if (!xi) |
646 | return -ENOMEM; | 642 | goto out; |
647 | 643 | ||
648 | ret = -ENOMEM; | ||
649 | ceph_x_ticketbuf_cachep = kmem_cache_create("ceph_x_ticketbuf", | ||
650 | TEMP_TICKET_BUF_LEN, 8, | ||
651 | (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), | ||
652 | NULL); | ||
653 | if (!ceph_x_ticketbuf_cachep) | ||
654 | goto done_nomem; | ||
655 | ret = -EINVAL; | 644 | ret = -EINVAL; |
656 | if (!ac->secret) { | 645 | if (!ac->secret) { |
657 | pr_err("no secret set (for auth_x protocol)\n"); | 646 | pr_err("no secret set (for auth_x protocol)\n"); |
658 | goto done_nomem; | 647 | goto out_nomem; |
659 | } | 648 | } |
660 | 649 | ||
661 | ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret); | 650 | ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret); |
662 | if (ret) | 651 | if (ret) |
663 | goto done_nomem; | 652 | goto out_nomem; |
664 | 653 | ||
665 | xi->starting = true; | 654 | xi->starting = true; |
666 | xi->ticket_handlers = RB_ROOT; | 655 | xi->ticket_handlers = RB_ROOT; |
@@ -670,10 +659,9 @@ int ceph_x_init(struct ceph_auth_client *ac) | |||
670 | ac->ops = &ceph_x_ops; | 659 | ac->ops = &ceph_x_ops; |
671 | return 0; | 660 | return 0; |
672 | 661 | ||
673 | done_nomem: | 662 | out_nomem: |
674 | kfree(xi); | 663 | kfree(xi); |
675 | if (ceph_x_ticketbuf_cachep) | 664 | out: |
676 | kmem_cache_destroy(ceph_x_ticketbuf_cachep); | ||
677 | return ret; | 665 | return ret; |
678 | } | 666 | } |
679 | 667 | ||
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 3710e077a857..d9400534b279 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -858,6 +858,8 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci) | |||
858 | } | 858 | } |
859 | 859 | ||
860 | /* | 860 | /* |
861 | * Remove a cap. Take steps to deal with a racing iterate_session_caps. | ||
862 | * | ||
861 | * caller should hold i_lock. | 863 | * caller should hold i_lock. |
862 | * caller will not hold session s_mutex if called from destroy_inode. | 864 | * caller will not hold session s_mutex if called from destroy_inode. |
863 | */ | 865 | */ |
@@ -866,15 +868,10 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
866 | struct ceph_mds_session *session = cap->session; | 868 | struct ceph_mds_session *session = cap->session; |
867 | struct ceph_inode_info *ci = cap->ci; | 869 | struct ceph_inode_info *ci = cap->ci; |
868 | struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; | 870 | struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; |
871 | int removed = 0; | ||
869 | 872 | ||
870 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); | 873 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); |
871 | 874 | ||
872 | /* remove from inode list */ | ||
873 | rb_erase(&cap->ci_node, &ci->i_caps); | ||
874 | cap->ci = NULL; | ||
875 | if (ci->i_auth_cap == cap) | ||
876 | ci->i_auth_cap = NULL; | ||
877 | |||
878 | /* remove from session list */ | 875 | /* remove from session list */ |
879 | spin_lock(&session->s_cap_lock); | 876 | spin_lock(&session->s_cap_lock); |
880 | if (session->s_cap_iterator == cap) { | 877 | if (session->s_cap_iterator == cap) { |
@@ -885,10 +882,18 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
885 | list_del_init(&cap->session_caps); | 882 | list_del_init(&cap->session_caps); |
886 | session->s_nr_caps--; | 883 | session->s_nr_caps--; |
887 | cap->session = NULL; | 884 | cap->session = NULL; |
885 | removed = 1; | ||
888 | } | 886 | } |
887 | /* protect backpointer with s_cap_lock: see iterate_session_caps */ | ||
888 | cap->ci = NULL; | ||
889 | spin_unlock(&session->s_cap_lock); | 889 | spin_unlock(&session->s_cap_lock); |
890 | 890 | ||
891 | if (cap->session == NULL) | 891 | /* remove from inode list */ |
892 | rb_erase(&cap->ci_node, &ci->i_caps); | ||
893 | if (ci->i_auth_cap == cap) | ||
894 | ci->i_auth_cap = NULL; | ||
895 | |||
896 | if (removed) | ||
892 | ceph_put_cap(cap); | 897 | ceph_put_cap(cap); |
893 | 898 | ||
894 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { | 899 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { |
@@ -1205,6 +1210,12 @@ retry: | |||
1205 | if (capsnap->dirty_pages || capsnap->writing) | 1210 | if (capsnap->dirty_pages || capsnap->writing) |
1206 | continue; | 1211 | continue; |
1207 | 1212 | ||
1213 | /* | ||
1214 | * if cap writeback already occurred, we should have dropped | ||
1215 | * the capsnap in ceph_put_wrbuffer_cap_refs. | ||
1216 | */ | ||
1217 | BUG_ON(capsnap->dirty == 0); | ||
1218 | |||
1208 | /* pick mds, take s_mutex */ | 1219 | /* pick mds, take s_mutex */ |
1209 | mds = __ceph_get_cap_mds(ci, &mseq); | 1220 | mds = __ceph_get_cap_mds(ci, &mseq); |
1210 | if (session && session->s_mds != mds) { | 1221 | if (session && session->s_mds != mds) { |
@@ -1855,8 +1866,8 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, | |||
1855 | } else { | 1866 | } else { |
1856 | pr_err("%p auth cap %p not mds%d ???\n", inode, | 1867 | pr_err("%p auth cap %p not mds%d ???\n", inode, |
1857 | cap, session->s_mds); | 1868 | cap, session->s_mds); |
1858 | spin_unlock(&inode->i_lock); | ||
1859 | } | 1869 | } |
1870 | spin_unlock(&inode->i_lock); | ||
1860 | } | 1871 | } |
1861 | } | 1872 | } |
1862 | 1873 | ||
@@ -2118,8 +2129,8 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) | |||
2118 | } | 2129 | } |
2119 | spin_unlock(&inode->i_lock); | 2130 | spin_unlock(&inode->i_lock); |
2120 | 2131 | ||
2121 | dout("put_cap_refs %p had %s %s\n", inode, ceph_cap_string(had), | 2132 | dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had), |
2122 | last ? "last" : ""); | 2133 | last ? " last" : "", put ? " put" : ""); |
2123 | 2134 | ||
2124 | if (last && !flushsnaps) | 2135 | if (last && !flushsnaps) |
2125 | ceph_check_caps(ci, 0, NULL); | 2136 | ceph_check_caps(ci, 0, NULL); |
@@ -2143,7 +2154,8 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | |||
2143 | { | 2154 | { |
2144 | struct inode *inode = &ci->vfs_inode; | 2155 | struct inode *inode = &ci->vfs_inode; |
2145 | int last = 0; | 2156 | int last = 0; |
2146 | int last_snap = 0; | 2157 | int complete_capsnap = 0; |
2158 | int drop_capsnap = 0; | ||
2147 | int found = 0; | 2159 | int found = 0; |
2148 | struct ceph_cap_snap *capsnap = NULL; | 2160 | struct ceph_cap_snap *capsnap = NULL; |
2149 | 2161 | ||
@@ -2166,19 +2178,32 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | |||
2166 | list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { | 2178 | list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { |
2167 | if (capsnap->context == snapc) { | 2179 | if (capsnap->context == snapc) { |
2168 | found = 1; | 2180 | found = 1; |
2169 | capsnap->dirty_pages -= nr; | ||
2170 | last_snap = !capsnap->dirty_pages; | ||
2171 | break; | 2181 | break; |
2172 | } | 2182 | } |
2173 | } | 2183 | } |
2174 | BUG_ON(!found); | 2184 | BUG_ON(!found); |
2185 | capsnap->dirty_pages -= nr; | ||
2186 | if (capsnap->dirty_pages == 0) { | ||
2187 | complete_capsnap = 1; | ||
2188 | if (capsnap->dirty == 0) | ||
2189 | /* cap writeback completed before we created | ||
2190 | * the cap_snap; no FLUSHSNAP is needed */ | ||
2191 | drop_capsnap = 1; | ||
2192 | } | ||
2175 | dout("put_wrbuffer_cap_refs on %p cap_snap %p " | 2193 | dout("put_wrbuffer_cap_refs on %p cap_snap %p " |
2176 | " snap %lld %d/%d -> %d/%d %s%s\n", | 2194 | " snap %lld %d/%d -> %d/%d %s%s%s\n", |
2177 | inode, capsnap, capsnap->context->seq, | 2195 | inode, capsnap, capsnap->context->seq, |
2178 | ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, | 2196 | ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, |
2179 | ci->i_wrbuffer_ref, capsnap->dirty_pages, | 2197 | ci->i_wrbuffer_ref, capsnap->dirty_pages, |
2180 | last ? " (wrbuffer last)" : "", | 2198 | last ? " (wrbuffer last)" : "", |
2181 | last_snap ? " (capsnap last)" : ""); | 2199 | complete_capsnap ? " (complete capsnap)" : "", |
2200 | drop_capsnap ? " (drop capsnap)" : ""); | ||
2201 | if (drop_capsnap) { | ||
2202 | ceph_put_snap_context(capsnap->context); | ||
2203 | list_del(&capsnap->ci_item); | ||
2204 | list_del(&capsnap->flushing_item); | ||
2205 | ceph_put_cap_snap(capsnap); | ||
2206 | } | ||
2182 | } | 2207 | } |
2183 | 2208 | ||
2184 | spin_unlock(&inode->i_lock); | 2209 | spin_unlock(&inode->i_lock); |
@@ -2186,10 +2211,12 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | |||
2186 | if (last) { | 2211 | if (last) { |
2187 | ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); | 2212 | ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); |
2188 | iput(inode); | 2213 | iput(inode); |
2189 | } else if (last_snap) { | 2214 | } else if (complete_capsnap) { |
2190 | ceph_flush_snaps(ci); | 2215 | ceph_flush_snaps(ci); |
2191 | wake_up(&ci->i_cap_wq); | 2216 | wake_up(&ci->i_cap_wq); |
2192 | } | 2217 | } |
2218 | if (drop_capsnap) | ||
2219 | iput(inode); | ||
2193 | } | 2220 | } |
2194 | 2221 | ||
2195 | /* | 2222 | /* |
@@ -2465,8 +2492,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, | |||
2465 | break; | 2492 | break; |
2466 | } | 2493 | } |
2467 | WARN_ON(capsnap->dirty_pages || capsnap->writing); | 2494 | WARN_ON(capsnap->dirty_pages || capsnap->writing); |
2468 | dout(" removing cap_snap %p follows %lld\n", | 2495 | dout(" removing %p cap_snap %p follows %lld\n", |
2469 | capsnap, follows); | 2496 | inode, capsnap, follows); |
2470 | ceph_put_snap_context(capsnap->context); | 2497 | ceph_put_snap_context(capsnap->context); |
2471 | list_del(&capsnap->ci_item); | 2498 | list_del(&capsnap->ci_item); |
2472 | list_del(&capsnap->flushing_item); | 2499 | list_del(&capsnap->flushing_item); |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 7261dc6c2ead..650d2db5ed26 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -171,11 +171,11 @@ more: | |||
171 | spin_lock(&inode->i_lock); | 171 | spin_lock(&inode->i_lock); |
172 | spin_lock(&dcache_lock); | 172 | spin_lock(&dcache_lock); |
173 | 173 | ||
174 | last = dentry; | ||
175 | |||
174 | if (err < 0) | 176 | if (err < 0) |
175 | goto out_unlock; | 177 | goto out_unlock; |
176 | 178 | ||
177 | last = dentry; | ||
178 | |||
179 | p = p->prev; | 179 | p = p->prev; |
180 | filp->f_pos++; | 180 | filp->f_pos++; |
181 | 181 | ||
@@ -312,7 +312,7 @@ more: | |||
312 | req->r_readdir_offset = fi->next_offset; | 312 | req->r_readdir_offset = fi->next_offset; |
313 | req->r_args.readdir.frag = cpu_to_le32(frag); | 313 | req->r_args.readdir.frag = cpu_to_le32(frag); |
314 | req->r_args.readdir.max_entries = cpu_to_le32(max_entries); | 314 | req->r_args.readdir.max_entries = cpu_to_le32(max_entries); |
315 | req->r_num_caps = max_entries; | 315 | req->r_num_caps = max_entries + 1; |
316 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 316 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
317 | if (err < 0) { | 317 | if (err < 0) { |
318 | ceph_mdsc_put_request(req); | 318 | ceph_mdsc_put_request(req); |
@@ -489,6 +489,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | |||
489 | struct inode *inode = ceph_get_snapdir(parent); | 489 | struct inode *inode = ceph_get_snapdir(parent); |
490 | dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", | 490 | dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", |
491 | dentry, dentry->d_name.len, dentry->d_name.name, inode); | 491 | dentry, dentry->d_name.len, dentry->d_name.name, inode); |
492 | BUG_ON(!d_unhashed(dentry)); | ||
492 | d_add(dentry, inode); | 493 | d_add(dentry, inode); |
493 | err = 0; | 494 | err = 0; |
494 | } | 495 | } |
@@ -879,7 +880,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
879 | * do_request, above). If there is no trace, we need | 880 | * do_request, above). If there is no trace, we need |
880 | * to do it here. | 881 | * to do it here. |
881 | */ | 882 | */ |
883 | |||
884 | /* d_move screws up d_subdirs order */ | ||
885 | ceph_i_clear(new_dir, CEPH_I_COMPLETE); | ||
886 | |||
882 | d_move(old_dentry, new_dentry); | 887 | d_move(old_dentry, new_dentry); |
888 | |||
889 | /* ensure target dentry is invalidated, despite | ||
890 | rehashing bug in vfs_rename_dir */ | ||
891 | new_dentry->d_time = jiffies; | ||
892 | ceph_dentry(new_dentry)->lease_shared_gen = 0; | ||
883 | } | 893 | } |
884 | ceph_mdsc_put_request(req); | 894 | ceph_mdsc_put_request(req); |
885 | return err; | 895 | return err; |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 4add3d5da2c1..ed6f19721d6e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -665,7 +665,8 @@ more: | |||
665 | * throw out any page cache pages in this range. this | 665 | * throw out any page cache pages in this range. this |
666 | * may block. | 666 | * may block. |
667 | */ | 667 | */ |
668 | truncate_inode_pages_range(inode->i_mapping, pos, pos+len); | 668 | truncate_inode_pages_range(inode->i_mapping, pos, |
669 | (pos+len) | (PAGE_CACHE_SIZE-1)); | ||
669 | } else { | 670 | } else { |
670 | pages = alloc_page_vector(num_pages); | 671 | pages = alloc_page_vector(num_pages); |
671 | if (IS_ERR(pages)) { | 672 | if (IS_ERR(pages)) { |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index aca82d55cc53..85b4d2ffdeba 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -733,6 +733,10 @@ no_change: | |||
733 | __ceph_get_fmode(ci, cap_fmode); | 733 | __ceph_get_fmode(ci, cap_fmode); |
734 | spin_unlock(&inode->i_lock); | 734 | spin_unlock(&inode->i_lock); |
735 | } | 735 | } |
736 | } else if (cap_fmode >= 0) { | ||
737 | pr_warning("mds issued no caps on %llx.%llx\n", | ||
738 | ceph_vinop(inode)); | ||
739 | __ceph_get_fmode(ci, cap_fmode); | ||
736 | } | 740 | } |
737 | 741 | ||
738 | /* update delegation info? */ | 742 | /* update delegation info? */ |
@@ -886,6 +890,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
886 | struct inode *in = NULL; | 890 | struct inode *in = NULL; |
887 | struct ceph_mds_reply_inode *ininfo; | 891 | struct ceph_mds_reply_inode *ininfo; |
888 | struct ceph_vino vino; | 892 | struct ceph_vino vino; |
893 | struct ceph_client *client = ceph_sb_to_client(sb); | ||
889 | int i = 0; | 894 | int i = 0; |
890 | int err = 0; | 895 | int err = 0; |
891 | 896 | ||
@@ -949,7 +954,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
949 | return err; | 954 | return err; |
950 | } | 955 | } |
951 | 956 | ||
952 | if (rinfo->head->is_dentry && !req->r_aborted) { | 957 | /* |
958 | * ignore null lease/binding on snapdir ENOENT, or else we | ||
959 | * will have trouble splicing in the virtual snapdir later | ||
960 | */ | ||
961 | if (rinfo->head->is_dentry && !req->r_aborted && | ||
962 | (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, | ||
963 | client->mount_args->snapdir_name, | ||
964 | req->r_dentry->d_name.len))) { | ||
953 | /* | 965 | /* |
954 | * lookup link rename : null -> possibly existing inode | 966 | * lookup link rename : null -> possibly existing inode |
955 | * mknod symlink mkdir : null -> new inode | 967 | * mknod symlink mkdir : null -> new inode |
@@ -989,6 +1001,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
989 | dn, dn->d_name.len, dn->d_name.name); | 1001 | dn, dn->d_name.len, dn->d_name.name); |
990 | dout("fill_trace doing d_move %p -> %p\n", | 1002 | dout("fill_trace doing d_move %p -> %p\n", |
991 | req->r_old_dentry, dn); | 1003 | req->r_old_dentry, dn); |
1004 | |||
1005 | /* d_move screws up d_subdirs order */ | ||
1006 | ceph_i_clear(dir, CEPH_I_COMPLETE); | ||
1007 | |||
992 | d_move(req->r_old_dentry, dn); | 1008 | d_move(req->r_old_dentry, dn); |
993 | dout(" src %p '%.*s' dst %p '%.*s'\n", | 1009 | dout(" src %p '%.*s' dst %p '%.*s'\n", |
994 | req->r_old_dentry, | 1010 | req->r_old_dentry, |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 60a9a4ae47be..24561a557e01 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -736,9 +736,10 @@ static void cleanup_cap_releases(struct ceph_mds_session *session) | |||
736 | } | 736 | } |
737 | 737 | ||
738 | /* | 738 | /* |
739 | * Helper to safely iterate over all caps associated with a session. | 739 | * Helper to safely iterate over all caps associated with a session, with |
740 | * special care taken to handle a racing __ceph_remove_cap(). | ||
740 | * | 741 | * |
741 | * caller must hold session s_mutex | 742 | * Caller must hold session s_mutex. |
742 | */ | 743 | */ |
743 | static int iterate_session_caps(struct ceph_mds_session *session, | 744 | static int iterate_session_caps(struct ceph_mds_session *session, |
744 | int (*cb)(struct inode *, struct ceph_cap *, | 745 | int (*cb)(struct inode *, struct ceph_cap *, |
@@ -2136,7 +2137,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) | |||
2136 | struct ceph_mds_session *session = NULL; | 2137 | struct ceph_mds_session *session = NULL; |
2137 | struct ceph_msg *reply; | 2138 | struct ceph_msg *reply; |
2138 | struct rb_node *p; | 2139 | struct rb_node *p; |
2139 | int err; | 2140 | int err = -ENOMEM; |
2140 | struct ceph_pagelist *pagelist; | 2141 | struct ceph_pagelist *pagelist; |
2141 | 2142 | ||
2142 | pr_info("reconnect to recovering mds%d\n", mds); | 2143 | pr_info("reconnect to recovering mds%d\n", mds); |
@@ -2185,7 +2186,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) | |||
2185 | goto fail; | 2186 | goto fail; |
2186 | err = iterate_session_caps(session, encode_caps_cb, pagelist); | 2187 | err = iterate_session_caps(session, encode_caps_cb, pagelist); |
2187 | if (err < 0) | 2188 | if (err < 0) |
2188 | goto out; | 2189 | goto fail; |
2189 | 2190 | ||
2190 | /* | 2191 | /* |
2191 | * snaprealms. we provide mds with the ino, seq (version), and | 2192 | * snaprealms. we provide mds with the ino, seq (version), and |
@@ -2213,28 +2214,31 @@ send: | |||
2213 | reply->nr_pages = calc_pages_for(0, pagelist->length); | 2214 | reply->nr_pages = calc_pages_for(0, pagelist->length); |
2214 | ceph_con_send(&session->s_con, reply); | 2215 | ceph_con_send(&session->s_con, reply); |
2215 | 2216 | ||
2216 | if (session) { | 2217 | session->s_state = CEPH_MDS_SESSION_OPEN; |
2217 | session->s_state = CEPH_MDS_SESSION_OPEN; | 2218 | mutex_unlock(&session->s_mutex); |
2218 | __wake_requests(mdsc, &session->s_waiting); | 2219 | |
2219 | } | 2220 | mutex_lock(&mdsc->mutex); |
2221 | __wake_requests(mdsc, &session->s_waiting); | ||
2222 | mutex_unlock(&mdsc->mutex); | ||
2223 | |||
2224 | ceph_put_mds_session(session); | ||
2220 | 2225 | ||
2221 | out: | ||
2222 | up_read(&mdsc->snap_rwsem); | 2226 | up_read(&mdsc->snap_rwsem); |
2223 | if (session) { | ||
2224 | mutex_unlock(&session->s_mutex); | ||
2225 | ceph_put_mds_session(session); | ||
2226 | } | ||
2227 | mutex_lock(&mdsc->mutex); | 2227 | mutex_lock(&mdsc->mutex); |
2228 | return; | 2228 | return; |
2229 | 2229 | ||
2230 | fail: | 2230 | fail: |
2231 | ceph_msg_put(reply); | 2231 | ceph_msg_put(reply); |
2232 | up_read(&mdsc->snap_rwsem); | ||
2233 | mutex_unlock(&session->s_mutex); | ||
2234 | ceph_put_mds_session(session); | ||
2232 | fail_nomsg: | 2235 | fail_nomsg: |
2233 | ceph_pagelist_release(pagelist); | 2236 | ceph_pagelist_release(pagelist); |
2234 | kfree(pagelist); | 2237 | kfree(pagelist); |
2235 | fail_nopagelist: | 2238 | fail_nopagelist: |
2236 | pr_err("ENOMEM preparing reconnect for mds%d\n", mds); | 2239 | pr_err("error %d preparing reconnect for mds%d\n", err, mds); |
2237 | goto out; | 2240 | mutex_lock(&mdsc->mutex); |
2241 | return; | ||
2238 | } | 2242 | } |
2239 | 2243 | ||
2240 | 2244 | ||
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 8f1715ffbe4b..cd4fadb6491a 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c | |||
@@ -30,6 +30,10 @@ static char tag_msg = CEPH_MSGR_TAG_MSG; | |||
30 | static char tag_ack = CEPH_MSGR_TAG_ACK; | 30 | static char tag_ack = CEPH_MSGR_TAG_ACK; |
31 | static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; | 31 | static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; |
32 | 32 | ||
33 | #ifdef CONFIG_LOCKDEP | ||
34 | static struct lock_class_key socket_class; | ||
35 | #endif | ||
36 | |||
33 | 37 | ||
34 | static void queue_con(struct ceph_connection *con); | 38 | static void queue_con(struct ceph_connection *con); |
35 | static void con_work(struct work_struct *); | 39 | static void con_work(struct work_struct *); |
@@ -228,6 +232,10 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) | |||
228 | con->sock = sock; | 232 | con->sock = sock; |
229 | sock->sk->sk_allocation = GFP_NOFS; | 233 | sock->sk->sk_allocation = GFP_NOFS; |
230 | 234 | ||
235 | #ifdef CONFIG_LOCKDEP | ||
236 | lockdep_set_class(&sock->sk->sk_lock, &socket_class); | ||
237 | #endif | ||
238 | |||
231 | set_sock_callbacks(sock, con); | 239 | set_sock_callbacks(sock, con); |
232 | 240 | ||
233 | dout("connect %s\n", pr_addr(&con->peer_addr.in_addr)); | 241 | dout("connect %s\n", pr_addr(&con->peer_addr.in_addr)); |
@@ -333,6 +341,7 @@ static void reset_connection(struct ceph_connection *con) | |||
333 | con->out_msg = NULL; | 341 | con->out_msg = NULL; |
334 | } | 342 | } |
335 | con->in_seq = 0; | 343 | con->in_seq = 0; |
344 | con->in_seq_acked = 0; | ||
336 | } | 345 | } |
337 | 346 | ||
338 | /* | 347 | /* |
@@ -483,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con) | |||
483 | list_move_tail(&m->list_head, &con->out_sent); | 492 | list_move_tail(&m->list_head, &con->out_sent); |
484 | } | 493 | } |
485 | 494 | ||
486 | m->hdr.seq = cpu_to_le64(++con->out_seq); | 495 | /* |
496 | * only assign outgoing seq # if we haven't sent this message | ||
497 | * yet. if it is requeued, resend with it's original seq. | ||
498 | */ | ||
499 | if (m->needs_out_seq) { | ||
500 | m->hdr.seq = cpu_to_le64(++con->out_seq); | ||
501 | m->needs_out_seq = false; | ||
502 | } | ||
487 | 503 | ||
488 | dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", | 504 | dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", |
489 | m, con->out_seq, le16_to_cpu(m->hdr.type), | 505 | m, con->out_seq, le16_to_cpu(m->hdr.type), |
@@ -1325,6 +1341,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
1325 | unsigned front_len, middle_len, data_len, data_off; | 1341 | unsigned front_len, middle_len, data_len, data_off; |
1326 | int datacrc = con->msgr->nocrc; | 1342 | int datacrc = con->msgr->nocrc; |
1327 | int skip; | 1343 | int skip; |
1344 | u64 seq; | ||
1328 | 1345 | ||
1329 | dout("read_partial_message con %p msg %p\n", con, m); | 1346 | dout("read_partial_message con %p msg %p\n", con, m); |
1330 | 1347 | ||
@@ -1359,6 +1376,25 @@ static int read_partial_message(struct ceph_connection *con) | |||
1359 | return -EIO; | 1376 | return -EIO; |
1360 | data_off = le16_to_cpu(con->in_hdr.data_off); | 1377 | data_off = le16_to_cpu(con->in_hdr.data_off); |
1361 | 1378 | ||
1379 | /* verify seq# */ | ||
1380 | seq = le64_to_cpu(con->in_hdr.seq); | ||
1381 | if ((s64)seq - (s64)con->in_seq < 1) { | ||
1382 | pr_info("skipping %s%lld %s seq %lld, expected %lld\n", | ||
1383 | ENTITY_NAME(con->peer_name), | ||
1384 | pr_addr(&con->peer_addr.in_addr), | ||
1385 | seq, con->in_seq + 1); | ||
1386 | con->in_base_pos = -front_len - middle_len - data_len - | ||
1387 | sizeof(m->footer); | ||
1388 | con->in_tag = CEPH_MSGR_TAG_READY; | ||
1389 | con->in_seq++; | ||
1390 | return 0; | ||
1391 | } else if ((s64)seq - (s64)con->in_seq > 1) { | ||
1392 | pr_err("read_partial_message bad seq %lld expected %lld\n", | ||
1393 | seq, con->in_seq + 1); | ||
1394 | con->error_msg = "bad message sequence # for incoming message"; | ||
1395 | return -EBADMSG; | ||
1396 | } | ||
1397 | |||
1362 | /* allocate message? */ | 1398 | /* allocate message? */ |
1363 | if (!con->in_msg) { | 1399 | if (!con->in_msg) { |
1364 | dout("got hdr type %d front %d data %d\n", con->in_hdr.type, | 1400 | dout("got hdr type %d front %d data %d\n", con->in_hdr.type, |
@@ -1370,6 +1406,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
1370 | con->in_base_pos = -front_len - middle_len - data_len - | 1406 | con->in_base_pos = -front_len - middle_len - data_len - |
1371 | sizeof(m->footer); | 1407 | sizeof(m->footer); |
1372 | con->in_tag = CEPH_MSGR_TAG_READY; | 1408 | con->in_tag = CEPH_MSGR_TAG_READY; |
1409 | con->in_seq++; | ||
1373 | return 0; | 1410 | return 0; |
1374 | } | 1411 | } |
1375 | if (IS_ERR(con->in_msg)) { | 1412 | if (IS_ERR(con->in_msg)) { |
@@ -1956,6 +1993,8 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) | |||
1956 | 1993 | ||
1957 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); | 1994 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); |
1958 | 1995 | ||
1996 | msg->needs_out_seq = true; | ||
1997 | |||
1959 | /* queue */ | 1998 | /* queue */ |
1960 | mutex_lock(&con->mutex); | 1999 | mutex_lock(&con->mutex); |
1961 | BUG_ON(!list_empty(&msg->list_head)); | 2000 | BUG_ON(!list_empty(&msg->list_head)); |
@@ -2021,6 +2060,7 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) | |||
2021 | ceph_msg_put(con->in_msg); | 2060 | ceph_msg_put(con->in_msg); |
2022 | con->in_msg = NULL; | 2061 | con->in_msg = NULL; |
2023 | con->in_tag = CEPH_MSGR_TAG_READY; | 2062 | con->in_tag = CEPH_MSGR_TAG_READY; |
2063 | con->in_seq++; | ||
2024 | } else { | 2064 | } else { |
2025 | dout("con_revoke_pages %p msg %p pages %p no-op\n", | 2065 | dout("con_revoke_pages %p msg %p pages %p no-op\n", |
2026 | con, con->in_msg, msg); | 2066 | con, con->in_msg, msg); |
@@ -2054,15 +2094,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, | |||
2054 | kref_init(&m->kref); | 2094 | kref_init(&m->kref); |
2055 | INIT_LIST_HEAD(&m->list_head); | 2095 | INIT_LIST_HEAD(&m->list_head); |
2056 | 2096 | ||
2097 | m->hdr.tid = 0; | ||
2057 | m->hdr.type = cpu_to_le16(type); | 2098 | m->hdr.type = cpu_to_le16(type); |
2099 | m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); | ||
2100 | m->hdr.version = 0; | ||
2058 | m->hdr.front_len = cpu_to_le32(front_len); | 2101 | m->hdr.front_len = cpu_to_le32(front_len); |
2059 | m->hdr.middle_len = 0; | 2102 | m->hdr.middle_len = 0; |
2060 | m->hdr.data_len = cpu_to_le32(page_len); | 2103 | m->hdr.data_len = cpu_to_le32(page_len); |
2061 | m->hdr.data_off = cpu_to_le16(page_off); | 2104 | m->hdr.data_off = cpu_to_le16(page_off); |
2062 | m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); | 2105 | m->hdr.reserved = 0; |
2063 | m->footer.front_crc = 0; | 2106 | m->footer.front_crc = 0; |
2064 | m->footer.middle_crc = 0; | 2107 | m->footer.middle_crc = 0; |
2065 | m->footer.data_crc = 0; | 2108 | m->footer.data_crc = 0; |
2109 | m->footer.flags = 0; | ||
2066 | m->front_max = front_len; | 2110 | m->front_max = front_len; |
2067 | m->front_is_vmalloc = false; | 2111 | m->front_is_vmalloc = false; |
2068 | m->more_to_follow = false; | 2112 | m->more_to_follow = false; |
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index a343dae73cdc..a5caf91cc971 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h | |||
@@ -86,6 +86,7 @@ struct ceph_msg { | |||
86 | struct kref kref; | 86 | struct kref kref; |
87 | bool front_is_vmalloc; | 87 | bool front_is_vmalloc; |
88 | bool more_to_follow; | 88 | bool more_to_follow; |
89 | bool needs_out_seq; | ||
89 | int front_max; | 90 | int front_max; |
90 | 91 | ||
91 | struct ceph_msgpool *pool; | 92 | struct ceph_msgpool *pool; |
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index c7b4dedaace6..3514f71ff85f 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
@@ -565,7 +565,8 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
565 | { | 565 | { |
566 | struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; | 566 | struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; |
567 | struct ceph_pg pgid; | 567 | struct ceph_pg pgid; |
568 | int o = -1; | 568 | int acting[CEPH_PG_MAX_SIZE]; |
569 | int o = -1, num = 0; | ||
569 | int err; | 570 | int err; |
570 | 571 | ||
571 | dout("map_osds %p tid %lld\n", req, req->r_tid); | 572 | dout("map_osds %p tid %lld\n", req, req->r_tid); |
@@ -576,10 +577,16 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
576 | pgid = reqhead->layout.ol_pgid; | 577 | pgid = reqhead->layout.ol_pgid; |
577 | req->r_pgid = pgid; | 578 | req->r_pgid = pgid; |
578 | 579 | ||
579 | o = ceph_calc_pg_primary(osdc->osdmap, pgid); | 580 | err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting); |
581 | if (err > 0) { | ||
582 | o = acting[0]; | ||
583 | num = err; | ||
584 | } | ||
580 | 585 | ||
581 | if ((req->r_osd && req->r_osd->o_osd == o && | 586 | if ((req->r_osd && req->r_osd->o_osd == o && |
582 | req->r_sent >= req->r_osd->o_incarnation) || | 587 | req->r_sent >= req->r_osd->o_incarnation && |
588 | req->r_num_pg_osds == num && | ||
589 | memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) || | ||
583 | (req->r_osd == NULL && o == -1)) | 590 | (req->r_osd == NULL && o == -1)) |
584 | return 0; /* no change */ | 591 | return 0; /* no change */ |
585 | 592 | ||
@@ -587,6 +594,10 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
587 | req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, | 594 | req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, |
588 | req->r_osd ? req->r_osd->o_osd : -1); | 595 | req->r_osd ? req->r_osd->o_osd : -1); |
589 | 596 | ||
597 | /* record full pg acting set */ | ||
598 | memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num); | ||
599 | req->r_num_pg_osds = num; | ||
600 | |||
590 | if (req->r_osd) { | 601 | if (req->r_osd) { |
591 | __cancel_request(req); | 602 | __cancel_request(req); |
592 | list_del_init(&req->r_osd_item); | 603 | list_del_init(&req->r_osd_item); |
@@ -612,7 +623,7 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
612 | __remove_osd_from_lru(req->r_osd); | 623 | __remove_osd_from_lru(req->r_osd); |
613 | list_add(&req->r_osd_item, &req->r_osd->o_requests); | 624 | list_add(&req->r_osd_item, &req->r_osd->o_requests); |
614 | } | 625 | } |
615 | err = 1; /* osd changed */ | 626 | err = 1; /* osd or pg changed */ |
616 | 627 | ||
617 | out: | 628 | out: |
618 | return err; | 629 | return err; |
@@ -779,16 +790,18 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, | |||
779 | struct ceph_osd_request *req; | 790 | struct ceph_osd_request *req; |
780 | u64 tid; | 791 | u64 tid; |
781 | int numops, object_len, flags; | 792 | int numops, object_len, flags; |
793 | s32 result; | ||
782 | 794 | ||
783 | tid = le64_to_cpu(msg->hdr.tid); | 795 | tid = le64_to_cpu(msg->hdr.tid); |
784 | if (msg->front.iov_len < sizeof(*rhead)) | 796 | if (msg->front.iov_len < sizeof(*rhead)) |
785 | goto bad; | 797 | goto bad; |
786 | numops = le32_to_cpu(rhead->num_ops); | 798 | numops = le32_to_cpu(rhead->num_ops); |
787 | object_len = le32_to_cpu(rhead->object_len); | 799 | object_len = le32_to_cpu(rhead->object_len); |
800 | result = le32_to_cpu(rhead->result); | ||
788 | if (msg->front.iov_len != sizeof(*rhead) + object_len + | 801 | if (msg->front.iov_len != sizeof(*rhead) + object_len + |
789 | numops * sizeof(struct ceph_osd_op)) | 802 | numops * sizeof(struct ceph_osd_op)) |
790 | goto bad; | 803 | goto bad; |
791 | dout("handle_reply %p tid %llu\n", msg, tid); | 804 | dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result); |
792 | 805 | ||
793 | /* lookup */ | 806 | /* lookup */ |
794 | mutex_lock(&osdc->request_mutex); | 807 | mutex_lock(&osdc->request_mutex); |
@@ -834,7 +847,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, | |||
834 | dout("handle_reply tid %llu flags %d\n", tid, flags); | 847 | dout("handle_reply tid %llu flags %d\n", tid, flags); |
835 | 848 | ||
836 | /* either this is a read, or we got the safe response */ | 849 | /* either this is a read, or we got the safe response */ |
837 | if ((flags & CEPH_OSD_FLAG_ONDISK) || | 850 | if (result < 0 || |
851 | (flags & CEPH_OSD_FLAG_ONDISK) || | ||
838 | ((flags & CEPH_OSD_FLAG_WRITE) == 0)) | 852 | ((flags & CEPH_OSD_FLAG_WRITE) == 0)) |
839 | __unregister_request(osdc, req); | 853 | __unregister_request(osdc, req); |
840 | 854 | ||
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h index b0759911e7c3..ce776989ef6a 100644 --- a/fs/ceph/osd_client.h +++ b/fs/ceph/osd_client.h | |||
@@ -48,6 +48,8 @@ struct ceph_osd_request { | |||
48 | struct list_head r_osd_item; | 48 | struct list_head r_osd_item; |
49 | struct ceph_osd *r_osd; | 49 | struct ceph_osd *r_osd; |
50 | struct ceph_pg r_pgid; | 50 | struct ceph_pg r_pgid; |
51 | int r_pg_osds[CEPH_PG_MAX_SIZE]; | ||
52 | int r_num_pg_osds; | ||
51 | 53 | ||
52 | struct ceph_connection *r_con_filling_msg; | 54 | struct ceph_connection *r_con_filling_msg; |
53 | 55 | ||
@@ -66,7 +68,6 @@ struct ceph_osd_request { | |||
66 | struct list_head r_unsafe_item; | 68 | struct list_head r_unsafe_item; |
67 | 69 | ||
68 | struct inode *r_inode; /* for use by callbacks */ | 70 | struct inode *r_inode; /* for use by callbacks */ |
69 | struct writeback_control *r_wbc; /* ditto */ | ||
70 | 71 | ||
71 | char r_oid[40]; /* object name */ | 72 | char r_oid[40]; /* object name */ |
72 | int r_oid_len; | 73 | int r_oid_len; |
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index 21c6623c4b07..cfdd8f4388b7 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c | |||
@@ -314,71 +314,6 @@ bad: | |||
314 | return ERR_PTR(err); | 314 | return ERR_PTR(err); |
315 | } | 315 | } |
316 | 316 | ||
317 | |||
318 | /* | ||
319 | * osd map | ||
320 | */ | ||
321 | void ceph_osdmap_destroy(struct ceph_osdmap *map) | ||
322 | { | ||
323 | dout("osdmap_destroy %p\n", map); | ||
324 | if (map->crush) | ||
325 | crush_destroy(map->crush); | ||
326 | while (!RB_EMPTY_ROOT(&map->pg_temp)) { | ||
327 | struct ceph_pg_mapping *pg = | ||
328 | rb_entry(rb_first(&map->pg_temp), | ||
329 | struct ceph_pg_mapping, node); | ||
330 | rb_erase(&pg->node, &map->pg_temp); | ||
331 | kfree(pg); | ||
332 | } | ||
333 | while (!RB_EMPTY_ROOT(&map->pg_pools)) { | ||
334 | struct ceph_pg_pool_info *pi = | ||
335 | rb_entry(rb_first(&map->pg_pools), | ||
336 | struct ceph_pg_pool_info, node); | ||
337 | rb_erase(&pi->node, &map->pg_pools); | ||
338 | kfree(pi); | ||
339 | } | ||
340 | kfree(map->osd_state); | ||
341 | kfree(map->osd_weight); | ||
342 | kfree(map->osd_addr); | ||
343 | kfree(map); | ||
344 | } | ||
345 | |||
346 | /* | ||
347 | * adjust max osd value. reallocate arrays. | ||
348 | */ | ||
349 | static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) | ||
350 | { | ||
351 | u8 *state; | ||
352 | struct ceph_entity_addr *addr; | ||
353 | u32 *weight; | ||
354 | |||
355 | state = kcalloc(max, sizeof(*state), GFP_NOFS); | ||
356 | addr = kcalloc(max, sizeof(*addr), GFP_NOFS); | ||
357 | weight = kcalloc(max, sizeof(*weight), GFP_NOFS); | ||
358 | if (state == NULL || addr == NULL || weight == NULL) { | ||
359 | kfree(state); | ||
360 | kfree(addr); | ||
361 | kfree(weight); | ||
362 | return -ENOMEM; | ||
363 | } | ||
364 | |||
365 | /* copy old? */ | ||
366 | if (map->osd_state) { | ||
367 | memcpy(state, map->osd_state, map->max_osd*sizeof(*state)); | ||
368 | memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr)); | ||
369 | memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight)); | ||
370 | kfree(map->osd_state); | ||
371 | kfree(map->osd_addr); | ||
372 | kfree(map->osd_weight); | ||
373 | } | ||
374 | |||
375 | map->osd_state = state; | ||
376 | map->osd_weight = weight; | ||
377 | map->osd_addr = addr; | ||
378 | map->max_osd = max; | ||
379 | return 0; | ||
380 | } | ||
381 | |||
382 | /* | 317 | /* |
383 | * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid | 318 | * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid |
384 | * to a set of osds) | 319 | * to a set of osds) |
@@ -482,6 +417,13 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) | |||
482 | return NULL; | 417 | return NULL; |
483 | } | 418 | } |
484 | 419 | ||
420 | static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) | ||
421 | { | ||
422 | rb_erase(&pi->node, root); | ||
423 | kfree(pi->name); | ||
424 | kfree(pi); | ||
425 | } | ||
426 | |||
485 | void __decode_pool(void **p, struct ceph_pg_pool_info *pi) | 427 | void __decode_pool(void **p, struct ceph_pg_pool_info *pi) |
486 | { | 428 | { |
487 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); | 429 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); |
@@ -490,6 +432,98 @@ void __decode_pool(void **p, struct ceph_pg_pool_info *pi) | |||
490 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; | 432 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; |
491 | } | 433 | } |
492 | 434 | ||
435 | static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) | ||
436 | { | ||
437 | struct ceph_pg_pool_info *pi; | ||
438 | u32 num, len, pool; | ||
439 | |||
440 | ceph_decode_32_safe(p, end, num, bad); | ||
441 | dout(" %d pool names\n", num); | ||
442 | while (num--) { | ||
443 | ceph_decode_32_safe(p, end, pool, bad); | ||
444 | ceph_decode_32_safe(p, end, len, bad); | ||
445 | dout(" pool %d len %d\n", pool, len); | ||
446 | pi = __lookup_pg_pool(&map->pg_pools, pool); | ||
447 | if (pi) { | ||
448 | kfree(pi->name); | ||
449 | pi->name = kmalloc(len + 1, GFP_NOFS); | ||
450 | if (pi->name) { | ||
451 | memcpy(pi->name, *p, len); | ||
452 | pi->name[len] = '\0'; | ||
453 | dout(" name is %s\n", pi->name); | ||
454 | } | ||
455 | } | ||
456 | *p += len; | ||
457 | } | ||
458 | return 0; | ||
459 | |||
460 | bad: | ||
461 | return -EINVAL; | ||
462 | } | ||
463 | |||
464 | /* | ||
465 | * osd map | ||
466 | */ | ||
467 | void ceph_osdmap_destroy(struct ceph_osdmap *map) | ||
468 | { | ||
469 | dout("osdmap_destroy %p\n", map); | ||
470 | if (map->crush) | ||
471 | crush_destroy(map->crush); | ||
472 | while (!RB_EMPTY_ROOT(&map->pg_temp)) { | ||
473 | struct ceph_pg_mapping *pg = | ||
474 | rb_entry(rb_first(&map->pg_temp), | ||
475 | struct ceph_pg_mapping, node); | ||
476 | rb_erase(&pg->node, &map->pg_temp); | ||
477 | kfree(pg); | ||
478 | } | ||
479 | while (!RB_EMPTY_ROOT(&map->pg_pools)) { | ||
480 | struct ceph_pg_pool_info *pi = | ||
481 | rb_entry(rb_first(&map->pg_pools), | ||
482 | struct ceph_pg_pool_info, node); | ||
483 | __remove_pg_pool(&map->pg_pools, pi); | ||
484 | } | ||
485 | kfree(map->osd_state); | ||
486 | kfree(map->osd_weight); | ||
487 | kfree(map->osd_addr); | ||
488 | kfree(map); | ||
489 | } | ||
490 | |||
491 | /* | ||
492 | * adjust max osd value. reallocate arrays. | ||
493 | */ | ||
494 | static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) | ||
495 | { | ||
496 | u8 *state; | ||
497 | struct ceph_entity_addr *addr; | ||
498 | u32 *weight; | ||
499 | |||
500 | state = kcalloc(max, sizeof(*state), GFP_NOFS); | ||
501 | addr = kcalloc(max, sizeof(*addr), GFP_NOFS); | ||
502 | weight = kcalloc(max, sizeof(*weight), GFP_NOFS); | ||
503 | if (state == NULL || addr == NULL || weight == NULL) { | ||
504 | kfree(state); | ||
505 | kfree(addr); | ||
506 | kfree(weight); | ||
507 | return -ENOMEM; | ||
508 | } | ||
509 | |||
510 | /* copy old? */ | ||
511 | if (map->osd_state) { | ||
512 | memcpy(state, map->osd_state, map->max_osd*sizeof(*state)); | ||
513 | memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr)); | ||
514 | memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight)); | ||
515 | kfree(map->osd_state); | ||
516 | kfree(map->osd_addr); | ||
517 | kfree(map->osd_weight); | ||
518 | } | ||
519 | |||
520 | map->osd_state = state; | ||
521 | map->osd_weight = weight; | ||
522 | map->osd_addr = addr; | ||
523 | map->max_osd = max; | ||
524 | return 0; | ||
525 | } | ||
526 | |||
493 | /* | 527 | /* |
494 | * decode a full map. | 528 | * decode a full map. |
495 | */ | 529 | */ |
@@ -526,7 +560,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
526 | ceph_decode_32_safe(p, end, max, bad); | 560 | ceph_decode_32_safe(p, end, max, bad); |
527 | while (max--) { | 561 | while (max--) { |
528 | ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); | 562 | ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); |
529 | pi = kmalloc(sizeof(*pi), GFP_NOFS); | 563 | pi = kzalloc(sizeof(*pi), GFP_NOFS); |
530 | if (!pi) | 564 | if (!pi) |
531 | goto bad; | 565 | goto bad; |
532 | pi->id = ceph_decode_32(p); | 566 | pi->id = ceph_decode_32(p); |
@@ -539,6 +573,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
539 | __decode_pool(p, pi); | 573 | __decode_pool(p, pi); |
540 | __insert_pg_pool(&map->pg_pools, pi); | 574 | __insert_pg_pool(&map->pg_pools, pi); |
541 | } | 575 | } |
576 | |||
577 | if (version >= 5 && __decode_pool_names(p, end, map) < 0) | ||
578 | goto bad; | ||
579 | |||
542 | ceph_decode_32_safe(p, end, map->pool_max, bad); | 580 | ceph_decode_32_safe(p, end, map->pool_max, bad); |
543 | 581 | ||
544 | ceph_decode_32_safe(p, end, map->flags, bad); | 582 | ceph_decode_32_safe(p, end, map->flags, bad); |
@@ -712,7 +750,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
712 | } | 750 | } |
713 | pi = __lookup_pg_pool(&map->pg_pools, pool); | 751 | pi = __lookup_pg_pool(&map->pg_pools, pool); |
714 | if (!pi) { | 752 | if (!pi) { |
715 | pi = kmalloc(sizeof(*pi), GFP_NOFS); | 753 | pi = kzalloc(sizeof(*pi), GFP_NOFS); |
716 | if (!pi) { | 754 | if (!pi) { |
717 | err = -ENOMEM; | 755 | err = -ENOMEM; |
718 | goto bad; | 756 | goto bad; |
@@ -722,6 +760,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
722 | } | 760 | } |
723 | __decode_pool(p, pi); | 761 | __decode_pool(p, pi); |
724 | } | 762 | } |
763 | if (version >= 5 && __decode_pool_names(p, end, map) < 0) | ||
764 | goto bad; | ||
725 | 765 | ||
726 | /* old_pool */ | 766 | /* old_pool */ |
727 | ceph_decode_32_safe(p, end, len, bad); | 767 | ceph_decode_32_safe(p, end, len, bad); |
@@ -730,10 +770,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
730 | 770 | ||
731 | ceph_decode_32_safe(p, end, pool, bad); | 771 | ceph_decode_32_safe(p, end, pool, bad); |
732 | pi = __lookup_pg_pool(&map->pg_pools, pool); | 772 | pi = __lookup_pg_pool(&map->pg_pools, pool); |
733 | if (pi) { | 773 | if (pi) |
734 | rb_erase(&pi->node, &map->pg_pools); | 774 | __remove_pg_pool(&map->pg_pools, pi); |
735 | kfree(pi); | ||
736 | } | ||
737 | } | 775 | } |
738 | 776 | ||
739 | /* new_up */ | 777 | /* new_up */ |
@@ -1003,12 +1041,33 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
1003 | } | 1041 | } |
1004 | 1042 | ||
1005 | /* | 1043 | /* |
1044 | * Return acting set for given pgid. | ||
1045 | */ | ||
1046 | int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | ||
1047 | int *acting) | ||
1048 | { | ||
1049 | int rawosds[CEPH_PG_MAX_SIZE], *osds; | ||
1050 | int i, o, num = CEPH_PG_MAX_SIZE; | ||
1051 | |||
1052 | osds = calc_pg_raw(osdmap, pgid, rawosds, &num); | ||
1053 | if (!osds) | ||
1054 | return -1; | ||
1055 | |||
1056 | /* primary is first up osd */ | ||
1057 | o = 0; | ||
1058 | for (i = 0; i < num; i++) | ||
1059 | if (ceph_osd_is_up(osdmap, osds[i])) | ||
1060 | acting[o++] = osds[i]; | ||
1061 | return o; | ||
1062 | } | ||
1063 | |||
1064 | /* | ||
1006 | * Return primary osd for given pgid, or -1 if none. | 1065 | * Return primary osd for given pgid, or -1 if none. |
1007 | */ | 1066 | */ |
1008 | int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) | 1067 | int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) |
1009 | { | 1068 | { |
1010 | int rawosds[10], *osds; | 1069 | int rawosds[CEPH_PG_MAX_SIZE], *osds; |
1011 | int i, num = ARRAY_SIZE(rawosds); | 1070 | int i, num = CEPH_PG_MAX_SIZE; |
1012 | 1071 | ||
1013 | osds = calc_pg_raw(osdmap, pgid, rawosds, &num); | 1072 | osds = calc_pg_raw(osdmap, pgid, rawosds, &num); |
1014 | if (!osds) | 1073 | if (!osds) |
@@ -1016,9 +1075,7 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) | |||
1016 | 1075 | ||
1017 | /* primary is first up osd */ | 1076 | /* primary is first up osd */ |
1018 | for (i = 0; i < num; i++) | 1077 | for (i = 0; i < num; i++) |
1019 | if (ceph_osd_is_up(osdmap, osds[i])) { | 1078 | if (ceph_osd_is_up(osdmap, osds[i])) |
1020 | return osds[i]; | 1079 | return osds[i]; |
1021 | break; | ||
1022 | } | ||
1023 | return -1; | 1080 | return -1; |
1024 | } | 1081 | } |
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h index 1fb55afb2642..970b547e510d 100644 --- a/fs/ceph/osdmap.h +++ b/fs/ceph/osdmap.h | |||
@@ -23,6 +23,7 @@ struct ceph_pg_pool_info { | |||
23 | int id; | 23 | int id; |
24 | struct ceph_pg_pool v; | 24 | struct ceph_pg_pool v; |
25 | int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; | 25 | int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; |
26 | char *name; | ||
26 | }; | 27 | }; |
27 | 28 | ||
28 | struct ceph_pg_mapping { | 29 | struct ceph_pg_mapping { |
@@ -119,6 +120,8 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol, | |||
119 | const char *oid, | 120 | const char *oid, |
120 | struct ceph_file_layout *fl, | 121 | struct ceph_file_layout *fl, |
121 | struct ceph_osdmap *osdmap); | 122 | struct ceph_osdmap *osdmap); |
123 | extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | ||
124 | int *acting); | ||
122 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, | 125 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, |
123 | struct ceph_pg pgid); | 126 | struct ceph_pg pgid); |
124 | 127 | ||
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index 26ac8b89a676..fd56451a871f 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h | |||
@@ -11,8 +11,10 @@ | |||
11 | /* | 11 | /* |
12 | * osdmap encoding versions | 12 | * osdmap encoding versions |
13 | */ | 13 | */ |
14 | #define CEPH_OSDMAP_INC_VERSION 4 | 14 | #define CEPH_OSDMAP_INC_VERSION 5 |
15 | #define CEPH_OSDMAP_VERSION 4 | 15 | #define CEPH_OSDMAP_INC_VERSION_EXT 5 |
16 | #define CEPH_OSDMAP_VERSION 5 | ||
17 | #define CEPH_OSDMAP_VERSION_EXT 5 | ||
16 | 18 | ||
17 | /* | 19 | /* |
18 | * fs id | 20 | * fs id |
@@ -56,6 +58,7 @@ struct ceph_timespec { | |||
56 | #define CEPH_PG_LAYOUT_LINEAR 2 | 58 | #define CEPH_PG_LAYOUT_LINEAR 2 |
57 | #define CEPH_PG_LAYOUT_HYBRID 3 | 59 | #define CEPH_PG_LAYOUT_HYBRID 3 |
58 | 60 | ||
61 | #define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */ | ||
59 | 62 | ||
60 | /* | 63 | /* |
61 | * placement group. | 64 | * placement group. |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index e6f9bc57d472..d5114db70453 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -431,8 +431,7 @@ static int dup_array(u64 **dst, __le64 *src, int num) | |||
431 | * Caller must hold snap_rwsem for read (i.e., the realm topology won't | 431 | * Caller must hold snap_rwsem for read (i.e., the realm topology won't |
432 | * change). | 432 | * change). |
433 | */ | 433 | */ |
434 | void ceph_queue_cap_snap(struct ceph_inode_info *ci, | 434 | void ceph_queue_cap_snap(struct ceph_inode_info *ci) |
435 | struct ceph_snap_context *snapc) | ||
436 | { | 435 | { |
437 | struct inode *inode = &ci->vfs_inode; | 436 | struct inode *inode = &ci->vfs_inode; |
438 | struct ceph_cap_snap *capsnap; | 437 | struct ceph_cap_snap *capsnap; |
@@ -451,10 +450,11 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci, | |||
451 | as no new writes are allowed to start when pending, so any | 450 | as no new writes are allowed to start when pending, so any |
452 | writes in progress now were started before the previous | 451 | writes in progress now were started before the previous |
453 | cap_snap. lucky us. */ | 452 | cap_snap. lucky us. */ |
454 | dout("queue_cap_snap %p snapc %p seq %llu used %d" | 453 | dout("queue_cap_snap %p already pending\n", inode); |
455 | " already pending\n", inode, snapc, snapc->seq, used); | ||
456 | kfree(capsnap); | 454 | kfree(capsnap); |
457 | } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) { | 455 | } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) { |
456 | struct ceph_snap_context *snapc = ci->i_head_snapc; | ||
457 | |||
458 | igrab(inode); | 458 | igrab(inode); |
459 | 459 | ||
460 | atomic_set(&capsnap->nref, 1); | 460 | atomic_set(&capsnap->nref, 1); |
@@ -463,7 +463,6 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci, | |||
463 | INIT_LIST_HEAD(&capsnap->flushing_item); | 463 | INIT_LIST_HEAD(&capsnap->flushing_item); |
464 | 464 | ||
465 | capsnap->follows = snapc->seq - 1; | 465 | capsnap->follows = snapc->seq - 1; |
466 | capsnap->context = ceph_get_snap_context(snapc); | ||
467 | capsnap->issued = __ceph_caps_issued(ci, NULL); | 466 | capsnap->issued = __ceph_caps_issued(ci, NULL); |
468 | capsnap->dirty = __ceph_caps_dirty(ci); | 467 | capsnap->dirty = __ceph_caps_dirty(ci); |
469 | 468 | ||
@@ -480,7 +479,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci, | |||
480 | snapshot. */ | 479 | snapshot. */ |
481 | capsnap->dirty_pages = ci->i_wrbuffer_ref_head; | 480 | capsnap->dirty_pages = ci->i_wrbuffer_ref_head; |
482 | ci->i_wrbuffer_ref_head = 0; | 481 | ci->i_wrbuffer_ref_head = 0; |
483 | ceph_put_snap_context(ci->i_head_snapc); | 482 | capsnap->context = snapc; |
484 | ci->i_head_snapc = NULL; | 483 | ci->i_head_snapc = NULL; |
485 | list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); | 484 | list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); |
486 | 485 | ||
@@ -522,15 +521,17 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, | |||
522 | capsnap->ctime = inode->i_ctime; | 521 | capsnap->ctime = inode->i_ctime; |
523 | capsnap->time_warp_seq = ci->i_time_warp_seq; | 522 | capsnap->time_warp_seq = ci->i_time_warp_seq; |
524 | if (capsnap->dirty_pages) { | 523 | if (capsnap->dirty_pages) { |
525 | dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu " | 524 | dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu " |
526 | "still has %d dirty pages\n", inode, capsnap, | 525 | "still has %d dirty pages\n", inode, capsnap, |
527 | capsnap->context, capsnap->context->seq, | 526 | capsnap->context, capsnap->context->seq, |
528 | capsnap->size, capsnap->dirty_pages); | 527 | ceph_cap_string(capsnap->dirty), capsnap->size, |
528 | capsnap->dirty_pages); | ||
529 | return 0; | 529 | return 0; |
530 | } | 530 | } |
531 | dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu clean\n", | 531 | dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu\n", |
532 | inode, capsnap, capsnap->context, | 532 | inode, capsnap, capsnap->context, |
533 | capsnap->context->seq, capsnap->size); | 533 | capsnap->context->seq, ceph_cap_string(capsnap->dirty), |
534 | capsnap->size); | ||
534 | 535 | ||
535 | spin_lock(&mdsc->snap_flush_lock); | 536 | spin_lock(&mdsc->snap_flush_lock); |
536 | list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list); | 537 | list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list); |
@@ -602,7 +603,7 @@ more: | |||
602 | if (lastinode) | 603 | if (lastinode) |
603 | iput(lastinode); | 604 | iput(lastinode); |
604 | lastinode = inode; | 605 | lastinode = inode; |
605 | ceph_queue_cap_snap(ci, realm->cached_context); | 606 | ceph_queue_cap_snap(ci); |
606 | spin_lock(&realm->inodes_with_caps_lock); | 607 | spin_lock(&realm->inodes_with_caps_lock); |
607 | } | 608 | } |
608 | spin_unlock(&realm->inodes_with_caps_lock); | 609 | spin_unlock(&realm->inodes_with_caps_lock); |
@@ -824,8 +825,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
824 | spin_unlock(&realm->inodes_with_caps_lock); | 825 | spin_unlock(&realm->inodes_with_caps_lock); |
825 | spin_unlock(&inode->i_lock); | 826 | spin_unlock(&inode->i_lock); |
826 | 827 | ||
827 | ceph_queue_cap_snap(ci, | 828 | ceph_queue_cap_snap(ci); |
828 | ci->i_snap_realm->cached_context); | ||
829 | 829 | ||
830 | iput(inode); | 830 | iput(inode); |
831 | continue; | 831 | continue; |
@@ -869,16 +869,20 @@ skip_inode: | |||
869 | continue; | 869 | continue; |
870 | ci = ceph_inode(inode); | 870 | ci = ceph_inode(inode); |
871 | spin_lock(&inode->i_lock); | 871 | spin_lock(&inode->i_lock); |
872 | if (!ci->i_snap_realm) | 872 | if (list_empty(&ci->i_snap_realm_item)) { |
873 | goto split_skip_inode; | 873 | struct ceph_snap_realm *oldrealm = |
874 | ceph_put_snap_realm(mdsc, ci->i_snap_realm); | 874 | ci->i_snap_realm; |
875 | spin_lock(&realm->inodes_with_caps_lock); | 875 | |
876 | list_add(&ci->i_snap_realm_item, | 876 | dout(" moving %p to split realm %llx %p\n", |
877 | &realm->inodes_with_caps); | 877 | inode, realm->ino, realm); |
878 | ci->i_snap_realm = realm; | 878 | spin_lock(&realm->inodes_with_caps_lock); |
879 | spin_unlock(&realm->inodes_with_caps_lock); | 879 | list_add(&ci->i_snap_realm_item, |
880 | ceph_get_snap_realm(mdsc, realm); | 880 | &realm->inodes_with_caps); |
881 | split_skip_inode: | 881 | ci->i_snap_realm = realm; |
882 | spin_unlock(&realm->inodes_with_caps_lock); | ||
883 | ceph_get_snap_realm(mdsc, realm); | ||
884 | ceph_put_snap_realm(mdsc, oldrealm); | ||
885 | } | ||
882 | spin_unlock(&inode->i_lock); | 886 | spin_unlock(&inode->i_lock); |
883 | iput(inode); | 887 | iput(inode); |
884 | } | 888 | } |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 75d02eaa1279..110857ba9269 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -47,10 +47,20 @@ const char *ceph_file_part(const char *s, int len) | |||
47 | */ | 47 | */ |
48 | static void ceph_put_super(struct super_block *s) | 48 | static void ceph_put_super(struct super_block *s) |
49 | { | 49 | { |
50 | struct ceph_client *cl = ceph_client(s); | 50 | struct ceph_client *client = ceph_sb_to_client(s); |
51 | 51 | ||
52 | dout("put_super\n"); | 52 | dout("put_super\n"); |
53 | ceph_mdsc_close_sessions(&cl->mdsc); | 53 | ceph_mdsc_close_sessions(&client->mdsc); |
54 | |||
55 | /* | ||
56 | * ensure we release the bdi before put_anon_super releases | ||
57 | * the device name. | ||
58 | */ | ||
59 | if (s->s_bdi == &client->backing_dev_info) { | ||
60 | bdi_unregister(&client->backing_dev_info); | ||
61 | s->s_bdi = NULL; | ||
62 | } | ||
63 | |||
54 | return; | 64 | return; |
55 | } | 65 | } |
56 | 66 | ||
@@ -636,6 +646,8 @@ static void ceph_destroy_client(struct ceph_client *client) | |||
636 | destroy_workqueue(client->pg_inv_wq); | 646 | destroy_workqueue(client->pg_inv_wq); |
637 | destroy_workqueue(client->trunc_wq); | 647 | destroy_workqueue(client->trunc_wq); |
638 | 648 | ||
649 | bdi_destroy(&client->backing_dev_info); | ||
650 | |||
639 | if (client->msgr) | 651 | if (client->msgr) |
640 | ceph_messenger_destroy(client->msgr); | 652 | ceph_messenger_destroy(client->msgr); |
641 | mempool_destroy(client->wb_pagevec_pool); | 653 | mempool_destroy(client->wb_pagevec_pool); |
@@ -876,14 +888,14 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | |||
876 | { | 888 | { |
877 | int err; | 889 | int err; |
878 | 890 | ||
879 | sb->s_bdi = &client->backing_dev_info; | ||
880 | |||
881 | /* set ra_pages based on rsize mount option? */ | 891 | /* set ra_pages based on rsize mount option? */ |
882 | if (client->mount_args->rsize >= PAGE_CACHE_SIZE) | 892 | if (client->mount_args->rsize >= PAGE_CACHE_SIZE) |
883 | client->backing_dev_info.ra_pages = | 893 | client->backing_dev_info.ra_pages = |
884 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) | 894 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) |
885 | >> PAGE_SHIFT; | 895 | >> PAGE_SHIFT; |
886 | err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); | 896 | err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); |
897 | if (!err) | ||
898 | sb->s_bdi = &client->backing_dev_info; | ||
887 | return err; | 899 | return err; |
888 | } | 900 | } |
889 | 901 | ||
@@ -957,9 +969,6 @@ static void ceph_kill_sb(struct super_block *s) | |||
957 | dout("kill_sb %p\n", s); | 969 | dout("kill_sb %p\n", s); |
958 | ceph_mdsc_pre_umount(&client->mdsc); | 970 | ceph_mdsc_pre_umount(&client->mdsc); |
959 | kill_anon_super(s); /* will call put_super after sb is r/o */ | 971 | kill_anon_super(s); /* will call put_super after sb is r/o */ |
960 | if (s->s_bdi == &client->backing_dev_info) | ||
961 | bdi_unregister(&client->backing_dev_info); | ||
962 | bdi_destroy(&client->backing_dev_info); | ||
963 | ceph_destroy_client(client); | 972 | ceph_destroy_client(client); |
964 | } | 973 | } |
965 | 974 | ||
@@ -996,9 +1005,10 @@ static int __init init_ceph(void) | |||
996 | if (ret) | 1005 | if (ret) |
997 | goto out_icache; | 1006 | goto out_icache; |
998 | 1007 | ||
999 | pr_info("loaded %d.%d.%d (mon/mds/osd proto %d/%d/%d)\n", | 1008 | pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n", |
1000 | CEPH_VERSION_MAJOR, CEPH_VERSION_MINOR, CEPH_VERSION_PATCH, | 1009 | CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL, |
1001 | CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL); | 1010 | CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, |
1011 | CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); | ||
1002 | return 0; | 1012 | return 0; |
1003 | 1013 | ||
1004 | out_icache: | 1014 | out_icache: |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ca702c67bc66..13513b80d87f 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
11 | #include <linux/mempool.h> | 11 | #include <linux/mempool.h> |
12 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
13 | #include <linux/slab.h> | ||
13 | #include <linux/wait.h> | 14 | #include <linux/wait.h> |
14 | #include <linux/writeback.h> | 15 | #include <linux/writeback.h> |
15 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
@@ -715,8 +716,7 @@ extern int ceph_update_snap_trace(struct ceph_mds_client *m, | |||
715 | extern void ceph_handle_snap(struct ceph_mds_client *mdsc, | 716 | extern void ceph_handle_snap(struct ceph_mds_client *mdsc, |
716 | struct ceph_mds_session *session, | 717 | struct ceph_mds_session *session, |
717 | struct ceph_msg *msg); | 718 | struct ceph_msg *msg); |
718 | extern void ceph_queue_cap_snap(struct ceph_inode_info *ci, | 719 | extern void ceph_queue_cap_snap(struct ceph_inode_info *ci); |
719 | struct ceph_snap_context *snapc); | ||
720 | extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci, | 720 | extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci, |
721 | struct ceph_cap_snap *capsnap); | 721 | struct ceph_cap_snap *capsnap); |
722 | extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc); | 722 | extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc); |
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 4797787c6a44..246a167cb913 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h | |||
@@ -18,6 +18,8 @@ | |||
18 | #ifndef _CIFS_FS_SB_H | 18 | #ifndef _CIFS_FS_SB_H |
19 | #define _CIFS_FS_SB_H | 19 | #define _CIFS_FS_SB_H |
20 | 20 | ||
21 | #include <linux/backing-dev.h> | ||
22 | |||
21 | #define CIFS_MOUNT_NO_PERM 1 /* do not do client vfs_perm check */ | 23 | #define CIFS_MOUNT_NO_PERM 1 /* do not do client vfs_perm check */ |
22 | #define CIFS_MOUNT_SET_UID 2 /* set current's euid in create etc. */ | 24 | #define CIFS_MOUNT_SET_UID 2 /* set current's euid in create etc. */ |
23 | #define CIFS_MOUNT_SERVER_INUM 4 /* inode numbers from uniqueid from server */ | 25 | #define CIFS_MOUNT_SERVER_INUM 4 /* inode numbers from uniqueid from server */ |
@@ -50,5 +52,6 @@ struct cifs_sb_info { | |||
50 | #ifdef CONFIG_CIFS_DFS_UPCALL | 52 | #ifdef CONFIG_CIFS_DFS_UPCALL |
51 | char *mountdata; /* mount options received at mount time */ | 53 | char *mountdata; /* mount options received at mount time */ |
52 | #endif | 54 | #endif |
55 | struct backing_dev_info bdi; | ||
53 | }; | 56 | }; |
54 | #endif /* _CIFS_FS_SB_H */ | 57 | #endif /* _CIFS_FS_SB_H */ |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index ded66be6597c..ad235d604a0b 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -103,6 +103,12 @@ cifs_read_super(struct super_block *sb, void *data, | |||
103 | if (cifs_sb == NULL) | 103 | if (cifs_sb == NULL) |
104 | return -ENOMEM; | 104 | return -ENOMEM; |
105 | 105 | ||
106 | rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); | ||
107 | if (rc) { | ||
108 | kfree(cifs_sb); | ||
109 | return rc; | ||
110 | } | ||
111 | |||
106 | #ifdef CONFIG_CIFS_DFS_UPCALL | 112 | #ifdef CONFIG_CIFS_DFS_UPCALL |
107 | /* copy mount params to sb for use in submounts */ | 113 | /* copy mount params to sb for use in submounts */ |
108 | /* BB: should we move this after the mount so we | 114 | /* BB: should we move this after the mount so we |
@@ -115,6 +121,7 @@ cifs_read_super(struct super_block *sb, void *data, | |||
115 | int len = strlen(data); | 121 | int len = strlen(data); |
116 | cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); | 122 | cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); |
117 | if (cifs_sb->mountdata == NULL) { | 123 | if (cifs_sb->mountdata == NULL) { |
124 | bdi_destroy(&cifs_sb->bdi); | ||
118 | kfree(sb->s_fs_info); | 125 | kfree(sb->s_fs_info); |
119 | sb->s_fs_info = NULL; | 126 | sb->s_fs_info = NULL; |
120 | return -ENOMEM; | 127 | return -ENOMEM; |
@@ -135,6 +142,7 @@ cifs_read_super(struct super_block *sb, void *data, | |||
135 | 142 | ||
136 | sb->s_magic = CIFS_MAGIC_NUMBER; | 143 | sb->s_magic = CIFS_MAGIC_NUMBER; |
137 | sb->s_op = &cifs_super_ops; | 144 | sb->s_op = &cifs_super_ops; |
145 | sb->s_bdi = &cifs_sb->bdi; | ||
138 | /* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512) | 146 | /* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512) |
139 | sb->s_blocksize = | 147 | sb->s_blocksize = |
140 | cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */ | 148 | cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */ |
@@ -183,6 +191,7 @@ out_mount_failed: | |||
183 | } | 191 | } |
184 | #endif | 192 | #endif |
185 | unload_nls(cifs_sb->local_nls); | 193 | unload_nls(cifs_sb->local_nls); |
194 | bdi_destroy(&cifs_sb->bdi); | ||
186 | kfree(cifs_sb); | 195 | kfree(cifs_sb); |
187 | } | 196 | } |
188 | return rc; | 197 | return rc; |
@@ -214,6 +223,7 @@ cifs_put_super(struct super_block *sb) | |||
214 | #endif | 223 | #endif |
215 | 224 | ||
216 | unload_nls(cifs_sb->local_nls); | 225 | unload_nls(cifs_sb->local_nls); |
226 | bdi_destroy(&cifs_sb->bdi); | ||
217 | kfree(cifs_sb); | 227 | kfree(cifs_sb); |
218 | 228 | ||
219 | unlock_kernel(); | 229 | unlock_kernel(); |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index ecf0ffbe2b64..0c2fd17439c8 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -502,6 +502,7 @@ struct dfs_info3_param { | |||
502 | #define CIFS_FATTR_DFS_REFERRAL 0x1 | 502 | #define CIFS_FATTR_DFS_REFERRAL 0x1 |
503 | #define CIFS_FATTR_DELETE_PENDING 0x2 | 503 | #define CIFS_FATTR_DELETE_PENDING 0x2 |
504 | #define CIFS_FATTR_NEED_REVAL 0x4 | 504 | #define CIFS_FATTR_NEED_REVAL 0x4 |
505 | #define CIFS_FATTR_INO_COLLISION 0x8 | ||
505 | 506 | ||
506 | struct cifs_fattr { | 507 | struct cifs_fattr { |
507 | u32 cf_flags; | 508 | u32 cf_flags; |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 35ec11716213..29b9ea244c81 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -715,6 +715,16 @@ cifs_find_inode(struct inode *inode, void *opaque) | |||
715 | if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) | 715 | if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) |
716 | return 0; | 716 | return 0; |
717 | 717 | ||
718 | /* | ||
719 | * uh oh -- it's a directory. We can't use it since hardlinked dirs are | ||
720 | * verboten. Disable serverino and return it as if it were found, the | ||
721 | * caller can discard it, generate a uniqueid and retry the find | ||
722 | */ | ||
723 | if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) { | ||
724 | fattr->cf_flags |= CIFS_FATTR_INO_COLLISION; | ||
725 | cifs_autodisable_serverino(CIFS_SB(inode->i_sb)); | ||
726 | } | ||
727 | |||
718 | return 1; | 728 | return 1; |
719 | } | 729 | } |
720 | 730 | ||
@@ -734,15 +744,22 @@ cifs_iget(struct super_block *sb, struct cifs_fattr *fattr) | |||
734 | unsigned long hash; | 744 | unsigned long hash; |
735 | struct inode *inode; | 745 | struct inode *inode; |
736 | 746 | ||
747 | retry_iget5_locked: | ||
737 | cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid)); | 748 | cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid)); |
738 | 749 | ||
739 | /* hash down to 32-bits on 32-bit arch */ | 750 | /* hash down to 32-bits on 32-bit arch */ |
740 | hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); | 751 | hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); |
741 | 752 | ||
742 | inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr); | 753 | inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr); |
743 | |||
744 | /* we have fattrs in hand, update the inode */ | ||
745 | if (inode) { | 754 | if (inode) { |
755 | /* was there a problematic inode number collision? */ | ||
756 | if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) { | ||
757 | iput(inode); | ||
758 | fattr->cf_uniqueid = iunique(sb, ROOT_I); | ||
759 | fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION; | ||
760 | goto retry_iget5_locked; | ||
761 | } | ||
762 | |||
746 | cifs_fattr_to_inode(inode, fattr); | 763 | cifs_fattr_to_inode(inode, fattr); |
747 | if (sb->s_flags & MS_NOATIME) | 764 | if (sb->s_flags & MS_NOATIME) |
748 | inode->i_flags |= S_NOATIME | S_NOCMTIME; | 765 | inode->i_flags |= S_NOATIME | S_NOCMTIME; |
diff --git a/fs/coda/inode.c b/fs/coda/inode.c index a1695dcadd99..d97f9935a028 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c | |||
@@ -167,6 +167,10 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) | |||
167 | return -EBUSY; | 167 | return -EBUSY; |
168 | } | 168 | } |
169 | 169 | ||
170 | error = bdi_setup_and_register(&vc->bdi, "coda", BDI_CAP_MAP_COPY); | ||
171 | if (error) | ||
172 | goto bdi_err; | ||
173 | |||
170 | vc->vc_sb = sb; | 174 | vc->vc_sb = sb; |
171 | 175 | ||
172 | sb->s_fs_info = vc; | 176 | sb->s_fs_info = vc; |
@@ -175,6 +179,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) | |||
175 | sb->s_blocksize_bits = 12; | 179 | sb->s_blocksize_bits = 12; |
176 | sb->s_magic = CODA_SUPER_MAGIC; | 180 | sb->s_magic = CODA_SUPER_MAGIC; |
177 | sb->s_op = &coda_super_operations; | 181 | sb->s_op = &coda_super_operations; |
182 | sb->s_bdi = &vc->bdi; | ||
178 | 183 | ||
179 | /* get root fid from Venus: this needs the root inode */ | 184 | /* get root fid from Venus: this needs the root inode */ |
180 | error = venus_rootfid(sb, &fid); | 185 | error = venus_rootfid(sb, &fid); |
@@ -200,6 +205,8 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) | |||
200 | return 0; | 205 | return 0; |
201 | 206 | ||
202 | error: | 207 | error: |
208 | bdi_destroy(&vc->bdi); | ||
209 | bdi_err: | ||
203 | if (root) | 210 | if (root) |
204 | iput(root); | 211 | iput(root); |
205 | if (vc) | 212 | if (vc) |
@@ -210,6 +217,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) | |||
210 | 217 | ||
211 | static void coda_put_super(struct super_block *sb) | 218 | static void coda_put_super(struct super_block *sb) |
212 | { | 219 | { |
220 | bdi_destroy(&coda_vcp(sb)->bdi); | ||
213 | coda_vcp(sb)->vc_sb = NULL; | 221 | coda_vcp(sb)->vc_sb = NULL; |
214 | sb->s_fs_info = NULL; | 222 | sb->s_fs_info = NULL; |
215 | 223 | ||
diff --git a/fs/compat.c b/fs/compat.c index 4b6ed03cc478..05448730f840 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -1531,8 +1531,6 @@ int compat_do_execve(char * filename, | |||
1531 | if (retval < 0) | 1531 | if (retval < 0) |
1532 | goto out; | 1532 | goto out; |
1533 | 1533 | ||
1534 | current->stack_start = current->mm->start_stack; | ||
1535 | |||
1536 | /* execve succeeded */ | 1534 | /* execve succeeded */ |
1537 | current->fs->in_exec = 0; | 1535 | current->fs->in_exec = 0; |
1538 | current->in_execve = 0; | 1536 | current->in_execve = 0; |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c32a1b6a856b..641640dc7ae5 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -102,7 +102,6 @@ | |||
102 | #include <linux/nbd.h> | 102 | #include <linux/nbd.h> |
103 | #include <linux/random.h> | 103 | #include <linux/random.h> |
104 | #include <linux/filter.h> | 104 | #include <linux/filter.h> |
105 | #include <linux/pktcdvd.h> | ||
106 | 105 | ||
107 | #include <linux/hiddev.h> | 106 | #include <linux/hiddev.h> |
108 | 107 | ||
@@ -1126,8 +1125,6 @@ COMPATIBLE_IOCTL(PPGETMODE) | |||
1126 | COMPATIBLE_IOCTL(PPGETPHASE) | 1125 | COMPATIBLE_IOCTL(PPGETPHASE) |
1127 | COMPATIBLE_IOCTL(PPGETFLAGS) | 1126 | COMPATIBLE_IOCTL(PPGETFLAGS) |
1128 | COMPATIBLE_IOCTL(PPSETFLAGS) | 1127 | COMPATIBLE_IOCTL(PPSETFLAGS) |
1129 | /* pktcdvd */ | ||
1130 | COMPATIBLE_IOCTL(PACKET_CTRL_CMD) | ||
1131 | /* Big A */ | 1128 | /* Big A */ |
1132 | /* sparc only */ | 1129 | /* sparc only */ |
1133 | /* Big Q for sound/OSS */ | 1130 | /* Big Q for sound/OSS */ |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 8e48b52205aa..0b502f80c691 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
@@ -645,6 +645,7 @@ static void detach_groups(struct config_group *group) | |||
645 | 645 | ||
646 | configfs_detach_group(sd->s_element); | 646 | configfs_detach_group(sd->s_element); |
647 | child->d_inode->i_flags |= S_DEAD; | 647 | child->d_inode->i_flags |= S_DEAD; |
648 | dont_mount(child); | ||
648 | 649 | ||
649 | mutex_unlock(&child->d_inode->i_mutex); | 650 | mutex_unlock(&child->d_inode->i_mutex); |
650 | 651 | ||
@@ -840,6 +841,7 @@ static int configfs_attach_item(struct config_item *parent_item, | |||
840 | mutex_lock(&dentry->d_inode->i_mutex); | 841 | mutex_lock(&dentry->d_inode->i_mutex); |
841 | configfs_remove_dir(item); | 842 | configfs_remove_dir(item); |
842 | dentry->d_inode->i_flags |= S_DEAD; | 843 | dentry->d_inode->i_flags |= S_DEAD; |
844 | dont_mount(dentry); | ||
843 | mutex_unlock(&dentry->d_inode->i_mutex); | 845 | mutex_unlock(&dentry->d_inode->i_mutex); |
844 | d_delete(dentry); | 846 | d_delete(dentry); |
845 | } | 847 | } |
@@ -882,6 +884,7 @@ static int configfs_attach_group(struct config_item *parent_item, | |||
882 | if (ret) { | 884 | if (ret) { |
883 | configfs_detach_item(item); | 885 | configfs_detach_item(item); |
884 | dentry->d_inode->i_flags |= S_DEAD; | 886 | dentry->d_inode->i_flags |= S_DEAD; |
887 | dont_mount(dentry); | ||
885 | } | 888 | } |
886 | configfs_adjust_dir_dirent_depth_after_populate(sd); | 889 | configfs_adjust_dir_dirent_depth_after_populate(sd); |
887 | mutex_unlock(&dentry->d_inode->i_mutex); | 890 | mutex_unlock(&dentry->d_inode->i_mutex); |
@@ -1725,6 +1728,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) | |||
1725 | mutex_unlock(&configfs_symlink_mutex); | 1728 | mutex_unlock(&configfs_symlink_mutex); |
1726 | configfs_detach_group(&group->cg_item); | 1729 | configfs_detach_group(&group->cg_item); |
1727 | dentry->d_inode->i_flags |= S_DEAD; | 1730 | dentry->d_inode->i_flags |= S_DEAD; |
1731 | dont_mount(dentry); | ||
1728 | mutex_unlock(&dentry->d_inode->i_mutex); | 1732 | mutex_unlock(&dentry->d_inode->i_mutex); |
1729 | 1733 | ||
1730 | d_delete(dentry); | 1734 | d_delete(dentry); |
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index efb2b9400391..1cc087635a5e 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c | |||
@@ -382,8 +382,8 @@ out: | |||
382 | static void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, | 382 | static void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, |
383 | struct ecryptfs_crypt_stat *crypt_stat) | 383 | struct ecryptfs_crypt_stat *crypt_stat) |
384 | { | 384 | { |
385 | (*offset) = (crypt_stat->num_header_bytes_at_front | 385 | (*offset) = ecryptfs_lower_header_size(crypt_stat) |
386 | + (crypt_stat->extent_size * extent_num)); | 386 | + (crypt_stat->extent_size * extent_num); |
387 | } | 387 | } |
388 | 388 | ||
389 | /** | 389 | /** |
@@ -835,13 +835,13 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat) | |||
835 | set_extent_mask_and_shift(crypt_stat); | 835 | set_extent_mask_and_shift(crypt_stat); |
836 | crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES; | 836 | crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES; |
837 | if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) | 837 | if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) |
838 | crypt_stat->num_header_bytes_at_front = 0; | 838 | crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; |
839 | else { | 839 | else { |
840 | if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) | 840 | if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) |
841 | crypt_stat->num_header_bytes_at_front = | 841 | crypt_stat->metadata_size = |
842 | ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; | 842 | ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; |
843 | else | 843 | else |
844 | crypt_stat->num_header_bytes_at_front = PAGE_CACHE_SIZE; | 844 | crypt_stat->metadata_size = PAGE_CACHE_SIZE; |
845 | } | 845 | } |
846 | } | 846 | } |
847 | 847 | ||
@@ -1108,9 +1108,9 @@ static void write_ecryptfs_marker(char *page_virt, size_t *written) | |||
1108 | (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; | 1108 | (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; |
1109 | } | 1109 | } |
1110 | 1110 | ||
1111 | static void | 1111 | void ecryptfs_write_crypt_stat_flags(char *page_virt, |
1112 | write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat, | 1112 | struct ecryptfs_crypt_stat *crypt_stat, |
1113 | size_t *written) | 1113 | size_t *written) |
1114 | { | 1114 | { |
1115 | u32 flags = 0; | 1115 | u32 flags = 0; |
1116 | int i; | 1116 | int i; |
@@ -1238,8 +1238,7 @@ ecryptfs_write_header_metadata(char *virt, | |||
1238 | 1238 | ||
1239 | header_extent_size = (u32)crypt_stat->extent_size; | 1239 | header_extent_size = (u32)crypt_stat->extent_size; |
1240 | num_header_extents_at_front = | 1240 | num_header_extents_at_front = |
1241 | (u16)(crypt_stat->num_header_bytes_at_front | 1241 | (u16)(crypt_stat->metadata_size / crypt_stat->extent_size); |
1242 | / crypt_stat->extent_size); | ||
1243 | put_unaligned_be32(header_extent_size, virt); | 1242 | put_unaligned_be32(header_extent_size, virt); |
1244 | virt += 4; | 1243 | virt += 4; |
1245 | put_unaligned_be16(num_header_extents_at_front, virt); | 1244 | put_unaligned_be16(num_header_extents_at_front, virt); |
@@ -1292,7 +1291,8 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t max, | |||
1292 | offset = ECRYPTFS_FILE_SIZE_BYTES; | 1291 | offset = ECRYPTFS_FILE_SIZE_BYTES; |
1293 | write_ecryptfs_marker((page_virt + offset), &written); | 1292 | write_ecryptfs_marker((page_virt + offset), &written); |
1294 | offset += written; | 1293 | offset += written; |
1295 | write_ecryptfs_flags((page_virt + offset), crypt_stat, &written); | 1294 | ecryptfs_write_crypt_stat_flags((page_virt + offset), crypt_stat, |
1295 | &written); | ||
1296 | offset += written; | 1296 | offset += written; |
1297 | ecryptfs_write_header_metadata((page_virt + offset), crypt_stat, | 1297 | ecryptfs_write_header_metadata((page_virt + offset), crypt_stat, |
1298 | &written); | 1298 | &written); |
@@ -1382,7 +1382,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) | |||
1382 | rc = -EINVAL; | 1382 | rc = -EINVAL; |
1383 | goto out; | 1383 | goto out; |
1384 | } | 1384 | } |
1385 | virt_len = crypt_stat->num_header_bytes_at_front; | 1385 | virt_len = crypt_stat->metadata_size; |
1386 | order = get_order(virt_len); | 1386 | order = get_order(virt_len); |
1387 | /* Released in this function */ | 1387 | /* Released in this function */ |
1388 | virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL, order); | 1388 | virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL, order); |
@@ -1428,16 +1428,15 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat, | |||
1428 | header_extent_size = get_unaligned_be32(virt); | 1428 | header_extent_size = get_unaligned_be32(virt); |
1429 | virt += sizeof(__be32); | 1429 | virt += sizeof(__be32); |
1430 | num_header_extents_at_front = get_unaligned_be16(virt); | 1430 | num_header_extents_at_front = get_unaligned_be16(virt); |
1431 | crypt_stat->num_header_bytes_at_front = | 1431 | crypt_stat->metadata_size = (((size_t)num_header_extents_at_front |
1432 | (((size_t)num_header_extents_at_front | 1432 | * (size_t)header_extent_size)); |
1433 | * (size_t)header_extent_size)); | ||
1434 | (*bytes_read) = (sizeof(__be32) + sizeof(__be16)); | 1433 | (*bytes_read) = (sizeof(__be32) + sizeof(__be16)); |
1435 | if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE) | 1434 | if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE) |
1436 | && (crypt_stat->num_header_bytes_at_front | 1435 | && (crypt_stat->metadata_size |
1437 | < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) { | 1436 | < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) { |
1438 | rc = -EINVAL; | 1437 | rc = -EINVAL; |
1439 | printk(KERN_WARNING "Invalid header size: [%zd]\n", | 1438 | printk(KERN_WARNING "Invalid header size: [%zd]\n", |
1440 | crypt_stat->num_header_bytes_at_front); | 1439 | crypt_stat->metadata_size); |
1441 | } | 1440 | } |
1442 | return rc; | 1441 | return rc; |
1443 | } | 1442 | } |
@@ -1452,8 +1451,7 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat, | |||
1452 | */ | 1451 | */ |
1453 | static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat) | 1452 | static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat) |
1454 | { | 1453 | { |
1455 | crypt_stat->num_header_bytes_at_front = | 1454 | crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; |
1456 | ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; | ||
1457 | } | 1455 | } |
1458 | 1456 | ||
1459 | /** | 1457 | /** |
@@ -1607,6 +1605,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) | |||
1607 | ecryptfs_dentry, | 1605 | ecryptfs_dentry, |
1608 | ECRYPTFS_VALIDATE_HEADER_SIZE); | 1606 | ECRYPTFS_VALIDATE_HEADER_SIZE); |
1609 | if (rc) { | 1607 | if (rc) { |
1608 | memset(page_virt, 0, PAGE_CACHE_SIZE); | ||
1610 | rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode); | 1609 | rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode); |
1611 | if (rc) { | 1610 | if (rc) { |
1612 | printk(KERN_DEBUG "Valid eCryptfs headers not found in " | 1611 | printk(KERN_DEBUG "Valid eCryptfs headers not found in " |
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 542f625312f3..bfc2e0f78f00 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/scatterlist.h> | 35 | #include <linux/scatterlist.h> |
36 | #include <linux/hash.h> | 36 | #include <linux/hash.h> |
37 | #include <linux/nsproxy.h> | 37 | #include <linux/nsproxy.h> |
38 | #include <linux/backing-dev.h> | ||
38 | 39 | ||
39 | /* Version verification for shared data structures w/ userspace */ | 40 | /* Version verification for shared data structures w/ userspace */ |
40 | #define ECRYPTFS_VERSION_MAJOR 0x00 | 41 | #define ECRYPTFS_VERSION_MAJOR 0x00 |
@@ -273,7 +274,7 @@ struct ecryptfs_crypt_stat { | |||
273 | u32 flags; | 274 | u32 flags; |
274 | unsigned int file_version; | 275 | unsigned int file_version; |
275 | size_t iv_bytes; | 276 | size_t iv_bytes; |
276 | size_t num_header_bytes_at_front; | 277 | size_t metadata_size; |
277 | size_t extent_size; /* Data extent size; default is 4096 */ | 278 | size_t extent_size; /* Data extent size; default is 4096 */ |
278 | size_t key_size; | 279 | size_t key_size; |
279 | size_t extent_shift; | 280 | size_t extent_shift; |
@@ -393,6 +394,7 @@ struct ecryptfs_mount_crypt_stat { | |||
393 | struct ecryptfs_sb_info { | 394 | struct ecryptfs_sb_info { |
394 | struct super_block *wsi_sb; | 395 | struct super_block *wsi_sb; |
395 | struct ecryptfs_mount_crypt_stat mount_crypt_stat; | 396 | struct ecryptfs_mount_crypt_stat mount_crypt_stat; |
397 | struct backing_dev_info bdi; | ||
396 | }; | 398 | }; |
397 | 399 | ||
398 | /* file private data. */ | 400 | /* file private data. */ |
@@ -464,6 +466,14 @@ struct ecryptfs_daemon { | |||
464 | 466 | ||
465 | extern struct mutex ecryptfs_daemon_hash_mux; | 467 | extern struct mutex ecryptfs_daemon_hash_mux; |
466 | 468 | ||
469 | static inline size_t | ||
470 | ecryptfs_lower_header_size(struct ecryptfs_crypt_stat *crypt_stat) | ||
471 | { | ||
472 | if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) | ||
473 | return 0; | ||
474 | return crypt_stat->metadata_size; | ||
475 | } | ||
476 | |||
467 | static inline struct ecryptfs_file_info * | 477 | static inline struct ecryptfs_file_info * |
468 | ecryptfs_file_to_private(struct file *file) | 478 | ecryptfs_file_to_private(struct file *file) |
469 | { | 479 | { |
@@ -651,6 +661,9 @@ int ecryptfs_decrypt_page(struct page *page); | |||
651 | int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry); | 661 | int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry); |
652 | int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry); | 662 | int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry); |
653 | int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry); | 663 | int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry); |
664 | void ecryptfs_write_crypt_stat_flags(char *page_virt, | ||
665 | struct ecryptfs_crypt_stat *crypt_stat, | ||
666 | size_t *written); | ||
654 | int ecryptfs_read_and_validate_header_region(char *data, | 667 | int ecryptfs_read_and_validate_header_region(char *data, |
655 | struct inode *ecryptfs_inode); | 668 | struct inode *ecryptfs_inode); |
656 | int ecryptfs_read_and_validate_xattr_region(char *page_virt, | 669 | int ecryptfs_read_and_validate_xattr_region(char *page_virt, |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index d3362faf3852..e2d4418affac 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -324,6 +324,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, | |||
324 | rc = ecryptfs_read_and_validate_header_region(page_virt, | 324 | rc = ecryptfs_read_and_validate_header_region(page_virt, |
325 | ecryptfs_dentry->d_inode); | 325 | ecryptfs_dentry->d_inode); |
326 | if (rc) { | 326 | if (rc) { |
327 | memset(page_virt, 0, PAGE_CACHE_SIZE); | ||
327 | rc = ecryptfs_read_and_validate_xattr_region(page_virt, | 328 | rc = ecryptfs_read_and_validate_xattr_region(page_virt, |
328 | ecryptfs_dentry); | 329 | ecryptfs_dentry); |
329 | if (rc) { | 330 | if (rc) { |
@@ -336,7 +337,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, | |||
336 | ecryptfs_dentry->d_sb)->mount_crypt_stat; | 337 | ecryptfs_dentry->d_sb)->mount_crypt_stat; |
337 | if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) { | 338 | if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) { |
338 | if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) | 339 | if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) |
339 | file_size = (crypt_stat->num_header_bytes_at_front | 340 | file_size = (crypt_stat->metadata_size |
340 | + i_size_read(lower_dentry->d_inode)); | 341 | + i_size_read(lower_dentry->d_inode)); |
341 | else | 342 | else |
342 | file_size = i_size_read(lower_dentry->d_inode); | 343 | file_size = i_size_read(lower_dentry->d_inode); |
@@ -388,9 +389,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
388 | mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); | 389 | mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); |
389 | if (IS_ERR(lower_dentry)) { | 390 | if (IS_ERR(lower_dentry)) { |
390 | rc = PTR_ERR(lower_dentry); | 391 | rc = PTR_ERR(lower_dentry); |
391 | printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " | 392 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " |
392 | "lower_dentry = [%s]\n", __func__, rc, | 393 | "[%d] on lower_dentry = [%s]\n", __func__, rc, |
393 | ecryptfs_dentry->d_name.name); | 394 | encrypted_and_encoded_name); |
394 | goto out_d_drop; | 395 | goto out_d_drop; |
395 | } | 396 | } |
396 | if (lower_dentry->d_inode) | 397 | if (lower_dentry->d_inode) |
@@ -417,9 +418,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
417 | mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); | 418 | mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); |
418 | if (IS_ERR(lower_dentry)) { | 419 | if (IS_ERR(lower_dentry)) { |
419 | rc = PTR_ERR(lower_dentry); | 420 | rc = PTR_ERR(lower_dentry); |
420 | printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " | 421 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " |
421 | "lower_dentry = [%s]\n", __func__, rc, | 422 | "[%d] on lower_dentry = [%s]\n", __func__, rc, |
422 | encrypted_and_encoded_name); | 423 | encrypted_and_encoded_name); |
423 | goto out_d_drop; | 424 | goto out_d_drop; |
424 | } | 425 | } |
425 | lookup_and_interpose: | 426 | lookup_and_interpose: |
@@ -456,8 +457,8 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir, | |||
456 | rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0); | 457 | rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0); |
457 | if (rc) | 458 | if (rc) |
458 | goto out_lock; | 459 | goto out_lock; |
459 | fsstack_copy_attr_times(dir, lower_new_dentry->d_inode); | 460 | fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); |
460 | fsstack_copy_inode_size(dir, lower_new_dentry->d_inode); | 461 | fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode); |
461 | old_dentry->d_inode->i_nlink = | 462 | old_dentry->d_inode->i_nlink = |
462 | ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink; | 463 | ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink; |
463 | i_size_write(new_dentry->d_inode, file_size_save); | 464 | i_size_write(new_dentry->d_inode, file_size_save); |
@@ -648,38 +649,17 @@ out_lock: | |||
648 | return rc; | 649 | return rc; |
649 | } | 650 | } |
650 | 651 | ||
651 | static int | 652 | static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf, |
652 | ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | 653 | size_t *bufsiz) |
653 | { | 654 | { |
655 | struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
654 | char *lower_buf; | 656 | char *lower_buf; |
655 | size_t lower_bufsiz; | 657 | size_t lower_bufsiz = PATH_MAX; |
656 | struct dentry *lower_dentry; | ||
657 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat; | ||
658 | char *plaintext_name; | ||
659 | size_t plaintext_name_size; | ||
660 | mm_segment_t old_fs; | 658 | mm_segment_t old_fs; |
661 | int rc; | 659 | int rc; |
662 | 660 | ||
663 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
664 | if (!lower_dentry->d_inode->i_op->readlink) { | ||
665 | rc = -EINVAL; | ||
666 | goto out; | ||
667 | } | ||
668 | mount_crypt_stat = &ecryptfs_superblock_to_private( | ||
669 | dentry->d_sb)->mount_crypt_stat; | ||
670 | /* | ||
671 | * If the lower filename is encrypted, it will result in a significantly | ||
672 | * longer name. If needed, truncate the name after decode and decrypt. | ||
673 | */ | ||
674 | if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) | ||
675 | lower_bufsiz = PATH_MAX; | ||
676 | else | ||
677 | lower_bufsiz = bufsiz; | ||
678 | /* Released in this function */ | ||
679 | lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL); | 661 | lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL); |
680 | if (lower_buf == NULL) { | 662 | if (!lower_buf) { |
681 | printk(KERN_ERR "%s: Out of memory whilst attempting to " | ||
682 | "kmalloc [%zd] bytes\n", __func__, lower_bufsiz); | ||
683 | rc = -ENOMEM; | 663 | rc = -ENOMEM; |
684 | goto out; | 664 | goto out; |
685 | } | 665 | } |
@@ -689,29 +669,31 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | |||
689 | (char __user *)lower_buf, | 669 | (char __user *)lower_buf, |
690 | lower_bufsiz); | 670 | lower_bufsiz); |
691 | set_fs(old_fs); | 671 | set_fs(old_fs); |
692 | if (rc >= 0) { | 672 | if (rc < 0) |
693 | rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name, | 673 | goto out; |
694 | &plaintext_name_size, | 674 | lower_bufsiz = rc; |
695 | dentry, lower_buf, | 675 | rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry, |
696 | rc); | 676 | lower_buf, lower_bufsiz); |
697 | if (rc) { | 677 | out: |
698 | printk(KERN_ERR "%s: Error attempting to decode and " | ||
699 | "decrypt filename; rc = [%d]\n", __func__, | ||
700 | rc); | ||
701 | goto out_free_lower_buf; | ||
702 | } | ||
703 | /* Check for bufsiz <= 0 done in sys_readlinkat() */ | ||
704 | rc = copy_to_user(buf, plaintext_name, | ||
705 | min((size_t) bufsiz, plaintext_name_size)); | ||
706 | if (rc) | ||
707 | rc = -EFAULT; | ||
708 | else | ||
709 | rc = plaintext_name_size; | ||
710 | kfree(plaintext_name); | ||
711 | fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode); | ||
712 | } | ||
713 | out_free_lower_buf: | ||
714 | kfree(lower_buf); | 678 | kfree(lower_buf); |
679 | return rc; | ||
680 | } | ||
681 | |||
682 | static int | ||
683 | ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | ||
684 | { | ||
685 | char *kbuf; | ||
686 | size_t kbufsiz, copied; | ||
687 | int rc; | ||
688 | |||
689 | rc = ecryptfs_readlink_lower(dentry, &kbuf, &kbufsiz); | ||
690 | if (rc) | ||
691 | goto out; | ||
692 | copied = min_t(size_t, bufsiz, kbufsiz); | ||
693 | rc = copy_to_user(buf, kbuf, copied) ? -EFAULT : copied; | ||
694 | kfree(kbuf); | ||
695 | fsstack_copy_attr_atime(dentry->d_inode, | ||
696 | ecryptfs_dentry_to_lower(dentry)->d_inode); | ||
715 | out: | 697 | out: |
716 | return rc; | 698 | return rc; |
717 | } | 699 | } |
@@ -769,7 +751,7 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat, | |||
769 | { | 751 | { |
770 | loff_t lower_size; | 752 | loff_t lower_size; |
771 | 753 | ||
772 | lower_size = crypt_stat->num_header_bytes_at_front; | 754 | lower_size = ecryptfs_lower_header_size(crypt_stat); |
773 | if (upper_size != 0) { | 755 | if (upper_size != 0) { |
774 | loff_t num_extents; | 756 | loff_t num_extents; |
775 | 757 | ||
@@ -1016,6 +998,28 @@ out: | |||
1016 | return rc; | 998 | return rc; |
1017 | } | 999 | } |
1018 | 1000 | ||
1001 | int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry, | ||
1002 | struct kstat *stat) | ||
1003 | { | ||
1004 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat; | ||
1005 | int rc = 0; | ||
1006 | |||
1007 | mount_crypt_stat = &ecryptfs_superblock_to_private( | ||
1008 | dentry->d_sb)->mount_crypt_stat; | ||
1009 | generic_fillattr(dentry->d_inode, stat); | ||
1010 | if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) { | ||
1011 | char *target; | ||
1012 | size_t targetsiz; | ||
1013 | |||
1014 | rc = ecryptfs_readlink_lower(dentry, &target, &targetsiz); | ||
1015 | if (!rc) { | ||
1016 | kfree(target); | ||
1017 | stat->size = targetsiz; | ||
1018 | } | ||
1019 | } | ||
1020 | return rc; | ||
1021 | } | ||
1022 | |||
1019 | int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, | 1023 | int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, |
1020 | struct kstat *stat) | 1024 | struct kstat *stat) |
1021 | { | 1025 | { |
@@ -1040,7 +1044,7 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, | |||
1040 | 1044 | ||
1041 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | 1045 | lower_dentry = ecryptfs_dentry_to_lower(dentry); |
1042 | if (!lower_dentry->d_inode->i_op->setxattr) { | 1046 | if (!lower_dentry->d_inode->i_op->setxattr) { |
1043 | rc = -ENOSYS; | 1047 | rc = -EOPNOTSUPP; |
1044 | goto out; | 1048 | goto out; |
1045 | } | 1049 | } |
1046 | mutex_lock(&lower_dentry->d_inode->i_mutex); | 1050 | mutex_lock(&lower_dentry->d_inode->i_mutex); |
@@ -1058,7 +1062,7 @@ ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name, | |||
1058 | int rc = 0; | 1062 | int rc = 0; |
1059 | 1063 | ||
1060 | if (!lower_dentry->d_inode->i_op->getxattr) { | 1064 | if (!lower_dentry->d_inode->i_op->getxattr) { |
1061 | rc = -ENOSYS; | 1065 | rc = -EOPNOTSUPP; |
1062 | goto out; | 1066 | goto out; |
1063 | } | 1067 | } |
1064 | mutex_lock(&lower_dentry->d_inode->i_mutex); | 1068 | mutex_lock(&lower_dentry->d_inode->i_mutex); |
@@ -1085,7 +1089,7 @@ ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size) | |||
1085 | 1089 | ||
1086 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | 1090 | lower_dentry = ecryptfs_dentry_to_lower(dentry); |
1087 | if (!lower_dentry->d_inode->i_op->listxattr) { | 1091 | if (!lower_dentry->d_inode->i_op->listxattr) { |
1088 | rc = -ENOSYS; | 1092 | rc = -EOPNOTSUPP; |
1089 | goto out; | 1093 | goto out; |
1090 | } | 1094 | } |
1091 | mutex_lock(&lower_dentry->d_inode->i_mutex); | 1095 | mutex_lock(&lower_dentry->d_inode->i_mutex); |
@@ -1102,7 +1106,7 @@ static int ecryptfs_removexattr(struct dentry *dentry, const char *name) | |||
1102 | 1106 | ||
1103 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | 1107 | lower_dentry = ecryptfs_dentry_to_lower(dentry); |
1104 | if (!lower_dentry->d_inode->i_op->removexattr) { | 1108 | if (!lower_dentry->d_inode->i_op->removexattr) { |
1105 | rc = -ENOSYS; | 1109 | rc = -EOPNOTSUPP; |
1106 | goto out; | 1110 | goto out; |
1107 | } | 1111 | } |
1108 | mutex_lock(&lower_dentry->d_inode->i_mutex); | 1112 | mutex_lock(&lower_dentry->d_inode->i_mutex); |
@@ -1133,6 +1137,7 @@ const struct inode_operations ecryptfs_symlink_iops = { | |||
1133 | .put_link = ecryptfs_put_link, | 1137 | .put_link = ecryptfs_put_link, |
1134 | .permission = ecryptfs_permission, | 1138 | .permission = ecryptfs_permission, |
1135 | .setattr = ecryptfs_setattr, | 1139 | .setattr = ecryptfs_setattr, |
1140 | .getattr = ecryptfs_getattr_link, | ||
1136 | .setxattr = ecryptfs_setxattr, | 1141 | .setxattr = ecryptfs_setxattr, |
1137 | .getxattr = ecryptfs_getxattr, | 1142 | .getxattr = ecryptfs_getxattr, |
1138 | .listxattr = ecryptfs_listxattr, | 1143 | .listxattr = ecryptfs_listxattr, |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index af1a8f01ebac..760983d0f25e 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -497,17 +497,25 @@ struct kmem_cache *ecryptfs_sb_info_cache; | |||
497 | static int | 497 | static int |
498 | ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent) | 498 | ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent) |
499 | { | 499 | { |
500 | struct ecryptfs_sb_info *esi; | ||
500 | int rc = 0; | 501 | int rc = 0; |
501 | 502 | ||
502 | /* Released in ecryptfs_put_super() */ | 503 | /* Released in ecryptfs_put_super() */ |
503 | ecryptfs_set_superblock_private(sb, | 504 | ecryptfs_set_superblock_private(sb, |
504 | kmem_cache_zalloc(ecryptfs_sb_info_cache, | 505 | kmem_cache_zalloc(ecryptfs_sb_info_cache, |
505 | GFP_KERNEL)); | 506 | GFP_KERNEL)); |
506 | if (!ecryptfs_superblock_to_private(sb)) { | 507 | esi = ecryptfs_superblock_to_private(sb); |
508 | if (!esi) { | ||
507 | ecryptfs_printk(KERN_WARNING, "Out of memory\n"); | 509 | ecryptfs_printk(KERN_WARNING, "Out of memory\n"); |
508 | rc = -ENOMEM; | 510 | rc = -ENOMEM; |
509 | goto out; | 511 | goto out; |
510 | } | 512 | } |
513 | |||
514 | rc = bdi_setup_and_register(&esi->bdi, "ecryptfs", BDI_CAP_MAP_COPY); | ||
515 | if (rc) | ||
516 | goto out; | ||
517 | |||
518 | sb->s_bdi = &esi->bdi; | ||
511 | sb->s_op = &ecryptfs_sops; | 519 | sb->s_op = &ecryptfs_sops; |
512 | /* Released through deactivate_super(sb) from get_sb_nodev */ | 520 | /* Released through deactivate_super(sb) from get_sb_nodev */ |
513 | sb->s_root = d_alloc(NULL, &(const struct qstr) { | 521 | sb->s_root = d_alloc(NULL, &(const struct qstr) { |
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index d491237c98e7..2ee9a3a7b68c 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c | |||
@@ -83,6 +83,19 @@ out: | |||
83 | return rc; | 83 | return rc; |
84 | } | 84 | } |
85 | 85 | ||
86 | static void strip_xattr_flag(char *page_virt, | ||
87 | struct ecryptfs_crypt_stat *crypt_stat) | ||
88 | { | ||
89 | if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) { | ||
90 | size_t written; | ||
91 | |||
92 | crypt_stat->flags &= ~ECRYPTFS_METADATA_IN_XATTR; | ||
93 | ecryptfs_write_crypt_stat_flags(page_virt, crypt_stat, | ||
94 | &written); | ||
95 | crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; | ||
96 | } | ||
97 | } | ||
98 | |||
86 | /** | 99 | /** |
87 | * Header Extent: | 100 | * Header Extent: |
88 | * Octets 0-7: Unencrypted file size (big-endian) | 101 | * Octets 0-7: Unencrypted file size (big-endian) |
@@ -98,19 +111,6 @@ out: | |||
98 | * (big-endian) | 111 | * (big-endian) |
99 | * Octet 26: Begin RFC 2440 authentication token packet set | 112 | * Octet 26: Begin RFC 2440 authentication token packet set |
100 | */ | 113 | */ |
101 | static void set_header_info(char *page_virt, | ||
102 | struct ecryptfs_crypt_stat *crypt_stat) | ||
103 | { | ||
104 | size_t written; | ||
105 | size_t save_num_header_bytes_at_front = | ||
106 | crypt_stat->num_header_bytes_at_front; | ||
107 | |||
108 | crypt_stat->num_header_bytes_at_front = | ||
109 | ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; | ||
110 | ecryptfs_write_header_metadata(page_virt + 20, crypt_stat, &written); | ||
111 | crypt_stat->num_header_bytes_at_front = | ||
112 | save_num_header_bytes_at_front; | ||
113 | } | ||
114 | 114 | ||
115 | /** | 115 | /** |
116 | * ecryptfs_copy_up_encrypted_with_header | 116 | * ecryptfs_copy_up_encrypted_with_header |
@@ -136,8 +136,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, | |||
136 | * num_extents_per_page) | 136 | * num_extents_per_page) |
137 | + extent_num_in_page); | 137 | + extent_num_in_page); |
138 | size_t num_header_extents_at_front = | 138 | size_t num_header_extents_at_front = |
139 | (crypt_stat->num_header_bytes_at_front | 139 | (crypt_stat->metadata_size / crypt_stat->extent_size); |
140 | / crypt_stat->extent_size); | ||
141 | 140 | ||
142 | if (view_extent_num < num_header_extents_at_front) { | 141 | if (view_extent_num < num_header_extents_at_front) { |
143 | /* This is a header extent */ | 142 | /* This is a header extent */ |
@@ -147,9 +146,14 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, | |||
147 | memset(page_virt, 0, PAGE_CACHE_SIZE); | 146 | memset(page_virt, 0, PAGE_CACHE_SIZE); |
148 | /* TODO: Support more than one header extent */ | 147 | /* TODO: Support more than one header extent */ |
149 | if (view_extent_num == 0) { | 148 | if (view_extent_num == 0) { |
149 | size_t written; | ||
150 | |||
150 | rc = ecryptfs_read_xattr_region( | 151 | rc = ecryptfs_read_xattr_region( |
151 | page_virt, page->mapping->host); | 152 | page_virt, page->mapping->host); |
152 | set_header_info(page_virt, crypt_stat); | 153 | strip_xattr_flag(page_virt + 16, crypt_stat); |
154 | ecryptfs_write_header_metadata(page_virt + 20, | ||
155 | crypt_stat, | ||
156 | &written); | ||
153 | } | 157 | } |
154 | kunmap_atomic(page_virt, KM_USER0); | 158 | kunmap_atomic(page_virt, KM_USER0); |
155 | flush_dcache_page(page); | 159 | flush_dcache_page(page); |
@@ -162,7 +166,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, | |||
162 | /* This is an encrypted data extent */ | 166 | /* This is an encrypted data extent */ |
163 | loff_t lower_offset = | 167 | loff_t lower_offset = |
164 | ((view_extent_num * crypt_stat->extent_size) | 168 | ((view_extent_num * crypt_stat->extent_size) |
165 | - crypt_stat->num_header_bytes_at_front); | 169 | - crypt_stat->metadata_size); |
166 | 170 | ||
167 | rc = ecryptfs_read_lower_page_segment( | 171 | rc = ecryptfs_read_lower_page_segment( |
168 | page, (lower_offset >> PAGE_CACHE_SHIFT), | 172 | page, (lower_offset >> PAGE_CACHE_SHIFT), |
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index fcef41c1d2cf..0c0ae491d231 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c | |||
@@ -86,7 +86,6 @@ static void ecryptfs_destroy_inode(struct inode *inode) | |||
86 | if (lower_dentry->d_inode) { | 86 | if (lower_dentry->d_inode) { |
87 | fput(inode_info->lower_file); | 87 | fput(inode_info->lower_file); |
88 | inode_info->lower_file = NULL; | 88 | inode_info->lower_file = NULL; |
89 | d_drop(lower_dentry); | ||
90 | } | 89 | } |
91 | } | 90 | } |
92 | ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); | 91 | ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); |
@@ -123,6 +122,7 @@ static void ecryptfs_put_super(struct super_block *sb) | |||
123 | lock_kernel(); | 122 | lock_kernel(); |
124 | 123 | ||
125 | ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); | 124 | ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); |
125 | bdi_destroy(&sb_info->bdi); | ||
126 | kmem_cache_free(ecryptfs_sb_info_cache, sb_info); | 126 | kmem_cache_free(ecryptfs_sb_info_cache, sb_info); |
127 | ecryptfs_set_superblock_private(sb, NULL); | 127 | ecryptfs_set_superblock_private(sb, NULL); |
128 | 128 | ||
@@ -1387,8 +1387,6 @@ int do_execve(char * filename, | |||
1387 | if (retval < 0) | 1387 | if (retval < 0) |
1388 | goto out; | 1388 | goto out; |
1389 | 1389 | ||
1390 | current->stack_start = current->mm->start_stack; | ||
1391 | |||
1392 | /* execve succeeded */ | 1390 | /* execve succeeded */ |
1393 | current->fs->in_exec = 0; | 1391 | current->fs->in_exec = 0; |
1394 | current->in_execve = 0; | 1392 | current->in_execve = 0; |
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 8442e353309f..22721b2fd890 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h | |||
@@ -35,6 +35,7 @@ | |||
35 | 35 | ||
36 | #include <linux/fs.h> | 36 | #include <linux/fs.h> |
37 | #include <linux/time.h> | 37 | #include <linux/time.h> |
38 | #include <linux/backing-dev.h> | ||
38 | #include "common.h" | 39 | #include "common.h" |
39 | 40 | ||
40 | /* FIXME: Remove once pnfs hits mainline | 41 | /* FIXME: Remove once pnfs hits mainline |
@@ -84,6 +85,7 @@ struct exofs_sb_info { | |||
84 | u32 s_next_generation; /* next gen # to use */ | 85 | u32 s_next_generation; /* next gen # to use */ |
85 | atomic_t s_curr_pending; /* number of pending commands */ | 86 | atomic_t s_curr_pending; /* number of pending commands */ |
86 | uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ | 87 | uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ |
88 | struct backing_dev_info bdi; /* register our bdi with VFS */ | ||
87 | 89 | ||
88 | struct pnfs_osd_data_map data_map; /* Default raid to use | 90 | struct pnfs_osd_data_map data_map; /* Default raid to use |
89 | * FIXME: Needed ? | 91 | * FIXME: Needed ? |
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 18e57ea1e5b4..03149b9a5178 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -302,6 +302,7 @@ static void exofs_put_super(struct super_block *sb) | |||
302 | _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0], | 302 | _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0], |
303 | sbi->layout.s_pid); | 303 | sbi->layout.s_pid); |
304 | 304 | ||
305 | bdi_destroy(&sbi->bdi); | ||
305 | exofs_free_sbi(sbi); | 306 | exofs_free_sbi(sbi); |
306 | sb->s_fs_info = NULL; | 307 | sb->s_fs_info = NULL; |
307 | } | 308 | } |
@@ -546,6 +547,10 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
546 | if (!sbi) | 547 | if (!sbi) |
547 | return -ENOMEM; | 548 | return -ENOMEM; |
548 | 549 | ||
550 | ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); | ||
551 | if (ret) | ||
552 | goto free_bdi; | ||
553 | |||
549 | /* use mount options to fill superblock */ | 554 | /* use mount options to fill superblock */ |
550 | od = osduld_path_lookup(opts->dev_name); | 555 | od = osduld_path_lookup(opts->dev_name); |
551 | if (IS_ERR(od)) { | 556 | if (IS_ERR(od)) { |
@@ -612,6 +617,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
612 | } | 617 | } |
613 | 618 | ||
614 | /* set up operation vectors */ | 619 | /* set up operation vectors */ |
620 | sb->s_bdi = &sbi->bdi; | ||
615 | sb->s_fs_info = sbi; | 621 | sb->s_fs_info = sbi; |
616 | sb->s_op = &exofs_sops; | 622 | sb->s_op = &exofs_sops; |
617 | sb->s_export_op = &exofs_export_ops; | 623 | sb->s_export_op = &exofs_export_ops; |
@@ -643,6 +649,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
643 | return 0; | 649 | return 0; |
644 | 650 | ||
645 | free_sbi: | 651 | free_sbi: |
652 | bdi_destroy(&sbi->bdi); | ||
653 | free_bdi: | ||
646 | EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", | 654 | EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", |
647 | opts->dev_name, sbi->layout.s_pid, ret); | 655 | opts->dev_name, sbi->layout.s_pid, ret); |
648 | exofs_free_sbi(sbi); | 656 | exofs_free_sbi(sbi); |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 94c8ee81f5e1..236b834b4ca8 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -3879,6 +3879,7 @@ static int ext4_xattr_fiemap(struct inode *inode, | |||
3879 | physical += offset; | 3879 | physical += offset; |
3880 | length = EXT4_SB(inode->i_sb)->s_inode_size - offset; | 3880 | length = EXT4_SB(inode->i_sb)->s_inode_size - offset; |
3881 | flags |= FIEMAP_EXTENT_DATA_INLINE; | 3881 | flags |= FIEMAP_EXTENT_DATA_INLINE; |
3882 | brelse(iloc.bh); | ||
3882 | } else { /* external block */ | 3883 | } else { /* external block */ |
3883 | physical = EXT4_I(inode)->i_file_acl << blockbits; | 3884 | physical = EXT4_I(inode)->i_file_acl << blockbits; |
3884 | length = inode->i_sb->s_blocksize; | 3885 | length = inode->i_sb->s_blocksize; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5381802d6052..81d605412844 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -5375,7 +5375,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
5375 | } else { | 5375 | } else { |
5376 | struct ext4_iloc iloc; | 5376 | struct ext4_iloc iloc; |
5377 | 5377 | ||
5378 | err = ext4_get_inode_loc(inode, &iloc); | 5378 | err = __ext4_get_inode_loc(inode, &iloc, 0); |
5379 | if (err) | 5379 | if (err) |
5380 | return err; | 5380 | return err; |
5381 | if (wbc->sync_mode == WB_SYNC_ALL) | 5381 | if (wbc->sync_mode == WB_SYNC_ALL) |
@@ -5386,6 +5386,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
5386 | (unsigned long long)iloc.bh->b_blocknr); | 5386 | (unsigned long long)iloc.bh->b_blocknr); |
5387 | err = -EIO; | 5387 | err = -EIO; |
5388 | } | 5388 | } |
5389 | brelse(iloc.bh); | ||
5389 | } | 5390 | } |
5390 | return err; | 5391 | return err; |
5391 | } | 5392 | } |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bde9d0b170c2..b423a364dca3 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2535,6 +2535,17 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2535 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", | 2535 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", |
2536 | entry->count, entry->group, entry); | 2536 | entry->count, entry->group, entry); |
2537 | 2537 | ||
2538 | if (test_opt(sb, DISCARD)) { | ||
2539 | ext4_fsblk_t discard_block; | ||
2540 | |||
2541 | discard_block = entry->start_blk + | ||
2542 | ext4_group_first_block_no(sb, entry->group); | ||
2543 | trace_ext4_discard_blocks(sb, | ||
2544 | (unsigned long long)discard_block, | ||
2545 | entry->count); | ||
2546 | sb_issue_discard(sb, discard_block, entry->count); | ||
2547 | } | ||
2548 | |||
2538 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2549 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
2539 | /* we expect to find existing buddy because it's pinned */ | 2550 | /* we expect to find existing buddy because it's pinned */ |
2540 | BUG_ON(err != 0); | 2551 | BUG_ON(err != 0); |
@@ -2556,16 +2567,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2556 | page_cache_release(e4b.bd_bitmap_page); | 2567 | page_cache_release(e4b.bd_bitmap_page); |
2557 | } | 2568 | } |
2558 | ext4_unlock_group(sb, entry->group); | 2569 | ext4_unlock_group(sb, entry->group); |
2559 | if (test_opt(sb, DISCARD)) { | ||
2560 | ext4_fsblk_t discard_block; | ||
2561 | |||
2562 | discard_block = entry->start_blk + | ||
2563 | ext4_group_first_block_no(sb, entry->group); | ||
2564 | trace_ext4_discard_blocks(sb, | ||
2565 | (unsigned long long)discard_block, | ||
2566 | entry->count); | ||
2567 | sb_issue_discard(sb, discard_block, entry->count); | ||
2568 | } | ||
2569 | kmem_cache_free(ext4_free_ext_cachep, entry); | 2570 | kmem_cache_free(ext4_free_ext_cachep, entry); |
2570 | ext4_mb_release_desc(&e4b); | 2571 | ext4_mb_release_desc(&e4b); |
2571 | } | 2572 | } |
diff --git a/fs/ioctl.c b/fs/ioctl.c index 6c751106c2e5..7faefb4da939 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -228,14 +228,23 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) | |||
228 | 228 | ||
229 | #ifdef CONFIG_BLOCK | 229 | #ifdef CONFIG_BLOCK |
230 | 230 | ||
231 | #define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits) | 231 | static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) |
232 | #define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits); | 232 | { |
233 | return (offset >> inode->i_blkbits); | ||
234 | } | ||
235 | |||
236 | static inline loff_t blk_to_logical(struct inode *inode, sector_t blk) | ||
237 | { | ||
238 | return (blk << inode->i_blkbits); | ||
239 | } | ||
233 | 240 | ||
234 | /** | 241 | /** |
235 | * __generic_block_fiemap - FIEMAP for block based inodes (no locking) | 242 | * __generic_block_fiemap - FIEMAP for block based inodes (no locking) |
236 | * @inode - the inode to map | 243 | * @inode: the inode to map |
237 | * @arg - the pointer to userspace where we copy everything to | 244 | * @fieinfo: the fiemap info struct that will be passed back to userspace |
238 | * @get_block - the fs's get_block function | 245 | * @start: where to start mapping in the inode |
246 | * @len: how much space to map | ||
247 | * @get_block: the fs's get_block function | ||
239 | * | 248 | * |
240 | * This does FIEMAP for block based inodes. Basically it will just loop | 249 | * This does FIEMAP for block based inodes. Basically it will just loop |
241 | * through get_block until we hit the number of extents we want to map, or we | 250 | * through get_block until we hit the number of extents we want to map, or we |
@@ -250,58 +259,63 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) | |||
250 | */ | 259 | */ |
251 | 260 | ||
252 | int __generic_block_fiemap(struct inode *inode, | 261 | int __generic_block_fiemap(struct inode *inode, |
253 | struct fiemap_extent_info *fieinfo, u64 start, | 262 | struct fiemap_extent_info *fieinfo, loff_t start, |
254 | u64 len, get_block_t *get_block) | 263 | loff_t len, get_block_t *get_block) |
255 | { | 264 | { |
256 | struct buffer_head tmp; | 265 | struct buffer_head map_bh; |
257 | unsigned long long start_blk; | 266 | sector_t start_blk, last_blk; |
258 | long long length = 0, map_len = 0; | 267 | loff_t isize = i_size_read(inode); |
259 | u64 logical = 0, phys = 0, size = 0; | 268 | u64 logical = 0, phys = 0, size = 0; |
260 | u32 flags = FIEMAP_EXTENT_MERGED; | 269 | u32 flags = FIEMAP_EXTENT_MERGED; |
261 | int ret = 0, past_eof = 0, whole_file = 0; | 270 | bool past_eof = false, whole_file = false; |
271 | int ret = 0; | ||
262 | 272 | ||
263 | if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) | 273 | ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); |
274 | if (ret) | ||
264 | return ret; | 275 | return ret; |
265 | 276 | ||
266 | start_blk = logical_to_blk(inode, start); | 277 | /* |
267 | 278 | * Either the i_mutex or other appropriate locking needs to be held | |
268 | length = (long long)min_t(u64, len, i_size_read(inode)); | 279 | * since we expect isize to not change at all through the duration of |
269 | if (length < len) | 280 | * this call. |
270 | whole_file = 1; | 281 | */ |
282 | if (len >= isize) { | ||
283 | whole_file = true; | ||
284 | len = isize; | ||
285 | } | ||
271 | 286 | ||
272 | map_len = length; | 287 | start_blk = logical_to_blk(inode, start); |
288 | last_blk = logical_to_blk(inode, start + len - 1); | ||
273 | 289 | ||
274 | do { | 290 | do { |
275 | /* | 291 | /* |
276 | * we set b_size to the total size we want so it will map as | 292 | * we set b_size to the total size we want so it will map as |
277 | * many contiguous blocks as possible at once | 293 | * many contiguous blocks as possible at once |
278 | */ | 294 | */ |
279 | memset(&tmp, 0, sizeof(struct buffer_head)); | 295 | memset(&map_bh, 0, sizeof(struct buffer_head)); |
280 | tmp.b_size = map_len; | 296 | map_bh.b_size = len; |
281 | 297 | ||
282 | ret = get_block(inode, start_blk, &tmp, 0); | 298 | ret = get_block(inode, start_blk, &map_bh, 0); |
283 | if (ret) | 299 | if (ret) |
284 | break; | 300 | break; |
285 | 301 | ||
286 | /* HOLE */ | 302 | /* HOLE */ |
287 | if (!buffer_mapped(&tmp)) { | 303 | if (!buffer_mapped(&map_bh)) { |
288 | length -= blk_to_logical(inode, 1); | ||
289 | start_blk++; | 304 | start_blk++; |
290 | 305 | ||
291 | /* | 306 | /* |
292 | * we want to handle the case where there is an | 307 | * We want to handle the case where there is an |
293 | * allocated block at the front of the file, and then | 308 | * allocated block at the front of the file, and then |
294 | * nothing but holes up to the end of the file properly, | 309 | * nothing but holes up to the end of the file properly, |
295 | * to make sure that extent at the front gets properly | 310 | * to make sure that extent at the front gets properly |
296 | * marked with FIEMAP_EXTENT_LAST | 311 | * marked with FIEMAP_EXTENT_LAST |
297 | */ | 312 | */ |
298 | if (!past_eof && | 313 | if (!past_eof && |
299 | blk_to_logical(inode, start_blk) >= | 314 | blk_to_logical(inode, start_blk) >= isize) |
300 | blk_to_logical(inode, 0)+i_size_read(inode)) | ||
301 | past_eof = 1; | 315 | past_eof = 1; |
302 | 316 | ||
303 | /* | 317 | /* |
304 | * first hole after going past the EOF, this is our | 318 | * First hole after going past the EOF, this is our |
305 | * last extent | 319 | * last extent |
306 | */ | 320 | */ |
307 | if (past_eof && size) { | 321 | if (past_eof && size) { |
@@ -309,15 +323,18 @@ int __generic_block_fiemap(struct inode *inode, | |||
309 | ret = fiemap_fill_next_extent(fieinfo, logical, | 323 | ret = fiemap_fill_next_extent(fieinfo, logical, |
310 | phys, size, | 324 | phys, size, |
311 | flags); | 325 | flags); |
312 | break; | 326 | } else if (size) { |
327 | ret = fiemap_fill_next_extent(fieinfo, logical, | ||
328 | phys, size, flags); | ||
329 | size = 0; | ||
313 | } | 330 | } |
314 | 331 | ||
315 | /* if we have holes up to/past EOF then we're done */ | 332 | /* if we have holes up to/past EOF then we're done */ |
316 | if (length <= 0 || past_eof) | 333 | if (start_blk > last_blk || past_eof || ret) |
317 | break; | 334 | break; |
318 | } else { | 335 | } else { |
319 | /* | 336 | /* |
320 | * we have gone over the length of what we wanted to | 337 | * We have gone over the length of what we wanted to |
321 | * map, and it wasn't the entire file, so add the extent | 338 | * map, and it wasn't the entire file, so add the extent |
322 | * we got last time and exit. | 339 | * we got last time and exit. |
323 | * | 340 | * |
@@ -331,7 +348,7 @@ int __generic_block_fiemap(struct inode *inode, | |||
331 | * are good to go, just add the extent to the fieinfo | 348 | * are good to go, just add the extent to the fieinfo |
332 | * and break | 349 | * and break |
333 | */ | 350 | */ |
334 | if (length <= 0 && !whole_file) { | 351 | if (start_blk > last_blk && !whole_file) { |
335 | ret = fiemap_fill_next_extent(fieinfo, logical, | 352 | ret = fiemap_fill_next_extent(fieinfo, logical, |
336 | phys, size, | 353 | phys, size, |
337 | flags); | 354 | flags); |
@@ -351,11 +368,10 @@ int __generic_block_fiemap(struct inode *inode, | |||
351 | } | 368 | } |
352 | 369 | ||
353 | logical = blk_to_logical(inode, start_blk); | 370 | logical = blk_to_logical(inode, start_blk); |
354 | phys = blk_to_logical(inode, tmp.b_blocknr); | 371 | phys = blk_to_logical(inode, map_bh.b_blocknr); |
355 | size = tmp.b_size; | 372 | size = map_bh.b_size; |
356 | flags = FIEMAP_EXTENT_MERGED; | 373 | flags = FIEMAP_EXTENT_MERGED; |
357 | 374 | ||
358 | length -= tmp.b_size; | ||
359 | start_blk += logical_to_blk(inode, size); | 375 | start_blk += logical_to_blk(inode, size); |
360 | 376 | ||
361 | /* | 377 | /* |
@@ -363,15 +379,13 @@ int __generic_block_fiemap(struct inode *inode, | |||
363 | * soon as we find a hole that the last extent we found | 379 | * soon as we find a hole that the last extent we found |
364 | * is marked with FIEMAP_EXTENT_LAST | 380 | * is marked with FIEMAP_EXTENT_LAST |
365 | */ | 381 | */ |
366 | if (!past_eof && | 382 | if (!past_eof && logical + size >= isize) |
367 | logical+size >= | 383 | past_eof = true; |
368 | blk_to_logical(inode, 0)+i_size_read(inode)) | ||
369 | past_eof = 1; | ||
370 | } | 384 | } |
371 | cond_resched(); | 385 | cond_resched(); |
372 | } while (1); | 386 | } while (1); |
373 | 387 | ||
374 | /* if ret is 1 then we just hit the end of the extent array */ | 388 | /* If ret is 1 then we just hit the end of the extent array */ |
375 | if (ret == 1) | 389 | if (ret == 1) |
376 | ret = 0; | 390 | ret = 0; |
377 | 391 | ||
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 9dd126276c9f..ed9ba6fe04f5 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c | |||
@@ -61,7 +61,7 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino) | |||
61 | inode->i_op = &page_symlink_inode_operations; | 61 | inode->i_op = &page_symlink_inode_operations; |
62 | inode->i_mapping->a_ops = &jfs_aops; | 62 | inode->i_mapping->a_ops = &jfs_aops; |
63 | } else { | 63 | } else { |
64 | inode->i_op = &jfs_symlink_inode_operations; | 64 | inode->i_op = &jfs_fast_symlink_inode_operations; |
65 | /* | 65 | /* |
66 | * The inline data should be null-terminated, but | 66 | * The inline data should be null-terminated, but |
67 | * don't let on-disk corruption crash the kernel | 67 | * don't let on-disk corruption crash the kernel |
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 6c4dfcbf3f55..9e2f6a721668 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c | |||
@@ -196,7 +196,7 @@ int dbMount(struct inode *ipbmap) | |||
196 | bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); | 196 | bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); |
197 | bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); | 197 | bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); |
198 | bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); | 198 | bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); |
199 | bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth); | 199 | bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight); |
200 | bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); | 200 | bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); |
201 | bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); | 201 | bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); |
202 | bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); | 202 | bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); |
@@ -288,7 +288,7 @@ int dbSync(struct inode *ipbmap) | |||
288 | dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); | 288 | dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); |
289 | dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); | 289 | dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); |
290 | dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); | 290 | dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); |
291 | dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth); | 291 | dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight); |
292 | dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); | 292 | dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); |
293 | dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); | 293 | dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); |
294 | dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); | 294 | dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); |
@@ -1441,7 +1441,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
1441 | * tree index of this allocation group within the control page. | 1441 | * tree index of this allocation group within the control page. |
1442 | */ | 1442 | */ |
1443 | agperlev = | 1443 | agperlev = |
1444 | (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth; | 1444 | (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth; |
1445 | ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); | 1445 | ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); |
1446 | 1446 | ||
1447 | /* dmap control page trees fan-out by 4 and a single allocation | 1447 | /* dmap control page trees fan-out by 4 and a single allocation |
@@ -1460,7 +1460,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
1460 | * the subtree to find the leftmost leaf that describes this | 1460 | * the subtree to find the leftmost leaf that describes this |
1461 | * free space. | 1461 | * free space. |
1462 | */ | 1462 | */ |
1463 | for (k = bmp->db_agheigth; k > 0; k--) { | 1463 | for (k = bmp->db_agheight; k > 0; k--) { |
1464 | for (n = 0, m = (ti << 2) + 1; n < 4; n++) { | 1464 | for (n = 0, m = (ti << 2) + 1; n < 4; n++) { |
1465 | if (l2nb <= dcp->stree[m + n]) { | 1465 | if (l2nb <= dcp->stree[m + n]) { |
1466 | ti = m + n; | 1466 | ti = m + n; |
@@ -3607,7 +3607,7 @@ void dbFinalizeBmap(struct inode *ipbmap) | |||
3607 | } | 3607 | } |
3608 | 3608 | ||
3609 | /* | 3609 | /* |
3610 | * compute db_aglevel, db_agheigth, db_width, db_agstart: | 3610 | * compute db_aglevel, db_agheight, db_width, db_agstart: |
3611 | * an ag is covered in aglevel dmapctl summary tree, | 3611 | * an ag is covered in aglevel dmapctl summary tree, |
3612 | * at agheight level height (from leaf) with agwidth number of nodes | 3612 | * at agheight level height (from leaf) with agwidth number of nodes |
3613 | * each, which starts at agstart index node of the summary tree node | 3613 | * each, which starts at agstart index node of the summary tree node |
@@ -3616,9 +3616,9 @@ void dbFinalizeBmap(struct inode *ipbmap) | |||
3616 | bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); | 3616 | bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); |
3617 | l2nl = | 3617 | l2nl = |
3618 | bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL); | 3618 | bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL); |
3619 | bmp->db_agheigth = l2nl >> 1; | 3619 | bmp->db_agheight = l2nl >> 1; |
3620 | bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1)); | 3620 | bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1)); |
3621 | for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0; | 3621 | for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0; |
3622 | i--) { | 3622 | i--) { |
3623 | bmp->db_agstart += n; | 3623 | bmp->db_agstart += n; |
3624 | n <<= 2; | 3624 | n <<= 2; |
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 1a6eb41569bc..6dcb906c55d8 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h | |||
@@ -210,7 +210,7 @@ struct dbmap_disk { | |||
210 | __le32 dn_maxag; /* 4: max active alloc group number */ | 210 | __le32 dn_maxag; /* 4: max active alloc group number */ |
211 | __le32 dn_agpref; /* 4: preferred alloc group (hint) */ | 211 | __le32 dn_agpref; /* 4: preferred alloc group (hint) */ |
212 | __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ | 212 | __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ |
213 | __le32 dn_agheigth; /* 4: height in dmapctl of the AG */ | 213 | __le32 dn_agheight; /* 4: height in dmapctl of the AG */ |
214 | __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ | 214 | __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ |
215 | __le32 dn_agstart; /* 4: start tree index at AG height */ | 215 | __le32 dn_agstart; /* 4: start tree index at AG height */ |
216 | __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ | 216 | __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ |
@@ -229,7 +229,7 @@ struct dbmap { | |||
229 | int dn_maxag; /* max active alloc group number */ | 229 | int dn_maxag; /* max active alloc group number */ |
230 | int dn_agpref; /* preferred alloc group (hint) */ | 230 | int dn_agpref; /* preferred alloc group (hint) */ |
231 | int dn_aglevel; /* dmapctl level holding the AG */ | 231 | int dn_aglevel; /* dmapctl level holding the AG */ |
232 | int dn_agheigth; /* height in dmapctl of the AG */ | 232 | int dn_agheight; /* height in dmapctl of the AG */ |
233 | int dn_agwidth; /* width in dmapctl of the AG */ | 233 | int dn_agwidth; /* width in dmapctl of the AG */ |
234 | int dn_agstart; /* start tree index at AG height */ | 234 | int dn_agstart; /* start tree index at AG height */ |
235 | int dn_agl2size; /* l2 num of blks per alloc group */ | 235 | int dn_agl2size; /* l2 num of blks per alloc group */ |
@@ -255,7 +255,7 @@ struct bmap { | |||
255 | #define db_agsize db_bmap.dn_agsize | 255 | #define db_agsize db_bmap.dn_agsize |
256 | #define db_agl2size db_bmap.dn_agl2size | 256 | #define db_agl2size db_bmap.dn_agl2size |
257 | #define db_agwidth db_bmap.dn_agwidth | 257 | #define db_agwidth db_bmap.dn_agwidth |
258 | #define db_agheigth db_bmap.dn_agheigth | 258 | #define db_agheight db_bmap.dn_agheight |
259 | #define db_agstart db_bmap.dn_agstart | 259 | #define db_agstart db_bmap.dn_agstart |
260 | #define db_numag db_bmap.dn_numag | 260 | #define db_numag db_bmap.dn_numag |
261 | #define db_maxlevel db_bmap.dn_maxlevel | 261 | #define db_maxlevel db_bmap.dn_maxlevel |
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 79e2c79661df..9e6bda30a6e8 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h | |||
@@ -48,5 +48,6 @@ extern const struct file_operations jfs_dir_operations; | |||
48 | extern const struct inode_operations jfs_file_inode_operations; | 48 | extern const struct inode_operations jfs_file_inode_operations; |
49 | extern const struct file_operations jfs_file_operations; | 49 | extern const struct file_operations jfs_file_operations; |
50 | extern const struct inode_operations jfs_symlink_inode_operations; | 50 | extern const struct inode_operations jfs_symlink_inode_operations; |
51 | extern const struct inode_operations jfs_fast_symlink_inode_operations; | ||
51 | extern const struct dentry_operations jfs_ci_dentry_operations; | 52 | extern const struct dentry_operations jfs_ci_dentry_operations; |
52 | #endif /* _H_JFS_INODE */ | 53 | #endif /* _H_JFS_INODE */ |
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 4a3e9f39c21d..a9cf8e8675be 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -956,7 +956,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, | |||
956 | */ | 956 | */ |
957 | 957 | ||
958 | if (ssize <= IDATASIZE) { | 958 | if (ssize <= IDATASIZE) { |
959 | ip->i_op = &jfs_symlink_inode_operations; | 959 | ip->i_op = &jfs_fast_symlink_inode_operations; |
960 | 960 | ||
961 | i_fastsymlink = JFS_IP(ip)->i_inline; | 961 | i_fastsymlink = JFS_IP(ip)->i_inline; |
962 | memcpy(i_fastsymlink, name, ssize); | 962 | memcpy(i_fastsymlink, name, ssize); |
@@ -978,7 +978,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, | |||
978 | else { | 978 | else { |
979 | jfs_info("jfs_symlink: allocate extent ip:0x%p", ip); | 979 | jfs_info("jfs_symlink: allocate extent ip:0x%p", ip); |
980 | 980 | ||
981 | ip->i_op = &page_symlink_inode_operations; | 981 | ip->i_op = &jfs_symlink_inode_operations; |
982 | ip->i_mapping->a_ops = &jfs_aops; | 982 | ip->i_mapping->a_ops = &jfs_aops; |
983 | 983 | ||
984 | /* | 984 | /* |
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 7f24a0bb08ca..1aba0039f1c9 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c | |||
@@ -81,6 +81,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
81 | struct inode *iplist[1]; | 81 | struct inode *iplist[1]; |
82 | struct jfs_superblock *j_sb, *j_sb2; | 82 | struct jfs_superblock *j_sb, *j_sb2; |
83 | uint old_agsize; | 83 | uint old_agsize; |
84 | int agsizechanged = 0; | ||
84 | struct buffer_head *bh, *bh2; | 85 | struct buffer_head *bh, *bh2; |
85 | 86 | ||
86 | /* If the volume hasn't grown, get out now */ | 87 | /* If the volume hasn't grown, get out now */ |
@@ -333,6 +334,9 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
333 | */ | 334 | */ |
334 | if ((rc = dbExtendFS(ipbmap, XAddress, nblocks))) | 335 | if ((rc = dbExtendFS(ipbmap, XAddress, nblocks))) |
335 | goto error_out; | 336 | goto error_out; |
337 | |||
338 | agsizechanged |= (bmp->db_agsize != old_agsize); | ||
339 | |||
336 | /* | 340 | /* |
337 | * the map now has extended to cover additional nblocks: | 341 | * the map now has extended to cover additional nblocks: |
338 | * dn_mapsize = oldMapsize + nblocks; | 342 | * dn_mapsize = oldMapsize + nblocks; |
@@ -432,7 +436,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
432 | * will correctly identify the new ag); | 436 | * will correctly identify the new ag); |
433 | */ | 437 | */ |
434 | /* if new AG size the same as old AG size, done! */ | 438 | /* if new AG size the same as old AG size, done! */ |
435 | if (bmp->db_agsize != old_agsize) { | 439 | if (agsizechanged) { |
436 | if ((rc = diExtendFS(ipimap, ipbmap))) | 440 | if ((rc = diExtendFS(ipimap, ipbmap))) |
437 | goto error_out; | 441 | goto error_out; |
438 | 442 | ||
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 157382fa6256..b66832ac33ac 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -446,10 +446,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) | |||
446 | /* initialize the mount flag and determine the default error handler */ | 446 | /* initialize the mount flag and determine the default error handler */ |
447 | flag = JFS_ERR_REMOUNT_RO; | 447 | flag = JFS_ERR_REMOUNT_RO; |
448 | 448 | ||
449 | if (!parse_options((char *) data, sb, &newLVSize, &flag)) { | 449 | if (!parse_options((char *) data, sb, &newLVSize, &flag)) |
450 | kfree(sbi); | 450 | goto out_kfree; |
451 | return -EINVAL; | ||
452 | } | ||
453 | sbi->flag = flag; | 451 | sbi->flag = flag; |
454 | 452 | ||
455 | #ifdef CONFIG_JFS_POSIX_ACL | 453 | #ifdef CONFIG_JFS_POSIX_ACL |
@@ -458,7 +456,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) | |||
458 | 456 | ||
459 | if (newLVSize) { | 457 | if (newLVSize) { |
460 | printk(KERN_ERR "resize option for remount only\n"); | 458 | printk(KERN_ERR "resize option for remount only\n"); |
461 | return -EINVAL; | 459 | goto out_kfree; |
462 | } | 460 | } |
463 | 461 | ||
464 | /* | 462 | /* |
@@ -478,7 +476,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) | |||
478 | inode = new_inode(sb); | 476 | inode = new_inode(sb); |
479 | if (inode == NULL) { | 477 | if (inode == NULL) { |
480 | ret = -ENOMEM; | 478 | ret = -ENOMEM; |
481 | goto out_kfree; | 479 | goto out_unload; |
482 | } | 480 | } |
483 | inode->i_ino = 0; | 481 | inode->i_ino = 0; |
484 | inode->i_nlink = 1; | 482 | inode->i_nlink = 1; |
@@ -550,9 +548,10 @@ out_mount_failed: | |||
550 | make_bad_inode(sbi->direct_inode); | 548 | make_bad_inode(sbi->direct_inode); |
551 | iput(sbi->direct_inode); | 549 | iput(sbi->direct_inode); |
552 | sbi->direct_inode = NULL; | 550 | sbi->direct_inode = NULL; |
553 | out_kfree: | 551 | out_unload: |
554 | if (sbi->nls_tab) | 552 | if (sbi->nls_tab) |
555 | unload_nls(sbi->nls_tab); | 553 | unload_nls(sbi->nls_tab); |
554 | out_kfree: | ||
556 | kfree(sbi); | 555 | kfree(sbi); |
557 | return ret; | 556 | return ret; |
558 | } | 557 | } |
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c index 4af1a05aad0a..205b946d8e0d 100644 --- a/fs/jfs/symlink.c +++ b/fs/jfs/symlink.c | |||
@@ -29,9 +29,21 @@ static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
29 | return NULL; | 29 | return NULL; |
30 | } | 30 | } |
31 | 31 | ||
32 | const struct inode_operations jfs_symlink_inode_operations = { | 32 | const struct inode_operations jfs_fast_symlink_inode_operations = { |
33 | .readlink = generic_readlink, | 33 | .readlink = generic_readlink, |
34 | .follow_link = jfs_follow_link, | 34 | .follow_link = jfs_follow_link, |
35 | .setattr = jfs_setattr, | ||
36 | .setxattr = jfs_setxattr, | ||
37 | .getxattr = jfs_getxattr, | ||
38 | .listxattr = jfs_listxattr, | ||
39 | .removexattr = jfs_removexattr, | ||
40 | }; | ||
41 | |||
42 | const struct inode_operations jfs_symlink_inode_operations = { | ||
43 | .readlink = generic_readlink, | ||
44 | .follow_link = page_follow_link_light, | ||
45 | .put_link = page_put_link, | ||
46 | .setattr = jfs_setattr, | ||
35 | .setxattr = jfs_setxattr, | 47 | .setxattr = jfs_setxattr, |
36 | .getxattr = jfs_getxattr, | 48 | .getxattr = jfs_getxattr, |
37 | .listxattr = jfs_listxattr, | 49 | .listxattr = jfs_listxattr, |
diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c index 84e36f52fe95..76c242fbe1b0 100644 --- a/fs/logfs/gc.c +++ b/fs/logfs/gc.c | |||
@@ -459,6 +459,14 @@ static void __logfs_gc_pass(struct super_block *sb, int target) | |||
459 | struct logfs_block *block; | 459 | struct logfs_block *block; |
460 | int round, progress, last_progress = 0; | 460 | int round, progress, last_progress = 0; |
461 | 461 | ||
462 | /* | ||
463 | * Doing too many changes to the segfile at once would result | ||
464 | * in a large number of aliases. Write the journal before | ||
465 | * things get out of hand. | ||
466 | */ | ||
467 | if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES) | ||
468 | logfs_write_anchor(sb); | ||
469 | |||
462 | if (no_free_segments(sb) >= target && | 470 | if (no_free_segments(sb) >= target && |
463 | super->s_no_object_aliases < MAX_OBJ_ALIASES) | 471 | super->s_no_object_aliases < MAX_OBJ_ALIASES) |
464 | return; | 472 | return; |
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 33bd260b8309..fb0a613f885b 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c | |||
@@ -389,7 +389,10 @@ static void journal_get_erase_count(struct logfs_area *area) | |||
389 | static int journal_erase_segment(struct logfs_area *area) | 389 | static int journal_erase_segment(struct logfs_area *area) |
390 | { | 390 | { |
391 | struct super_block *sb = area->a_sb; | 391 | struct super_block *sb = area->a_sb; |
392 | struct logfs_segment_header sh; | 392 | union { |
393 | struct logfs_segment_header sh; | ||
394 | unsigned char c[ALIGN(sizeof(struct logfs_segment_header), 16)]; | ||
395 | } u; | ||
393 | u64 ofs; | 396 | u64 ofs; |
394 | int err; | 397 | int err; |
395 | 398 | ||
@@ -397,20 +400,21 @@ static int journal_erase_segment(struct logfs_area *area) | |||
397 | if (err) | 400 | if (err) |
398 | return err; | 401 | return err; |
399 | 402 | ||
400 | sh.pad = 0; | 403 | memset(&u, 0, sizeof(u)); |
401 | sh.type = SEG_JOURNAL; | 404 | u.sh.pad = 0; |
402 | sh.level = 0; | 405 | u.sh.type = SEG_JOURNAL; |
403 | sh.segno = cpu_to_be32(area->a_segno); | 406 | u.sh.level = 0; |
404 | sh.ec = cpu_to_be32(area->a_erase_count); | 407 | u.sh.segno = cpu_to_be32(area->a_segno); |
405 | sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); | 408 | u.sh.ec = cpu_to_be32(area->a_erase_count); |
406 | sh.crc = logfs_crc32(&sh, sizeof(sh), 4); | 409 | u.sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); |
410 | u.sh.crc = logfs_crc32(&u.sh, sizeof(u.sh), 4); | ||
407 | 411 | ||
408 | /* This causes a bug in segment.c. Not yet. */ | 412 | /* This causes a bug in segment.c. Not yet. */ |
409 | //logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0); | 413 | //logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0); |
410 | 414 | ||
411 | ofs = dev_ofs(sb, area->a_segno, 0); | 415 | ofs = dev_ofs(sb, area->a_segno, 0); |
412 | area->a_used_bytes = ALIGN(sizeof(sh), 16); | 416 | area->a_used_bytes = sizeof(u); |
413 | logfs_buf_write(area, ofs, &sh, sizeof(sh)); | 417 | logfs_buf_write(area, ofs, &u, sizeof(u)); |
414 | return 0; | 418 | return 0; |
415 | } | 419 | } |
416 | 420 | ||
@@ -494,6 +498,8 @@ static void account_shadows(struct super_block *sb) | |||
494 | 498 | ||
495 | btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow); | 499 | btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow); |
496 | btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow); | 500 | btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow); |
501 | btree_grim_visitor32(&tree->segment_map, 0, NULL); | ||
502 | tree->no_shadowed_segments = 0; | ||
497 | 503 | ||
498 | if (li->li_block) { | 504 | if (li->li_block) { |
499 | /* | 505 | /* |
@@ -607,9 +613,9 @@ static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type, | |||
607 | if (len == 0) | 613 | if (len == 0) |
608 | return logfs_write_header(super, header, 0, type); | 614 | return logfs_write_header(super, header, 0, type); |
609 | 615 | ||
616 | BUG_ON(len > sb->s_blocksize); | ||
610 | compr_len = logfs_compress(buf, data, len, sb->s_blocksize); | 617 | compr_len = logfs_compress(buf, data, len, sb->s_blocksize); |
611 | if (compr_len < 0 || type == JE_ANCHOR) { | 618 | if (compr_len < 0 || type == JE_ANCHOR) { |
612 | BUG_ON(len > sb->s_blocksize); | ||
613 | memcpy(data, buf, len); | 619 | memcpy(data, buf, len); |
614 | compr_len = len; | 620 | compr_len = len; |
615 | compr = COMPR_NONE; | 621 | compr = COMPR_NONE; |
@@ -661,6 +667,7 @@ static int logfs_write_je_buf(struct super_block *sb, void *buf, u16 type, | |||
661 | if (ofs < 0) | 667 | if (ofs < 0) |
662 | return ofs; | 668 | return ofs; |
663 | logfs_buf_write(area, ofs, super->s_compressed_je, len); | 669 | logfs_buf_write(area, ofs, super->s_compressed_je, len); |
670 | BUG_ON(super->s_no_je >= MAX_JOURNAL_ENTRIES); | ||
664 | super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs); | 671 | super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs); |
665 | return 0; | 672 | return 0; |
666 | } | 673 | } |
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index b84b0eec6024..0a3df1a0c936 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h | |||
@@ -257,10 +257,14 @@ struct logfs_shadow { | |||
257 | * struct shadow_tree | 257 | * struct shadow_tree |
258 | * @new: shadows where old_ofs==0, indexed by new_ofs | 258 | * @new: shadows where old_ofs==0, indexed by new_ofs |
259 | * @old: shadows where old_ofs!=0, indexed by old_ofs | 259 | * @old: shadows where old_ofs!=0, indexed by old_ofs |
260 | * @segment_map: bitfield of segments containing shadows | ||
| | 261 | * @no_shadowed_segments: number of segments containing shadows | ||
260 | */ | 262 | */ |
261 | struct shadow_tree { | 263 | struct shadow_tree { |
262 | struct btree_head64 new; | 264 | struct btree_head64 new; |
263 | struct btree_head64 old; | 265 | struct btree_head64 old; |
266 | struct btree_head32 segment_map; | ||
267 | int no_shadowed_segments; | ||
264 | }; | 268 | }; |
265 | 269 | ||
266 | struct object_alias_item { | 270 | struct object_alias_item { |
@@ -305,13 +309,14 @@ typedef int write_alias_t(struct super_block *sb, u64 ino, u64 bix, | |||
305 | level_t level, int child_no, __be64 val); | 309 | level_t level, int child_no, __be64 val); |
306 | struct logfs_block_ops { | 310 | struct logfs_block_ops { |
307 | void (*write_block)(struct logfs_block *block); | 311 | void (*write_block)(struct logfs_block *block); |
308 | gc_level_t (*block_level)(struct logfs_block *block); | ||
309 | void (*free_block)(struct super_block *sb, struct logfs_block*block); | 312 | void (*free_block)(struct super_block *sb, struct logfs_block*block); |
310 | int (*write_alias)(struct super_block *sb, | 313 | int (*write_alias)(struct super_block *sb, |
311 | struct logfs_block *block, | 314 | struct logfs_block *block, |
312 | write_alias_t *write_one_alias); | 315 | write_alias_t *write_one_alias); |
313 | }; | 316 | }; |
314 | 317 | ||
318 | #define MAX_JOURNAL_ENTRIES 256 | ||
319 | |||
315 | struct logfs_super { | 320 | struct logfs_super { |
316 | struct mtd_info *s_mtd; /* underlying device */ | 321 | struct mtd_info *s_mtd; /* underlying device */ |
317 | struct block_device *s_bdev; /* underlying device */ | 322 | struct block_device *s_bdev; /* underlying device */ |
@@ -378,7 +383,7 @@ struct logfs_super { | |||
378 | u32 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */ | 383 | u32 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */ |
379 | u64 s_last_version; | 384 | u64 s_last_version; |
380 | struct logfs_area *s_journal_area; /* open journal segment */ | 385 | struct logfs_area *s_journal_area; /* open journal segment */ |
381 | __be64 s_je_array[64]; | 386 | __be64 s_je_array[MAX_JOURNAL_ENTRIES]; |
382 | int s_no_je; | 387 | int s_no_je; |
383 | 388 | ||
384 | int s_sum_index; /* for the 12 summaries */ | 389 | int s_sum_index; /* for the 12 summaries */ |
@@ -722,4 +727,10 @@ static inline struct logfs_area *get_area(struct super_block *sb, | |||
722 | return logfs_super(sb)->s_area[(__force u8)gc_level]; | 727 | return logfs_super(sb)->s_area[(__force u8)gc_level]; |
723 | } | 728 | } |
724 | 729 | ||
730 | static inline void logfs_mempool_destroy(mempool_t *pool) | ||
731 | { | ||
732 | if (pool) | ||
733 | mempool_destroy(pool); | ||
734 | } | ||
735 | |||
725 | #endif | 736 | #endif |
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index bff40253dfb2..3159db6958e5 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c | |||
@@ -430,25 +430,6 @@ static void inode_write_block(struct logfs_block *block) | |||
430 | } | 430 | } |
431 | } | 431 | } |
432 | 432 | ||
433 | static gc_level_t inode_block_level(struct logfs_block *block) | ||
434 | { | ||
435 | BUG_ON(block->inode->i_ino == LOGFS_INO_MASTER); | ||
436 | return GC_LEVEL(LOGFS_MAX_LEVELS); | ||
437 | } | ||
438 | |||
439 | static gc_level_t indirect_block_level(struct logfs_block *block) | ||
440 | { | ||
441 | struct page *page; | ||
442 | struct inode *inode; | ||
443 | u64 bix; | ||
444 | level_t level; | ||
445 | |||
446 | page = block->page; | ||
447 | inode = page->mapping->host; | ||
448 | logfs_unpack_index(page->index, &bix, &level); | ||
449 | return expand_level(inode->i_ino, level); | ||
450 | } | ||
451 | |||
452 | /* | 433 | /* |
453 | * This silences a false, yet annoying gcc warning. I hate it when my editor | 434 | * This silences a false, yet annoying gcc warning. I hate it when my editor |
454 | * jumps into bitops.h each time I recompile this file. | 435 | * jumps into bitops.h each time I recompile this file. |
@@ -587,14 +568,12 @@ static void indirect_free_block(struct super_block *sb, | |||
587 | 568 | ||
588 | static struct logfs_block_ops inode_block_ops = { | 569 | static struct logfs_block_ops inode_block_ops = { |
589 | .write_block = inode_write_block, | 570 | .write_block = inode_write_block, |
590 | .block_level = inode_block_level, | ||
591 | .free_block = inode_free_block, | 571 | .free_block = inode_free_block, |
592 | .write_alias = inode_write_alias, | 572 | .write_alias = inode_write_alias, |
593 | }; | 573 | }; |
594 | 574 | ||
595 | struct logfs_block_ops indirect_block_ops = { | 575 | struct logfs_block_ops indirect_block_ops = { |
596 | .write_block = indirect_write_block, | 576 | .write_block = indirect_write_block, |
597 | .block_level = indirect_block_level, | ||
598 | .free_block = indirect_free_block, | 577 | .free_block = indirect_free_block, |
599 | .write_alias = indirect_write_alias, | 578 | .write_alias = indirect_write_alias, |
600 | }; | 579 | }; |
@@ -1241,6 +1220,18 @@ static void free_shadow(struct inode *inode, struct logfs_shadow *shadow) | |||
1241 | mempool_free(shadow, super->s_shadow_pool); | 1220 | mempool_free(shadow, super->s_shadow_pool); |
1242 | } | 1221 | } |
1243 | 1222 | ||
1223 | static void mark_segment(struct shadow_tree *tree, u32 segno) | ||
1224 | { | ||
1225 | int err; | ||
1226 | |||
1227 | if (!btree_lookup32(&tree->segment_map, segno)) { | ||
1228 | err = btree_insert32(&tree->segment_map, segno, (void *)1, | ||
1229 | GFP_NOFS); | ||
1230 | BUG_ON(err); | ||
1231 | tree->no_shadowed_segments++; | ||
1232 | } | ||
1233 | } | ||
1234 | |||
1244 | /** | 1235 | /** |
1245 | * fill_shadow_tree - Propagate shadow tree changes due to a write | 1236 | * fill_shadow_tree - Propagate shadow tree changes due to a write |
1246 | * @inode: Inode owning the page | 1237 | * @inode: Inode owning the page |
@@ -1288,6 +1279,8 @@ static void fill_shadow_tree(struct inode *inode, struct page *page, | |||
1288 | 1279 | ||
1289 | super->s_dirty_used_bytes += shadow->new_len; | 1280 | super->s_dirty_used_bytes += shadow->new_len; |
1290 | super->s_dirty_free_bytes += shadow->old_len; | 1281 | super->s_dirty_free_bytes += shadow->old_len; |
1282 | mark_segment(tree, shadow->old_ofs >> super->s_segshift); | ||
1283 | mark_segment(tree, shadow->new_ofs >> super->s_segshift); | ||
1291 | } | 1284 | } |
1292 | } | 1285 | } |
1293 | 1286 | ||
@@ -1845,19 +1838,37 @@ static int __logfs_truncate(struct inode *inode, u64 size) | |||
1845 | return logfs_truncate_direct(inode, size); | 1838 | return logfs_truncate_direct(inode, size); |
1846 | } | 1839 | } |
1847 | 1840 | ||
1848 | int logfs_truncate(struct inode *inode, u64 size) | 1841 | /* |
1842 | * Truncate, by changing the segment file, can consume a fair amount | ||
1843 | * of resources. So back off from time to time and do some GC. | ||
| | 1844 | * 8 MiB (2048 blocks at a 4 KiB block size) should be well within safety limits even if | ||
1845 | * every single block resided in a different segment. | ||
1846 | */ | ||
1847 | #define TRUNCATE_STEP (8 * 1024 * 1024) | ||
1848 | int logfs_truncate(struct inode *inode, u64 target) | ||
1849 | { | 1849 | { |
1850 | struct super_block *sb = inode->i_sb; | 1850 | struct super_block *sb = inode->i_sb; |
1851 | int err; | 1851 | u64 size = i_size_read(inode); |
1852 | int err = 0; | ||
1852 | 1853 | ||
1853 | logfs_get_wblocks(sb, NULL, 1); | 1854 | size = ALIGN(size, TRUNCATE_STEP); |
1854 | err = __logfs_truncate(inode, size); | 1855 | while (size > target) { |
1855 | if (!err) | 1856 | if (size > TRUNCATE_STEP) |
1856 | err = __logfs_write_inode(inode, 0); | 1857 | size -= TRUNCATE_STEP; |
1857 | logfs_put_wblocks(sb, NULL, 1); | 1858 | else |
1859 | size = 0; | ||
1860 | if (size < target) | ||
1861 | size = target; | ||
1862 | |||
1863 | logfs_get_wblocks(sb, NULL, 1); | ||
| | 1864 | err = __logfs_truncate(inode, size); | ||
1865 | if (!err) | ||
1866 | err = __logfs_write_inode(inode, 0); | ||
1867 | logfs_put_wblocks(sb, NULL, 1); | ||
1868 | } | ||
1858 | 1869 | ||
1859 | if (!err) | 1870 | if (!err) |
1860 | err = vmtruncate(inode, size); | 1871 | err = vmtruncate(inode, target); |
1861 | 1872 | ||
1862 | /* I don't trust error recovery yet. */ | 1873 | /* I don't trust error recovery yet. */ |
1863 | WARN_ON(err); | 1874 | WARN_ON(err); |
@@ -2251,8 +2262,6 @@ void logfs_cleanup_rw(struct super_block *sb) | |||
2251 | struct logfs_super *super = logfs_super(sb); | 2262 | struct logfs_super *super = logfs_super(sb); |
2252 | 2263 | ||
2253 | destroy_meta_inode(super->s_segfile_inode); | 2264 | destroy_meta_inode(super->s_segfile_inode); |
2254 | if (super->s_block_pool) | 2265 | logfs_mempool_destroy(super->s_block_pool); |
2255 | mempool_destroy(super->s_block_pool); | 2266 | logfs_mempool_destroy(super->s_shadow_pool); |
2256 | if (super->s_shadow_pool) | ||
2257 | mempool_destroy(super->s_shadow_pool); | ||
2258 | } | 2267 | } |
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index 801a3a141625..f77ce2b470ba 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c | |||
@@ -183,14 +183,8 @@ static int btree_write_alias(struct super_block *sb, struct logfs_block *block, | |||
183 | return 0; | 183 | return 0; |
184 | } | 184 | } |
185 | 185 | ||
186 | static gc_level_t btree_block_level(struct logfs_block *block) | ||
187 | { | ||
188 | return expand_level(block->ino, block->level); | ||
189 | } | ||
190 | |||
191 | static struct logfs_block_ops btree_block_ops = { | 186 | static struct logfs_block_ops btree_block_ops = { |
192 | .write_block = btree_write_block, | 187 | .write_block = btree_write_block, |
193 | .block_level = btree_block_level, | ||
194 | .free_block = __free_block, | 188 | .free_block = __free_block, |
195 | .write_alias = btree_write_alias, | 189 | .write_alias = btree_write_alias, |
196 | }; | 190 | }; |
@@ -919,7 +913,7 @@ err: | |||
919 | for (i--; i >= 0; i--) | 913 | for (i--; i >= 0; i--) |
920 | free_area(super->s_area[i]); | 914 | free_area(super->s_area[i]); |
921 | free_area(super->s_journal_area); | 915 | free_area(super->s_journal_area); |
922 | mempool_destroy(super->s_alias_pool); | 916 | logfs_mempool_destroy(super->s_alias_pool); |
923 | return -ENOMEM; | 917 | return -ENOMEM; |
924 | } | 918 | } |
925 | 919 | ||
diff --git a/fs/logfs/super.c b/fs/logfs/super.c index b60bfac3263c..d7c23ed8349a 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include "logfs.h" | 12 | #include "logfs.h" |
13 | #include <linux/bio.h> | 13 | #include <linux/bio.h> |
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include <linux/blkdev.h> | ||
15 | #include <linux/mtd/mtd.h> | 16 | #include <linux/mtd/mtd.h> |
16 | #include <linux/statfs.h> | 17 | #include <linux/statfs.h> |
17 | #include <linux/buffer_head.h> | 18 | #include <linux/buffer_head.h> |
@@ -137,6 +138,10 @@ static int logfs_sb_set(struct super_block *sb, void *_super) | |||
137 | sb->s_fs_info = super; | 138 | sb->s_fs_info = super; |
138 | sb->s_mtd = super->s_mtd; | 139 | sb->s_mtd = super->s_mtd; |
139 | sb->s_bdev = super->s_bdev; | 140 | sb->s_bdev = super->s_bdev; |
141 | if (sb->s_bdev) | ||
142 | sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info; | ||
143 | if (sb->s_mtd) | ||
144 | sb->s_bdi = sb->s_mtd->backing_dev_info; | ||
140 | return 0; | 145 | return 0; |
141 | } | 146 | } |
142 | 147 | ||
@@ -328,27 +333,27 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt) | |||
328 | goto fail; | 333 | goto fail; |
329 | 334 | ||
330 | sb->s_root = d_alloc_root(rootdir); | 335 | sb->s_root = d_alloc_root(rootdir); |
331 | if (!sb->s_root) | 336 | if (!sb->s_root) { |
332 | goto fail2; | 337 | iput(rootdir); |
338 | goto fail; | ||
339 | } | ||
333 | 340 | ||
334 | super->s_erase_page = alloc_pages(GFP_KERNEL, 0); | 341 | super->s_erase_page = alloc_pages(GFP_KERNEL, 0); |
335 | if (!super->s_erase_page) | 342 | if (!super->s_erase_page) |
336 | goto fail2; | 343 | goto fail; |
337 | memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE); | 344 | memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE); |
338 | 345 | ||
339 | /* FIXME: check for read-only mounts */ | 346 | /* FIXME: check for read-only mounts */ |
340 | err = logfs_make_writeable(sb); | 347 | err = logfs_make_writeable(sb); |
341 | if (err) | 348 | if (err) |
342 | goto fail3; | 349 | goto fail1; |
343 | 350 | ||
344 | log_super("LogFS: Finished mounting\n"); | 351 | log_super("LogFS: Finished mounting\n"); |
345 | simple_set_mnt(mnt, sb); | 352 | simple_set_mnt(mnt, sb); |
346 | return 0; | 353 | return 0; |
347 | 354 | ||
348 | fail3: | 355 | fail1: |
349 | __free_page(super->s_erase_page); | 356 | __free_page(super->s_erase_page); |
350 | fail2: | ||
351 | iput(rootdir); | ||
352 | fail: | 357 | fail: |
353 | iput(logfs_super(sb)->s_master_inode); | 358 | iput(logfs_super(sb)->s_master_inode); |
354 | return -EIO; | 359 | return -EIO; |
@@ -452,6 +457,8 @@ static int logfs_read_sb(struct super_block *sb, int read_only) | |||
452 | 457 | ||
453 | btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool); | 458 | btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool); |
454 | btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool); | 459 | btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool); |
460 | btree_init_mempool32(&super->s_shadow_tree.segment_map, | ||
461 | super->s_btree_pool); | ||
455 | 462 | ||
456 | ret = logfs_init_mapping(sb); | 463 | ret = logfs_init_mapping(sb); |
457 | if (ret) | 464 | if (ret) |
@@ -516,8 +523,8 @@ static void logfs_kill_sb(struct super_block *sb) | |||
516 | if (super->s_erase_page) | 523 | if (super->s_erase_page) |
517 | __free_page(super->s_erase_page); | 524 | __free_page(super->s_erase_page); |
518 | super->s_devops->put_device(sb); | 525 | super->s_devops->put_device(sb); |
519 | mempool_destroy(super->s_btree_pool); | 526 | logfs_mempool_destroy(super->s_btree_pool); |
520 | mempool_destroy(super->s_alias_pool); | 527 | logfs_mempool_destroy(super->s_alias_pool); |
521 | kfree(super); | 528 | kfree(super); |
522 | log_super("LogFS: Finished unmounting\n"); | 529 | log_super("LogFS: Finished unmounting\n"); |
523 | } | 530 | } |
diff --git a/fs/namei.c b/fs/namei.c index a7dce91a7e42..b86b96fe1dc3 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1641,7 +1641,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
1641 | if (nd->last.name[nd->last.len]) { | 1641 | if (nd->last.name[nd->last.len]) { |
1642 | if (open_flag & O_CREAT) | 1642 | if (open_flag & O_CREAT) |
1643 | goto exit; | 1643 | goto exit; |
1644 | nd->flags |= LOOKUP_DIRECTORY; | 1644 | nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW; |
1645 | } | 1645 | } |
1646 | 1646 | ||
1647 | /* just plain open? */ | 1647 | /* just plain open? */ |
@@ -1830,6 +1830,8 @@ reval: | |||
1830 | } | 1830 | } |
1831 | if (open_flag & O_DIRECTORY) | 1831 | if (open_flag & O_DIRECTORY) |
1832 | nd.flags |= LOOKUP_DIRECTORY; | 1832 | nd.flags |= LOOKUP_DIRECTORY; |
1833 | if (!(open_flag & O_NOFOLLOW)) | ||
1834 | nd.flags |= LOOKUP_FOLLOW; | ||
1833 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); | 1835 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); |
1834 | while (unlikely(!filp)) { /* trailing symlink */ | 1836 | while (unlikely(!filp)) { /* trailing symlink */ |
1835 | struct path holder; | 1837 | struct path holder; |
@@ -1837,7 +1839,7 @@ reval: | |||
1837 | void *cookie; | 1839 | void *cookie; |
1838 | error = -ELOOP; | 1840 | error = -ELOOP; |
1839 | /* S_ISDIR part is a temporary automount kludge */ | 1841 | /* S_ISDIR part is a temporary automount kludge */ |
1840 | if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode)) | 1842 | if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode)) |
1841 | goto exit_dput; | 1843 | goto exit_dput; |
1842 | if (count++ == 32) | 1844 | if (count++ == 32) |
1843 | goto exit_dput; | 1845 | goto exit_dput; |
@@ -2174,8 +2176,10 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2174 | error = security_inode_rmdir(dir, dentry); | 2176 | error = security_inode_rmdir(dir, dentry); |
2175 | if (!error) { | 2177 | if (!error) { |
2176 | error = dir->i_op->rmdir(dir, dentry); | 2178 | error = dir->i_op->rmdir(dir, dentry); |
2177 | if (!error) | 2179 | if (!error) { |
2178 | dentry->d_inode->i_flags |= S_DEAD; | 2180 | dentry->d_inode->i_flags |= S_DEAD; |
2181 | dont_mount(dentry); | ||
2182 | } | ||
2179 | } | 2183 | } |
2180 | } | 2184 | } |
2181 | mutex_unlock(&dentry->d_inode->i_mutex); | 2185 | mutex_unlock(&dentry->d_inode->i_mutex); |
@@ -2259,7 +2263,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) | |||
2259 | if (!error) { | 2263 | if (!error) { |
2260 | error = dir->i_op->unlink(dir, dentry); | 2264 | error = dir->i_op->unlink(dir, dentry); |
2261 | if (!error) | 2265 | if (!error) |
2262 | dentry->d_inode->i_flags |= S_DEAD; | 2266 | dont_mount(dentry); |
2263 | } | 2267 | } |
2264 | } | 2268 | } |
2265 | mutex_unlock(&dentry->d_inode->i_mutex); | 2269 | mutex_unlock(&dentry->d_inode->i_mutex); |
@@ -2570,17 +2574,20 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | |||
2570 | return error; | 2574 | return error; |
2571 | 2575 | ||
2572 | target = new_dentry->d_inode; | 2576 | target = new_dentry->d_inode; |
2573 | if (target) { | 2577 | if (target) |
2574 | mutex_lock(&target->i_mutex); | 2578 | mutex_lock(&target->i_mutex); |
2575 | dentry_unhash(new_dentry); | ||
2576 | } | ||
2577 | if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) | 2579 | if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) |
2578 | error = -EBUSY; | 2580 | error = -EBUSY; |
2579 | else | 2581 | else { |
2582 | if (target) | ||
2583 | dentry_unhash(new_dentry); | ||
2580 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); | 2584 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); |
2585 | } | ||
2581 | if (target) { | 2586 | if (target) { |
2582 | if (!error) | 2587 | if (!error) { |
2583 | target->i_flags |= S_DEAD; | 2588 | target->i_flags |= S_DEAD; |
2589 | dont_mount(new_dentry); | ||
2590 | } | ||
2584 | mutex_unlock(&target->i_mutex); | 2591 | mutex_unlock(&target->i_mutex); |
2585 | if (d_unhashed(new_dentry)) | 2592 | if (d_unhashed(new_dentry)) |
2586 | d_rehash(new_dentry); | 2593 | d_rehash(new_dentry); |
@@ -2612,7 +2619,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, | |||
2612 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); | 2619 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); |
2613 | if (!error) { | 2620 | if (!error) { |
2614 | if (target) | 2621 | if (target) |
2615 | target->i_flags |= S_DEAD; | 2622 | dont_mount(new_dentry); |
2616 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) | 2623 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) |
2617 | d_move(old_dentry, new_dentry); | 2624 | d_move(old_dentry, new_dentry); |
2618 | } | 2625 | } |
diff --git a/fs/namespace.c b/fs/namespace.c index 8174c8ab5c70..f20cb57d1067 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1432,7 +1432,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path) | |||
1432 | 1432 | ||
1433 | err = -ENOENT; | 1433 | err = -ENOENT; |
1434 | mutex_lock(&path->dentry->d_inode->i_mutex); | 1434 | mutex_lock(&path->dentry->d_inode->i_mutex); |
1435 | if (IS_DEADDIR(path->dentry->d_inode)) | 1435 | if (cant_mount(path->dentry)) |
1436 | goto out_unlock; | 1436 | goto out_unlock; |
1437 | 1437 | ||
1438 | err = security_sb_check_sb(mnt, path); | 1438 | err = security_sb_check_sb(mnt, path); |
@@ -1623,7 +1623,7 @@ static int do_move_mount(struct path *path, char *old_name) | |||
1623 | 1623 | ||
1624 | err = -ENOENT; | 1624 | err = -ENOENT; |
1625 | mutex_lock(&path->dentry->d_inode->i_mutex); | 1625 | mutex_lock(&path->dentry->d_inode->i_mutex); |
1626 | if (IS_DEADDIR(path->dentry->d_inode)) | 1626 | if (cant_mount(path->dentry)) |
1627 | goto out1; | 1627 | goto out1; |
1628 | 1628 | ||
1629 | if (d_unlinked(path->dentry)) | 1629 | if (d_unlinked(path->dentry)) |
@@ -2234,7 +2234,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2234 | if (!check_mnt(root.mnt)) | 2234 | if (!check_mnt(root.mnt)) |
2235 | goto out2; | 2235 | goto out2; |
2236 | error = -ENOENT; | 2236 | error = -ENOENT; |
2237 | if (IS_DEADDIR(new.dentry->d_inode)) | 2237 | if (cant_mount(old.dentry)) |
2238 | goto out2; | 2238 | goto out2; |
2239 | if (d_unlinked(new.dentry)) | 2239 | if (d_unlinked(new.dentry)) |
2240 | goto out2; | 2240 | goto out2; |
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index cf98da1be23e..fa3385154023 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
@@ -526,10 +526,15 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) | |||
526 | sb->s_blocksize_bits = 10; | 526 | sb->s_blocksize_bits = 10; |
527 | sb->s_magic = NCP_SUPER_MAGIC; | 527 | sb->s_magic = NCP_SUPER_MAGIC; |
528 | sb->s_op = &ncp_sops; | 528 | sb->s_op = &ncp_sops; |
529 | sb->s_bdi = &server->bdi; | ||
529 | 530 | ||
530 | server = NCP_SBP(sb); | 531 | server = NCP_SBP(sb); |
531 | memset(server, 0, sizeof(*server)); | 532 | memset(server, 0, sizeof(*server)); |
532 | 533 | ||
534 | error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY); | ||
535 | if (error) | ||
536 | goto out_bdi; | ||
537 | |||
533 | server->ncp_filp = ncp_filp; | 538 | server->ncp_filp = ncp_filp; |
534 | server->ncp_sock = sock; | 539 | server->ncp_sock = sock; |
535 | 540 | ||
@@ -719,6 +724,8 @@ out_fput2: | |||
719 | if (server->info_filp) | 724 | if (server->info_filp) |
720 | fput(server->info_filp); | 725 | fput(server->info_filp); |
721 | out_fput: | 726 | out_fput: |
727 | bdi_destroy(&server->bdi); | ||
728 | out_bdi: | ||
722 | /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>: | 729 | /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>: |
723 | * | 730 | * |
724 | * The previously used put_filp(ncp_filp); was bogous, since | 731 | * The previously used put_filp(ncp_filp); was bogous, since |
@@ -756,6 +763,7 @@ static void ncp_put_super(struct super_block *sb) | |||
756 | kill_pid(server->m.wdog_pid, SIGTERM, 1); | 763 | kill_pid(server->m.wdog_pid, SIGTERM, 1); |
757 | put_pid(server->m.wdog_pid); | 764 | put_pid(server->m.wdog_pid); |
758 | 765 | ||
766 | bdi_destroy(&server->bdi); | ||
759 | kfree(server->priv.data); | 767 | kfree(server->priv.data); |
760 | kfree(server->auth.object_name); | 768 | kfree(server->auth.object_name); |
761 | vfree(server->rxbuf); | 769 | vfree(server->rxbuf); |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 2a3d352c0bff..acc9c4943b84 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -966,6 +966,8 @@ out_error: | |||
966 | static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) | 966 | static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) |
967 | { | 967 | { |
968 | target->flags = source->flags; | 968 | target->flags = source->flags; |
969 | target->rsize = source->rsize; | ||
970 | target->wsize = source->wsize; | ||
969 | target->acregmin = source->acregmin; | 971 | target->acregmin = source->acregmin; |
970 | target->acregmax = source->acregmax; | 972 | target->acregmax = source->acregmax; |
971 | target->acdirmin = source->acdirmin; | 973 | target->acdirmin = source->acdirmin; |
@@ -1294,7 +1296,8 @@ static int nfs4_init_server(struct nfs_server *server, | |||
1294 | 1296 | ||
1295 | /* Initialise the client representation from the mount data */ | 1297 | /* Initialise the client representation from the mount data */ |
1296 | server->flags = data->flags; | 1298 | server->flags = data->flags; |
1297 | server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; | 1299 | server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR| |
1300 | NFS_CAP_POSIX_LOCK; | ||
1298 | server->options = data->options; | 1301 | server->options = data->options; |
1299 | 1302 | ||
1300 | /* Get a client record */ | 1303 | /* Get a client record */ |
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 15671245c6ee..ea61d26e7871 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
@@ -24,6 +24,8 @@ | |||
24 | 24 | ||
25 | static void nfs_do_free_delegation(struct nfs_delegation *delegation) | 25 | static void nfs_do_free_delegation(struct nfs_delegation *delegation) |
26 | { | 26 | { |
27 | if (delegation->cred) | ||
28 | put_rpccred(delegation->cred); | ||
27 | kfree(delegation); | 29 | kfree(delegation); |
28 | } | 30 | } |
29 | 31 | ||
@@ -36,13 +38,7 @@ static void nfs_free_delegation_callback(struct rcu_head *head) | |||
36 | 38 | ||
37 | static void nfs_free_delegation(struct nfs_delegation *delegation) | 39 | static void nfs_free_delegation(struct nfs_delegation *delegation) |
38 | { | 40 | { |
39 | struct rpc_cred *cred; | ||
40 | |||
41 | cred = rcu_dereference(delegation->cred); | ||
42 | rcu_assign_pointer(delegation->cred, NULL); | ||
43 | call_rcu(&delegation->rcu, nfs_free_delegation_callback); | 41 | call_rcu(&delegation->rcu, nfs_free_delegation_callback); |
44 | if (cred) | ||
45 | put_rpccred(cred); | ||
46 | } | 42 | } |
47 | 43 | ||
48 | void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) | 44 | void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) |
@@ -129,21 +125,35 @@ again: | |||
129 | */ | 125 | */ |
130 | void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) | 126 | void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) |
131 | { | 127 | { |
132 | struct nfs_delegation *delegation = NFS_I(inode)->delegation; | 128 | struct nfs_delegation *delegation; |
133 | struct rpc_cred *oldcred; | 129 | struct rpc_cred *oldcred = NULL; |
134 | 130 | ||
135 | if (delegation == NULL) | 131 | rcu_read_lock(); |
136 | return; | 132 | delegation = rcu_dereference(NFS_I(inode)->delegation); |
137 | memcpy(delegation->stateid.data, res->delegation.data, | 133 | if (delegation != NULL) { |
138 | sizeof(delegation->stateid.data)); | 134 | spin_lock(&delegation->lock); |
139 | delegation->type = res->delegation_type; | 135 | if (delegation->inode != NULL) { |
140 | delegation->maxsize = res->maxsize; | 136 | memcpy(delegation->stateid.data, res->delegation.data, |
141 | oldcred = delegation->cred; | 137 | sizeof(delegation->stateid.data)); |
142 | delegation->cred = get_rpccred(cred); | 138 | delegation->type = res->delegation_type; |
143 | clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); | 139 | delegation->maxsize = res->maxsize; |
144 | NFS_I(inode)->delegation_state = delegation->type; | 140 | oldcred = delegation->cred; |
145 | smp_wmb(); | 141 | delegation->cred = get_rpccred(cred); |
146 | put_rpccred(oldcred); | 142 | clear_bit(NFS_DELEGATION_NEED_RECLAIM, |
143 | &delegation->flags); | ||
144 | NFS_I(inode)->delegation_state = delegation->type; | ||
145 | spin_unlock(&delegation->lock); | ||
146 | put_rpccred(oldcred); | ||
147 | rcu_read_unlock(); | ||
148 | } else { | ||
149 | /* We appear to have raced with a delegation return. */ | ||
150 | spin_unlock(&delegation->lock); | ||
151 | rcu_read_unlock(); | ||
152 | nfs_inode_set_delegation(inode, cred, res); | ||
153 | } | ||
154 | } else { | ||
155 | rcu_read_unlock(); | ||
156 | } | ||
147 | } | 157 | } |
148 | 158 | ||
149 | static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) | 159 | static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) |
@@ -166,9 +176,13 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation | |||
166 | return inode; | 176 | return inode; |
167 | } | 177 | } |
168 | 178 | ||
169 | static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) | 179 | static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, |
180 | const nfs4_stateid *stateid, | ||
181 | struct nfs_client *clp) | ||
170 | { | 182 | { |
171 | struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); | 183 | struct nfs_delegation *delegation = |
184 | rcu_dereference_protected(nfsi->delegation, | ||
185 | lockdep_is_held(&clp->cl_lock)); | ||
172 | 186 | ||
173 | if (delegation == NULL) | 187 | if (delegation == NULL) |
174 | goto nomatch; | 188 | goto nomatch; |
@@ -195,7 +209,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct | |||
195 | { | 209 | { |
196 | struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; | 210 | struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; |
197 | struct nfs_inode *nfsi = NFS_I(inode); | 211 | struct nfs_inode *nfsi = NFS_I(inode); |
198 | struct nfs_delegation *delegation; | 212 | struct nfs_delegation *delegation, *old_delegation; |
199 | struct nfs_delegation *freeme = NULL; | 213 | struct nfs_delegation *freeme = NULL; |
200 | int status = 0; | 214 | int status = 0; |
201 | 215 | ||
@@ -213,10 +227,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct | |||
213 | spin_lock_init(&delegation->lock); | 227 | spin_lock_init(&delegation->lock); |
214 | 228 | ||
215 | spin_lock(&clp->cl_lock); | 229 | spin_lock(&clp->cl_lock); |
216 | if (rcu_dereference(nfsi->delegation) != NULL) { | 230 | old_delegation = rcu_dereference_protected(nfsi->delegation, |
217 | if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, | 231 | lockdep_is_held(&clp->cl_lock)); |
218 | sizeof(delegation->stateid)) == 0 && | 232 | if (old_delegation != NULL) { |
219 | delegation->type == nfsi->delegation->type) { | 233 | if (memcmp(&delegation->stateid, &old_delegation->stateid, |
234 | sizeof(old_delegation->stateid)) == 0 && | ||
235 | delegation->type == old_delegation->type) { | ||
220 | goto out; | 236 | goto out; |
221 | } | 237 | } |
222 | /* | 238 | /* |
@@ -226,12 +242,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct | |||
226 | dfprintk(FILE, "%s: server %s handed out " | 242 | dfprintk(FILE, "%s: server %s handed out " |
227 | "a duplicate delegation!\n", | 243 | "a duplicate delegation!\n", |
228 | __func__, clp->cl_hostname); | 244 | __func__, clp->cl_hostname); |
229 | if (delegation->type <= nfsi->delegation->type) { | 245 | if (delegation->type <= old_delegation->type) { |
230 | freeme = delegation; | 246 | freeme = delegation; |
231 | delegation = NULL; | 247 | delegation = NULL; |
232 | goto out; | 248 | goto out; |
233 | } | 249 | } |
234 | freeme = nfs_detach_delegation_locked(nfsi, NULL); | 250 | freeme = nfs_detach_delegation_locked(nfsi, NULL, clp); |
235 | } | 251 | } |
236 | list_add_rcu(&delegation->super_list, &clp->cl_delegations); | 252 | list_add_rcu(&delegation->super_list, &clp->cl_delegations); |
237 | nfsi->delegation_state = delegation->type; | 253 | nfsi->delegation_state = delegation->type; |
@@ -301,7 +317,7 @@ restart: | |||
301 | if (inode == NULL) | 317 | if (inode == NULL) |
302 | continue; | 318 | continue; |
303 | spin_lock(&clp->cl_lock); | 319 | spin_lock(&clp->cl_lock); |
304 | delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); | 320 | delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp); |
305 | spin_unlock(&clp->cl_lock); | 321 | spin_unlock(&clp->cl_lock); |
306 | rcu_read_unlock(); | 322 | rcu_read_unlock(); |
307 | if (delegation != NULL) { | 323 | if (delegation != NULL) { |
@@ -330,9 +346,9 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode) | |||
330 | struct nfs_inode *nfsi = NFS_I(inode); | 346 | struct nfs_inode *nfsi = NFS_I(inode); |
331 | struct nfs_delegation *delegation; | 347 | struct nfs_delegation *delegation; |
332 | 348 | ||
333 | if (rcu_dereference(nfsi->delegation) != NULL) { | 349 | if (rcu_access_pointer(nfsi->delegation) != NULL) { |
334 | spin_lock(&clp->cl_lock); | 350 | spin_lock(&clp->cl_lock); |
335 | delegation = nfs_detach_delegation_locked(nfsi, NULL); | 351 | delegation = nfs_detach_delegation_locked(nfsi, NULL, clp); |
336 | spin_unlock(&clp->cl_lock); | 352 | spin_unlock(&clp->cl_lock); |
337 | if (delegation != NULL) | 353 | if (delegation != NULL) |
338 | nfs_do_return_delegation(inode, delegation, 0); | 354 | nfs_do_return_delegation(inode, delegation, 0); |
@@ -346,9 +362,9 @@ int nfs_inode_return_delegation(struct inode *inode) | |||
346 | struct nfs_delegation *delegation; | 362 | struct nfs_delegation *delegation; |
347 | int err = 0; | 363 | int err = 0; |
348 | 364 | ||
349 | if (rcu_dereference(nfsi->delegation) != NULL) { | 365 | if (rcu_access_pointer(nfsi->delegation) != NULL) { |
350 | spin_lock(&clp->cl_lock); | 366 | spin_lock(&clp->cl_lock); |
351 | delegation = nfs_detach_delegation_locked(nfsi, NULL); | 367 | delegation = nfs_detach_delegation_locked(nfsi, NULL, clp); |
352 | spin_unlock(&clp->cl_lock); | 368 | spin_unlock(&clp->cl_lock); |
353 | if (delegation != NULL) { | 369 | if (delegation != NULL) { |
354 | nfs_msync_inode(inode); | 370 | nfs_msync_inode(inode); |
@@ -526,7 +542,7 @@ restart: | |||
526 | if (inode == NULL) | 542 | if (inode == NULL) |
527 | continue; | 543 | continue; |
528 | spin_lock(&clp->cl_lock); | 544 | spin_lock(&clp->cl_lock); |
529 | delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); | 545 | delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp); |
530 | spin_unlock(&clp->cl_lock); | 546 | spin_unlock(&clp->cl_lock); |
531 | rcu_read_unlock(); | 547 | rcu_read_unlock(); |
532 | if (delegation != NULL) | 548 | if (delegation != NULL) |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c6f2750648f4..a7bb5c694aa3 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -837,6 +837,8 @@ out_zap_parent: | |||
837 | /* If we have submounts, don't unhash ! */ | 837 | /* If we have submounts, don't unhash ! */ |
838 | if (have_submounts(dentry)) | 838 | if (have_submounts(dentry)) |
839 | goto out_valid; | 839 | goto out_valid; |
840 | if (dentry->d_flags & DCACHE_DISCONNECTED) | ||
841 | goto out_valid; | ||
840 | shrink_dcache_parent(dentry); | 842 | shrink_dcache_parent(dentry); |
841 | } | 843 | } |
842 | d_drop(dentry); | 844 | d_drop(dentry); |
@@ -1025,12 +1027,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry | |||
1025 | res = NULL; | 1027 | res = NULL; |
1026 | goto out; | 1028 | goto out; |
1027 | /* This turned out not to be a regular file */ | 1029 | /* This turned out not to be a regular file */ |
1030 | case -EISDIR: | ||
1028 | case -ENOTDIR: | 1031 | case -ENOTDIR: |
1029 | goto no_open; | 1032 | goto no_open; |
1030 | case -ELOOP: | 1033 | case -ELOOP: |
1031 | if (!(nd->intent.open.flags & O_NOFOLLOW)) | 1034 | if (!(nd->intent.open.flags & O_NOFOLLOW)) |
1032 | goto no_open; | 1035 | goto no_open; |
1033 | /* case -EISDIR: */ | ||
1034 | /* case -EINVAL: */ | 1036 | /* case -EINVAL: */ |
1035 | default: | 1037 | default: |
1036 | goto out; | 1038 | goto out; |
@@ -1050,7 +1052,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1050 | struct inode *dir; | 1052 | struct inode *dir; |
1051 | int openflags, ret = 0; | 1053 | int openflags, ret = 0; |
1052 | 1054 | ||
1053 | if (!is_atomic_open(nd)) | 1055 | if (!is_atomic_open(nd) || d_mountpoint(dentry)) |
1054 | goto no_open; | 1056 | goto no_open; |
1055 | parent = dget_parent(dentry); | 1057 | parent = dget_parent(dentry); |
1056 | dir = parent->d_inode; | 1058 | dir = parent->d_inode; |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 737128f777f3..50a56edca0b5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -623,10 +623,10 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c | |||
623 | list_for_each_entry(pos, &nfsi->open_files, list) { | 623 | list_for_each_entry(pos, &nfsi->open_files, list) { |
624 | if (cred != NULL && pos->cred != cred) | 624 | if (cred != NULL && pos->cred != cred) |
625 | continue; | 625 | continue; |
626 | if ((pos->mode & mode) == mode) { | 626 | if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode) |
627 | ctx = get_nfs_open_context(pos); | 627 | continue; |
628 | break; | 628 | ctx = get_nfs_open_context(pos); |
629 | } | 629 | break; |
630 | } | 630 | } |
631 | spin_unlock(&inode->i_lock); | 631 | spin_unlock(&inode->i_lock); |
632 | return ctx; | 632 | return ctx; |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index fe0cd9eb1d4d..071fcedd517c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -1523,6 +1523,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) | |||
1523 | nfs_post_op_update_inode(dir, o_res->dir_attr); | 1523 | nfs_post_op_update_inode(dir, o_res->dir_attr); |
1524 | } else | 1524 | } else |
1525 | nfs_refresh_inode(dir, o_res->dir_attr); | 1525 | nfs_refresh_inode(dir, o_res->dir_attr); |
1526 | if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) | ||
1527 | server->caps &= ~NFS_CAP_POSIX_LOCK; | ||
1526 | if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { | 1528 | if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { |
1527 | status = _nfs4_proc_open_confirm(data); | 1529 | status = _nfs4_proc_open_confirm(data); |
1528 | if (status != 0) | 1530 | if (status != 0) |
@@ -1664,7 +1666,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in | |||
1664 | status = PTR_ERR(state); | 1666 | status = PTR_ERR(state); |
1665 | if (IS_ERR(state)) | 1667 | if (IS_ERR(state)) |
1666 | goto err_opendata_put; | 1668 | goto err_opendata_put; |
1667 | if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0) | 1669 | if (server->caps & NFS_CAP_POSIX_LOCK) |
1668 | set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); | 1670 | set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); |
1669 | nfs4_opendata_put(opendata); | 1671 | nfs4_opendata_put(opendata); |
1670 | nfs4_put_state_owner(sp); | 1672 | nfs4_put_state_owner(sp); |
@@ -5216,9 +5218,12 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp) | |||
5216 | msg.rpc_resp = &calldata->res; | 5218 | msg.rpc_resp = &calldata->res; |
5217 | task_setup_data.callback_data = calldata; | 5219 | task_setup_data.callback_data = calldata; |
5218 | task = rpc_run_task(&task_setup_data); | 5220 | task = rpc_run_task(&task_setup_data); |
5219 | if (IS_ERR(task)) | 5221 | if (IS_ERR(task)) { |
5220 | status = PTR_ERR(task); | 5222 | status = PTR_ERR(task); |
5223 | goto out; | ||
5224 | } | ||
5221 | rpc_put_task(task); | 5225 | rpc_put_task(task); |
5226 | return 0; | ||
5222 | out: | 5227 | out: |
5223 | dprintk("<-- %s status=%d\n", __func__, status); | 5228 | dprintk("<-- %s status=%d\n", __func__, status); |
5224 | return status; | 5229 | return status; |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e01637240eeb..b4148fc00f9f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -2187,6 +2187,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
2187 | if (data->version == 4) { | 2187 | if (data->version == 4) { |
2188 | error = nfs4_try_mount(flags, dev_name, data, mnt); | 2188 | error = nfs4_try_mount(flags, dev_name, data, mnt); |
2189 | kfree(data->client_address); | 2189 | kfree(data->client_address); |
2190 | kfree(data->nfs_server.export_path); | ||
2190 | goto out; | 2191 | goto out; |
2191 | } | 2192 | } |
2192 | #endif /* CONFIG_NFS_V4 */ | 2193 | #endif /* CONFIG_NFS_V4 */ |
@@ -2657,7 +2658,7 @@ static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt) | |||
2657 | devname = nfs_path(path->mnt->mnt_devname, | 2658 | devname = nfs_path(path->mnt->mnt_devname, |
2658 | path->mnt->mnt_root, path->dentry, | 2659 | path->mnt->mnt_root, path->dentry, |
2659 | page, PAGE_SIZE); | 2660 | page, PAGE_SIZE); |
2660 | if (devname == NULL) | 2661 | if (IS_ERR(devname)) |
2661 | goto out_freepage; | 2662 | goto out_freepage; |
2662 | tmp = kstrdup(devname, GFP_KERNEL); | 2663 | tmp = kstrdup(devname, GFP_KERNEL); |
2663 | if (tmp == NULL) | 2664 | if (tmp == NULL) |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 53ff70e23993..3aea3ca98ab7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -201,6 +201,7 @@ static int nfs_set_page_writeback(struct page *page) | |||
201 | struct inode *inode = page->mapping->host; | 201 | struct inode *inode = page->mapping->host; |
202 | struct nfs_server *nfss = NFS_SERVER(inode); | 202 | struct nfs_server *nfss = NFS_SERVER(inode); |
203 | 203 | ||
204 | page_cache_get(page); | ||
204 | if (atomic_long_inc_return(&nfss->writeback) > | 205 | if (atomic_long_inc_return(&nfss->writeback) > |
205 | NFS_CONGESTION_ON_THRESH) { | 206 | NFS_CONGESTION_ON_THRESH) { |
206 | set_bdi_congested(&nfss->backing_dev_info, | 207 | set_bdi_congested(&nfss->backing_dev_info, |
@@ -216,6 +217,7 @@ static void nfs_end_page_writeback(struct page *page) | |||
216 | struct nfs_server *nfss = NFS_SERVER(inode); | 217 | struct nfs_server *nfss = NFS_SERVER(inode); |
217 | 218 | ||
218 | end_page_writeback(page); | 219 | end_page_writeback(page); |
220 | page_cache_release(page); | ||
219 | if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) | 221 | if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) |
220 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); | 222 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); |
221 | } | 223 | } |
@@ -421,6 +423,7 @@ static void | |||
421 | nfs_mark_request_dirty(struct nfs_page *req) | 423 | nfs_mark_request_dirty(struct nfs_page *req) |
422 | { | 424 | { |
423 | __set_page_dirty_nobuffers(req->wb_page); | 425 | __set_page_dirty_nobuffers(req->wb_page); |
426 | __mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC); | ||
424 | } | 427 | } |
425 | 428 | ||
426 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 429 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) |
@@ -660,9 +663,11 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, | |||
660 | req = nfs_setup_write_request(ctx, page, offset, count); | 663 | req = nfs_setup_write_request(ctx, page, offset, count); |
661 | if (IS_ERR(req)) | 664 | if (IS_ERR(req)) |
662 | return PTR_ERR(req); | 665 | return PTR_ERR(req); |
666 | nfs_mark_request_dirty(req); | ||
663 | /* Update file length */ | 667 | /* Update file length */ |
664 | nfs_grow_file(page, offset, count); | 668 | nfs_grow_file(page, offset, count); |
665 | nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); | 669 | nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); |
670 | nfs_mark_request_dirty(req); | ||
666 | nfs_clear_page_tag_locked(req); | 671 | nfs_clear_page_tag_locked(req); |
667 | return 0; | 672 | return 0; |
668 | } | 673 | } |
@@ -739,8 +744,6 @@ int nfs_updatepage(struct file *file, struct page *page, | |||
739 | status = nfs_writepage_setup(ctx, page, offset, count); | 744 | status = nfs_writepage_setup(ctx, page, offset, count); |
740 | if (status < 0) | 745 | if (status < 0) |
741 | nfs_set_pageerror(page); | 746 | nfs_set_pageerror(page); |
742 | else | ||
743 | __set_page_dirty_nobuffers(page); | ||
744 | 747 | ||
745 | dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", | 748 | dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", |
746 | status, (long long)i_size_read(inode)); | 749 | status, (long long)i_size_read(inode)); |
@@ -749,13 +752,12 @@ int nfs_updatepage(struct file *file, struct page *page, | |||
749 | 752 | ||
750 | static void nfs_writepage_release(struct nfs_page *req) | 753 | static void nfs_writepage_release(struct nfs_page *req) |
751 | { | 754 | { |
755 | struct page *page = req->wb_page; | ||
752 | 756 | ||
753 | if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { | 757 | if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) |
754 | nfs_end_page_writeback(req->wb_page); | ||
755 | nfs_inode_remove_request(req); | 758 | nfs_inode_remove_request(req); |
756 | } else | ||
757 | nfs_end_page_writeback(req->wb_page); | ||
758 | nfs_clear_page_tag_locked(req); | 759 | nfs_clear_page_tag_locked(req); |
760 | nfs_end_page_writeback(page); | ||
759 | } | 761 | } |
760 | 762 | ||
761 | static int flush_task_priority(int how) | 763 | static int flush_task_priority(int how) |
@@ -779,7 +781,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req, | |||
779 | int how) | 781 | int how) |
780 | { | 782 | { |
781 | struct inode *inode = req->wb_context->path.dentry->d_inode; | 783 | struct inode *inode = req->wb_context->path.dentry->d_inode; |
782 | int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; | ||
783 | int priority = flush_task_priority(how); | 784 | int priority = flush_task_priority(how); |
784 | struct rpc_task *task; | 785 | struct rpc_task *task; |
785 | struct rpc_message msg = { | 786 | struct rpc_message msg = { |
@@ -794,9 +795,10 @@ static int nfs_write_rpcsetup(struct nfs_page *req, | |||
794 | .callback_ops = call_ops, | 795 | .callback_ops = call_ops, |
795 | .callback_data = data, | 796 | .callback_data = data, |
796 | .workqueue = nfsiod_workqueue, | 797 | .workqueue = nfsiod_workqueue, |
797 | .flags = flags, | 798 | .flags = RPC_TASK_ASYNC, |
798 | .priority = priority, | 799 | .priority = priority, |
799 | }; | 800 | }; |
801 | int ret = 0; | ||
800 | 802 | ||
801 | /* Set up the RPC argument and reply structs | 803 | /* Set up the RPC argument and reply structs |
802 | * NB: take care not to mess about with data->commit et al. */ | 804 | * NB: take care not to mess about with data->commit et al. */ |
@@ -835,10 +837,18 @@ static int nfs_write_rpcsetup(struct nfs_page *req, | |||
835 | (unsigned long long)data->args.offset); | 837 | (unsigned long long)data->args.offset); |
836 | 838 | ||
837 | task = rpc_run_task(&task_setup_data); | 839 | task = rpc_run_task(&task_setup_data); |
838 | if (IS_ERR(task)) | 840 | if (IS_ERR(task)) { |
839 | return PTR_ERR(task); | 841 | ret = PTR_ERR(task); |
842 | goto out; | ||
843 | } | ||
844 | if (how & FLUSH_SYNC) { | ||
845 | ret = rpc_wait_for_completion_task(task); | ||
846 | if (ret == 0) | ||
847 | ret = task->tk_status; | ||
848 | } | ||
840 | rpc_put_task(task); | 849 | rpc_put_task(task); |
841 | return 0; | 850 | out: |
851 | return ret; | ||
842 | } | 852 | } |
843 | 853 | ||
844 | /* If a nfs_flush_* function fails, it should remove reqs from @head and | 854 | /* If a nfs_flush_* function fails, it should remove reqs from @head and |
@@ -847,9 +857,11 @@ static int nfs_write_rpcsetup(struct nfs_page *req, | |||
847 | */ | 857 | */ |
848 | static void nfs_redirty_request(struct nfs_page *req) | 858 | static void nfs_redirty_request(struct nfs_page *req) |
849 | { | 859 | { |
860 | struct page *page = req->wb_page; | ||
861 | |||
850 | nfs_mark_request_dirty(req); | 862 | nfs_mark_request_dirty(req); |
851 | nfs_end_page_writeback(req->wb_page); | ||
852 | nfs_clear_page_tag_locked(req); | 863 | nfs_clear_page_tag_locked(req); |
864 | nfs_end_page_writeback(page); | ||
853 | } | 865 | } |
854 | 866 | ||
855 | /* | 867 | /* |
@@ -1084,16 +1096,15 @@ static void nfs_writeback_release_full(void *calldata) | |||
1084 | if (nfs_write_need_commit(data)) { | 1096 | if (nfs_write_need_commit(data)) { |
1085 | memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); | 1097 | memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); |
1086 | nfs_mark_request_commit(req); | 1098 | nfs_mark_request_commit(req); |
1087 | nfs_end_page_writeback(page); | ||
1088 | dprintk(" marked for commit\n"); | 1099 | dprintk(" marked for commit\n"); |
1089 | goto next; | 1100 | goto next; |
1090 | } | 1101 | } |
1091 | dprintk(" OK\n"); | 1102 | dprintk(" OK\n"); |
1092 | remove_request: | 1103 | remove_request: |
1093 | nfs_end_page_writeback(page); | ||
1094 | nfs_inode_remove_request(req); | 1104 | nfs_inode_remove_request(req); |
1095 | next: | 1105 | next: |
1096 | nfs_clear_page_tag_locked(req); | 1106 | nfs_clear_page_tag_locked(req); |
1107 | nfs_end_page_writeback(page); | ||
1097 | } | 1108 | } |
1098 | nfs_writedata_release(calldata); | 1109 | nfs_writedata_release(calldata); |
1099 | } | 1110 | } |
@@ -1190,6 +1201,25 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | |||
1190 | 1201 | ||
1191 | 1202 | ||
1192 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 1203 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) |
1204 | static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) | ||
1205 | { | ||
1206 | if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) | ||
1207 | return 1; | ||
1208 | if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags, | ||
1209 | NFS_INO_COMMIT, nfs_wait_bit_killable, | ||
1210 | TASK_KILLABLE)) | ||
1211 | return 1; | ||
1212 | return 0; | ||
1213 | } | ||
1214 | |||
1215 | static void nfs_commit_clear_lock(struct nfs_inode *nfsi) | ||
1216 | { | ||
1217 | clear_bit(NFS_INO_COMMIT, &nfsi->flags); | ||
1218 | smp_mb__after_clear_bit(); | ||
1219 | wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); | ||
1220 | } | ||
1221 | |||
1222 | |||
1193 | static void nfs_commitdata_release(void *data) | 1223 | static void nfs_commitdata_release(void *data) |
1194 | { | 1224 | { |
1195 | struct nfs_write_data *wdata = data; | 1225 | struct nfs_write_data *wdata = data; |
@@ -1207,7 +1237,6 @@ static int nfs_commit_rpcsetup(struct list_head *head, | |||
1207 | { | 1237 | { |
1208 | struct nfs_page *first = nfs_list_entry(head->next); | 1238 | struct nfs_page *first = nfs_list_entry(head->next); |
1209 | struct inode *inode = first->wb_context->path.dentry->d_inode; | 1239 | struct inode *inode = first->wb_context->path.dentry->d_inode; |
1210 | int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; | ||
1211 | int priority = flush_task_priority(how); | 1240 | int priority = flush_task_priority(how); |
1212 | struct rpc_task *task; | 1241 | struct rpc_task *task; |
1213 | struct rpc_message msg = { | 1242 | struct rpc_message msg = { |
@@ -1222,7 +1251,7 @@ static int nfs_commit_rpcsetup(struct list_head *head, | |||
1222 | .callback_ops = &nfs_commit_ops, | 1251 | .callback_ops = &nfs_commit_ops, |
1223 | .callback_data = data, | 1252 | .callback_data = data, |
1224 | .workqueue = nfsiod_workqueue, | 1253 | .workqueue = nfsiod_workqueue, |
1225 | .flags = flags, | 1254 | .flags = RPC_TASK_ASYNC, |
1226 | .priority = priority, | 1255 | .priority = priority, |
1227 | }; | 1256 | }; |
1228 | 1257 | ||
@@ -1282,6 +1311,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) | |||
1282 | BDI_RECLAIMABLE); | 1311 | BDI_RECLAIMABLE); |
1283 | nfs_clear_page_tag_locked(req); | 1312 | nfs_clear_page_tag_locked(req); |
1284 | } | 1313 | } |
1314 | nfs_commit_clear_lock(NFS_I(inode)); | ||
1285 | return -ENOMEM; | 1315 | return -ENOMEM; |
1286 | } | 1316 | } |
1287 | 1317 | ||
@@ -1337,6 +1367,7 @@ static void nfs_commit_release(void *calldata) | |||
1337 | next: | 1367 | next: |
1338 | nfs_clear_page_tag_locked(req); | 1368 | nfs_clear_page_tag_locked(req); |
1339 | } | 1369 | } |
1370 | nfs_commit_clear_lock(NFS_I(data->inode)); | ||
1340 | nfs_commitdata_release(calldata); | 1371 | nfs_commitdata_release(calldata); |
1341 | } | 1372 | } |
1342 | 1373 | ||
@@ -1351,8 +1382,11 @@ static const struct rpc_call_ops nfs_commit_ops = { | |||
1351 | static int nfs_commit_inode(struct inode *inode, int how) | 1382 | static int nfs_commit_inode(struct inode *inode, int how) |
1352 | { | 1383 | { |
1353 | LIST_HEAD(head); | 1384 | LIST_HEAD(head); |
1354 | int res; | 1385 | int may_wait = how & FLUSH_SYNC; |
1386 | int res = 0; | ||
1355 | 1387 | ||
1388 | if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) | ||
1389 | goto out; | ||
1356 | spin_lock(&inode->i_lock); | 1390 | spin_lock(&inode->i_lock); |
1357 | res = nfs_scan_commit(inode, &head, 0, 0); | 1391 | res = nfs_scan_commit(inode, &head, 0, 0); |
1358 | spin_unlock(&inode->i_lock); | 1392 | spin_unlock(&inode->i_lock); |
@@ -1360,7 +1394,13 @@ static int nfs_commit_inode(struct inode *inode, int how) | |||
1360 | int error = nfs_commit_list(inode, &head, how); | 1394 | int error = nfs_commit_list(inode, &head, how); |
1361 | if (error < 0) | 1395 | if (error < 0) |
1362 | return error; | 1396 | return error; |
1363 | } | 1397 | if (may_wait) |
1398 | wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT, | ||
1399 | nfs_wait_bit_killable, | ||
1400 | TASK_KILLABLE); | ||
1401 | } else | ||
1402 | nfs_commit_clear_lock(NFS_I(inode)); | ||
1403 | out: | ||
1364 | return res; | 1404 | return res; |
1365 | } | 1405 | } |
1366 | 1406 | ||
@@ -1432,6 +1472,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) | |||
1432 | 1472 | ||
1433 | BUG_ON(!PageLocked(page)); | 1473 | BUG_ON(!PageLocked(page)); |
1434 | for (;;) { | 1474 | for (;;) { |
1475 | wait_on_page_writeback(page); | ||
1435 | req = nfs_page_find_request(page); | 1476 | req = nfs_page_find_request(page); |
1436 | if (req == NULL) | 1477 | if (req == NULL) |
1437 | break; | 1478 | break; |
@@ -1466,30 +1507,18 @@ int nfs_wb_page(struct inode *inode, struct page *page) | |||
1466 | .range_start = range_start, | 1507 | .range_start = range_start, |
1467 | .range_end = range_end, | 1508 | .range_end = range_end, |
1468 | }; | 1509 | }; |
1469 | struct nfs_page *req; | ||
1470 | int need_commit; | ||
1471 | int ret; | 1510 | int ret; |
1472 | 1511 | ||
1473 | while(PagePrivate(page)) { | 1512 | while(PagePrivate(page)) { |
1513 | wait_on_page_writeback(page); | ||
1474 | if (clear_page_dirty_for_io(page)) { | 1514 | if (clear_page_dirty_for_io(page)) { |
1475 | ret = nfs_writepage_locked(page, &wbc); | 1515 | ret = nfs_writepage_locked(page, &wbc); |
1476 | if (ret < 0) | 1516 | if (ret < 0) |
1477 | goto out_error; | 1517 | goto out_error; |
1478 | } | 1518 | } |
1479 | req = nfs_find_and_lock_request(page); | 1519 | ret = sync_inode(inode, &wbc); |
1480 | if (!req) | 1520 | if (ret < 0) |
1481 | break; | ||
1482 | if (IS_ERR(req)) { | ||
1483 | ret = PTR_ERR(req); | ||
1484 | goto out_error; | 1521 | goto out_error; |
1485 | } | ||
1486 | need_commit = test_bit(PG_CLEAN, &req->wb_flags); | ||
1487 | nfs_clear_page_tag_locked(req); | ||
1488 | if (need_commit) { | ||
1489 | ret = nfs_commit_inode(inode, FLUSH_SYNC); | ||
1490 | if (ret < 0) | ||
1491 | goto out_error; | ||
1492 | } | ||
1493 | } | 1522 | } |
1494 | return 0; | 1523 | return 0; |
1495 | out_error: | 1524 | out_error: |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e1703175ee28..34ccf815ea8a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -161,10 +161,10 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) | |||
161 | argp->p = page_address(argp->pagelist[0]); | 161 | argp->p = page_address(argp->pagelist[0]); |
162 | argp->pagelist++; | 162 | argp->pagelist++; |
163 | if (argp->pagelen < PAGE_SIZE) { | 163 | if (argp->pagelen < PAGE_SIZE) { |
164 | argp->end = p + (argp->pagelen>>2); | 164 | argp->end = argp->p + (argp->pagelen>>2); |
165 | argp->pagelen = 0; | 165 | argp->pagelen = 0; |
166 | } else { | 166 | } else { |
167 | argp->end = p + (PAGE_SIZE>>2); | 167 | argp->end = argp->p + (PAGE_SIZE>>2); |
168 | argp->pagelen -= PAGE_SIZE; | 168 | argp->pagelen -= PAGE_SIZE; |
169 | } | 169 | } |
170 | memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); | 170 | memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); |
@@ -1426,10 +1426,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) | |||
1426 | argp->p = page_address(argp->pagelist[0]); | 1426 | argp->p = page_address(argp->pagelist[0]); |
1427 | argp->pagelist++; | 1427 | argp->pagelist++; |
1428 | if (argp->pagelen < PAGE_SIZE) { | 1428 | if (argp->pagelen < PAGE_SIZE) { |
1429 | argp->end = p + (argp->pagelen>>2); | 1429 | argp->end = argp->p + (argp->pagelen>>2); |
1430 | argp->pagelen = 0; | 1430 | argp->pagelen = 0; |
1431 | } else { | 1431 | } else { |
1432 | argp->end = p + (PAGE_SIZE>>2); | 1432 | argp->end = argp->p + (PAGE_SIZE>>2); |
1433 | argp->pagelen -= PAGE_SIZE; | 1433 | argp->pagelen -= PAGE_SIZE; |
1434 | } | 1434 | } |
1435 | } | 1435 | } |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 0cdbc5e7655a..48145f505a6a 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -749,6 +749,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, | |||
749 | sb->s_export_op = &nilfs_export_ops; | 749 | sb->s_export_op = &nilfs_export_ops; |
750 | sb->s_root = NULL; | 750 | sb->s_root = NULL; |
751 | sb->s_time_gran = 1; | 751 | sb->s_time_gran = 1; |
752 | sb->s_bdi = nilfs->ns_bdi; | ||
752 | 753 | ||
753 | err = load_nilfs(nilfs, sbi); | 754 | err = load_nilfs(nilfs, sbi); |
754 | if (err) | 755 | if (err) |
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig index 3e56dbffe729..b3a159b21cfd 100644 --- a/fs/notify/inotify/Kconfig +++ b/fs/notify/inotify/Kconfig | |||
@@ -15,6 +15,7 @@ config INOTIFY | |||
15 | 15 | ||
16 | config INOTIFY_USER | 16 | config INOTIFY_USER |
17 | bool "Inotify support for userspace" | 17 | bool "Inotify support for userspace" |
18 | select ANON_INODES | ||
18 | select FSNOTIFY | 19 | select FSNOTIFY |
19 | default y | 20 | default y |
20 | ---help--- | 21 | ---help--- |
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 1afb0a10229f..e27960cd76ab 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/path.h> /* struct path */ | 28 | #include <linux/path.h> /* struct path */ |
29 | #include <linux/slab.h> /* kmem_* */ | 29 | #include <linux/slab.h> /* kmem_* */ |
30 | #include <linux/types.h> | 30 | #include <linux/types.h> |
31 | #include <linux/sched.h> | ||
31 | 32 | ||
32 | #include "inotify.h" | 33 | #include "inotify.h" |
33 | 34 | ||
@@ -146,6 +147,7 @@ static void inotify_free_group_priv(struct fsnotify_group *group) | |||
146 | idr_for_each(&group->inotify_data.idr, idr_callback, group); | 147 | idr_for_each(&group->inotify_data.idr, idr_callback, group); |
147 | idr_remove_all(&group->inotify_data.idr); | 148 | idr_remove_all(&group->inotify_data.idr); |
148 | idr_destroy(&group->inotify_data.idr); | 149 | idr_destroy(&group->inotify_data.idr); |
150 | free_uid(group->inotify_data.user); | ||
149 | } | 151 | } |
150 | 152 | ||
151 | void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv) | 153 | void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv) |
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 472cdf29ef82..e46ca685b9be 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c | |||
@@ -546,21 +546,24 @@ retry: | |||
546 | if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) | 546 | if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) |
547 | goto out_err; | 547 | goto out_err; |
548 | 548 | ||
549 | /* we are putting the mark on the idr, take a reference */ | ||
550 | fsnotify_get_mark(&tmp_ientry->fsn_entry); | ||
551 | |||
549 | spin_lock(&group->inotify_data.idr_lock); | 552 | spin_lock(&group->inotify_data.idr_lock); |
550 | ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, | 553 | ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, |
551 | group->inotify_data.last_wd+1, | 554 | group->inotify_data.last_wd+1, |
552 | &tmp_ientry->wd); | 555 | &tmp_ientry->wd); |
553 | spin_unlock(&group->inotify_data.idr_lock); | 556 | spin_unlock(&group->inotify_data.idr_lock); |
554 | if (ret) { | 557 | if (ret) { |
558 | /* we didn't get on the idr, drop the idr reference */ | ||
559 | fsnotify_put_mark(&tmp_ientry->fsn_entry); | ||
560 | |||
555 | /* idr was out of memory allocate and try again */ | 561 | /* idr was out of memory allocate and try again */ |
556 | if (ret == -EAGAIN) | 562 | if (ret == -EAGAIN) |
557 | goto retry; | 563 | goto retry; |
558 | goto out_err; | 564 | goto out_err; |
559 | } | 565 | } |
560 | 566 | ||
561 | /* we put the mark on the idr, take a reference */ | ||
562 | fsnotify_get_mark(&tmp_ientry->fsn_entry); | ||
563 | |||
564 | /* we are on the idr, now get on the inode */ | 567 | /* we are on the idr, now get on the inode */ |
565 | ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); | 568 | ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); |
566 | if (ret) { | 569 | if (ret) { |
@@ -578,16 +581,13 @@ retry: | |||
578 | /* return the watch descriptor for this new entry */ | 581 | /* return the watch descriptor for this new entry */ |
579 | ret = tmp_ientry->wd; | 582 | ret = tmp_ientry->wd; |
580 | 583 | ||
581 | /* match the ref from fsnotify_init_markentry() */ | ||
582 | fsnotify_put_mark(&tmp_ientry->fsn_entry); | ||
583 | |||
584 | /* if this mark added a new event update the group mask */ | 584 | /* if this mark added a new event update the group mask */ |
585 | if (mask & ~group->mask) | 585 | if (mask & ~group->mask) |
586 | fsnotify_recalc_group_mask(group); | 586 | fsnotify_recalc_group_mask(group); |
587 | 587 | ||
588 | out_err: | 588 | out_err: |
589 | if (ret < 0) | 589 | /* match the ref from fsnotify_init_markentry() */ |
590 | kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry); | 590 | fsnotify_put_mark(&tmp_ientry->fsn_entry); |
591 | 591 | ||
592 | return ret; | 592 | return ret; |
593 | } | 593 | } |
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index ecebb2276790..f9d5d3ffc75a 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c | |||
@@ -406,6 +406,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | |||
406 | struct buffer_head *bh) | 406 | struct buffer_head *bh) |
407 | { | 407 | { |
408 | int ret = 0; | 408 | int ret = 0; |
409 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; | ||
409 | 410 | ||
410 | mlog_entry_void(); | 411 | mlog_entry_void(); |
411 | 412 | ||
@@ -425,6 +426,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | |||
425 | 426 | ||
426 | get_bh(bh); /* for end_buffer_write_sync() */ | 427 | get_bh(bh); /* for end_buffer_write_sync() */ |
427 | bh->b_end_io = end_buffer_write_sync; | 428 | bh->b_end_io = end_buffer_write_sync; |
429 | ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check); | ||
428 | submit_bh(WRITE, bh); | 430 | submit_bh(WRITE, bh); |
429 | 431 | ||
430 | wait_on_buffer(bh); | 432 | wait_on_buffer(bh); |
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index a795eb91f4ea..12d5eb78a11a 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c | |||
@@ -184,9 +184,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
184 | BUG_ON(!lksb); | 184 | BUG_ON(!lksb); |
185 | 185 | ||
186 | /* only updates if this node masters the lockres */ | 186 | /* only updates if this node masters the lockres */ |
187 | spin_lock(&res->spinlock); | ||
187 | if (res->owner == dlm->node_num) { | 188 | if (res->owner == dlm->node_num) { |
188 | |||
189 | spin_lock(&res->spinlock); | ||
190 | /* check the lksb flags for the direction */ | 189 | /* check the lksb flags for the direction */ |
191 | if (lksb->flags & DLM_LKSB_GET_LVB) { | 190 | if (lksb->flags & DLM_LKSB_GET_LVB) { |
192 | mlog(0, "getting lvb from lockres for %s node\n", | 191 | mlog(0, "getting lvb from lockres for %s node\n", |
@@ -201,8 +200,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
201 | * here. In the future we might want to clear it at the time | 200 | * here. In the future we might want to clear it at the time |
202 | * the put is actually done. | 201 | * the put is actually done. |
203 | */ | 202 | */ |
204 | spin_unlock(&res->spinlock); | ||
205 | } | 203 | } |
204 | spin_unlock(&res->spinlock); | ||
206 | 205 | ||
207 | /* reset any lvb flags on the lksb */ | 206 | /* reset any lvb flags on the lksb */ |
208 | lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB); | 207 | lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB); |
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 1b0de157a08c..b83d6107a1f5 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c | |||
@@ -112,20 +112,20 @@ MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES); | |||
112 | * O_RDONLY -> PRMODE level | 112 | * O_RDONLY -> PRMODE level |
113 | * O_WRONLY -> EXMODE level | 113 | * O_WRONLY -> EXMODE level |
114 | * | 114 | * |
115 | * O_NONBLOCK -> LKM_NOQUEUE | 115 | * O_NONBLOCK -> NOQUEUE |
116 | */ | 116 | */ |
117 | static int dlmfs_decode_open_flags(int open_flags, | 117 | static int dlmfs_decode_open_flags(int open_flags, |
118 | int *level, | 118 | int *level, |
119 | int *flags) | 119 | int *flags) |
120 | { | 120 | { |
121 | if (open_flags & (O_WRONLY|O_RDWR)) | 121 | if (open_flags & (O_WRONLY|O_RDWR)) |
122 | *level = LKM_EXMODE; | 122 | *level = DLM_LOCK_EX; |
123 | else | 123 | else |
124 | *level = LKM_PRMODE; | 124 | *level = DLM_LOCK_PR; |
125 | 125 | ||
126 | *flags = 0; | 126 | *flags = 0; |
127 | if (open_flags & O_NONBLOCK) | 127 | if (open_flags & O_NONBLOCK) |
128 | *flags |= LKM_NOQUEUE; | 128 | *flags |= DLM_LKF_NOQUEUE; |
129 | 129 | ||
130 | return 0; | 130 | return 0; |
131 | } | 131 | } |
@@ -166,7 +166,7 @@ static int dlmfs_file_open(struct inode *inode, | |||
166 | * to be able userspace to be able to distinguish a | 166 | * to be able userspace to be able to distinguish a |
167 | * valid lock request from one that simply couldn't be | 167 | * valid lock request from one that simply couldn't be |
168 | * granted. */ | 168 | * granted. */ |
169 | if (flags & LKM_NOQUEUE && status == -EAGAIN) | 169 | if (flags & DLM_LKF_NOQUEUE && status == -EAGAIN) |
170 | status = -ETXTBSY; | 170 | status = -ETXTBSY; |
171 | kfree(fp); | 171 | kfree(fp); |
172 | goto bail; | 172 | goto bail; |
@@ -193,7 +193,7 @@ static int dlmfs_file_release(struct inode *inode, | |||
193 | status = 0; | 193 | status = 0; |
194 | if (fp) { | 194 | if (fp) { |
195 | level = fp->fp_lock_level; | 195 | level = fp->fp_lock_level; |
196 | if (level != LKM_IVMODE) | 196 | if (level != DLM_LOCK_IV) |
197 | user_dlm_cluster_unlock(&ip->ip_lockres, level); | 197 | user_dlm_cluster_unlock(&ip->ip_lockres, level); |
198 | 198 | ||
199 | kfree(fp); | 199 | kfree(fp); |
@@ -262,7 +262,7 @@ static ssize_t dlmfs_file_read(struct file *filp, | |||
262 | if ((count + *ppos) > i_size_read(inode)) | 262 | if ((count + *ppos) > i_size_read(inode)) |
263 | readlen = i_size_read(inode) - *ppos; | 263 | readlen = i_size_read(inode) - *ppos; |
264 | else | 264 | else |
265 | readlen = count - *ppos; | 265 | readlen = count; |
266 | 266 | ||
267 | lvb_buf = kmalloc(readlen, GFP_NOFS); | 267 | lvb_buf = kmalloc(readlen, GFP_NOFS); |
268 | if (!lvb_buf) | 268 | if (!lvb_buf) |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 17947dc8341e..a5fbd9cea968 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -684,6 +684,7 @@ restarted_transaction: | |||
684 | if (why == RESTART_META) { | 684 | if (why == RESTART_META) { |
685 | mlog(0, "restarting function.\n"); | 685 | mlog(0, "restarting function.\n"); |
686 | restart_func = 1; | 686 | restart_func = 1; |
687 | status = 0; | ||
687 | } else { | 688 | } else { |
688 | BUG_ON(why != RESTART_TRANS); | 689 | BUG_ON(why != RESTART_TRANS); |
689 | 690 | ||
@@ -1981,18 +1982,18 @@ relock: | |||
1981 | /* communicate with ocfs2_dio_end_io */ | 1982 | /* communicate with ocfs2_dio_end_io */ |
1982 | ocfs2_iocb_set_rw_locked(iocb, rw_level); | 1983 | ocfs2_iocb_set_rw_locked(iocb, rw_level); |
1983 | 1984 | ||
1984 | if (direct_io) { | 1985 | ret = generic_segment_checks(iov, &nr_segs, &ocount, |
1985 | ret = generic_segment_checks(iov, &nr_segs, &ocount, | 1986 | VERIFY_READ); |
1986 | VERIFY_READ); | 1987 | if (ret) |
1987 | if (ret) | 1988 | goto out_dio; |
1988 | goto out_dio; | ||
1989 | 1989 | ||
1990 | count = ocount; | 1990 | count = ocount; |
1991 | ret = generic_write_checks(file, ppos, &count, | 1991 | ret = generic_write_checks(file, ppos, &count, |
1992 | S_ISBLK(inode->i_mode)); | 1992 | S_ISBLK(inode->i_mode)); |
1993 | if (ret) | 1993 | if (ret) |
1994 | goto out_dio; | 1994 | goto out_dio; |
1995 | 1995 | ||
1996 | if (direct_io) { | ||
1996 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, | 1997 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, |
1997 | ppos, count, ocount); | 1998 | ppos, count, ocount); |
1998 | if (written < 0) { | 1999 | if (written < 0) { |
@@ -2007,7 +2008,10 @@ relock: | |||
2007 | goto out_dio; | 2008 | goto out_dio; |
2008 | } | 2009 | } |
2009 | } else { | 2010 | } else { |
2010 | written = __generic_file_aio_write(iocb, iov, nr_segs, ppos); | 2011 | current->backing_dev_info = file->f_mapping->backing_dev_info; |
2012 | written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos, | ||
2013 | ppos, count, 0); | ||
2014 | current->backing_dev_info = NULL; | ||
2011 | } | 2015 | } |
2012 | 2016 | ||
2013 | out_dio: | 2017 | out_dio: |
@@ -2021,9 +2025,9 @@ out_dio: | |||
2021 | if (ret < 0) | 2025 | if (ret < 0) |
2022 | written = ret; | 2026 | written = ret; |
2023 | 2027 | ||
2024 | if (!ret && (old_size != i_size_read(inode) || | 2028 | if (!ret && ((old_size != i_size_read(inode)) || |
2025 | old_clusters != OCFS2_I(inode)->ip_clusters || | 2029 | (old_clusters != OCFS2_I(inode)->ip_clusters) || |
2026 | has_refcount)) { | 2030 | has_refcount)) { |
2027 | ret = jbd2_journal_force_commit(osb->journal->j_journal); | 2031 | ret = jbd2_journal_force_commit(osb->journal->j_journal); |
2028 | if (ret < 0) | 2032 | if (ret < 0) |
2029 | written = ret; | 2033 | written = ret; |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 07cc8bb68b6d..af189887201c 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -558,6 +558,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, | |||
558 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 558 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
559 | if (IS_ERR(handle)) { | 559 | if (IS_ERR(handle)) { |
560 | status = PTR_ERR(handle); | 560 | status = PTR_ERR(handle); |
561 | handle = NULL; | ||
561 | mlog_errno(status); | 562 | mlog_errno(status); |
562 | goto out; | 563 | goto out; |
563 | } | 564 | } |
@@ -639,11 +640,13 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
639 | goto bail_unlock; | 640 | goto bail_unlock; |
640 | } | 641 | } |
641 | 642 | ||
642 | status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, | 643 | if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { |
643 | orphan_dir_bh); | 644 | status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, |
644 | if (status < 0) { | 645 | orphan_dir_bh); |
645 | mlog_errno(status); | 646 | if (status < 0) { |
646 | goto bail_commit; | 647 | mlog_errno(status); |
648 | goto bail_commit; | ||
649 | } | ||
647 | } | 650 | } |
648 | 651 | ||
649 | /* set the inodes dtime */ | 652 | /* set the inodes dtime */ |
@@ -722,38 +725,39 @@ static void ocfs2_signal_wipe_completion(struct ocfs2_super *osb, | |||
722 | static int ocfs2_wipe_inode(struct inode *inode, | 725 | static int ocfs2_wipe_inode(struct inode *inode, |
723 | struct buffer_head *di_bh) | 726 | struct buffer_head *di_bh) |
724 | { | 727 | { |
725 | int status, orphaned_slot; | 728 | int status, orphaned_slot = -1; |
726 | struct inode *orphan_dir_inode = NULL; | 729 | struct inode *orphan_dir_inode = NULL; |
727 | struct buffer_head *orphan_dir_bh = NULL; | 730 | struct buffer_head *orphan_dir_bh = NULL; |
728 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 731 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
729 | struct ocfs2_dinode *di; | 732 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; |
730 | 733 | ||
731 | di = (struct ocfs2_dinode *) di_bh->b_data; | 734 | if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { |
732 | orphaned_slot = le16_to_cpu(di->i_orphaned_slot); | 735 | orphaned_slot = le16_to_cpu(di->i_orphaned_slot); |
733 | 736 | ||
734 | status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); | 737 | status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); |
735 | if (status) | 738 | if (status) |
736 | return status; | 739 | return status; |
737 | 740 | ||
738 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, | 741 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, |
739 | ORPHAN_DIR_SYSTEM_INODE, | 742 | ORPHAN_DIR_SYSTEM_INODE, |
740 | orphaned_slot); | 743 | orphaned_slot); |
741 | if (!orphan_dir_inode) { | 744 | if (!orphan_dir_inode) { |
742 | status = -EEXIST; | 745 | status = -EEXIST; |
743 | mlog_errno(status); | 746 | mlog_errno(status); |
744 | goto bail; | 747 | goto bail; |
745 | } | 748 | } |
746 | 749 | ||
747 | /* Lock the orphan dir. The lock will be held for the entire | 750 | /* Lock the orphan dir. The lock will be held for the entire |
748 | * delete_inode operation. We do this now to avoid races with | 751 | * delete_inode operation. We do this now to avoid races with |
749 | * recovery completion on other nodes. */ | 752 | * recovery completion on other nodes. */ |
750 | mutex_lock(&orphan_dir_inode->i_mutex); | 753 | mutex_lock(&orphan_dir_inode->i_mutex); |
751 | status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); | 754 | status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); |
752 | if (status < 0) { | 755 | if (status < 0) { |
753 | mutex_unlock(&orphan_dir_inode->i_mutex); | 756 | mutex_unlock(&orphan_dir_inode->i_mutex); |
754 | 757 | ||
755 | mlog_errno(status); | 758 | mlog_errno(status); |
756 | goto bail; | 759 | goto bail; |
760 | } | ||
757 | } | 761 | } |
758 | 762 | ||
759 | /* we do this while holding the orphan dir lock because we | 763 | /* we do this while holding the orphan dir lock because we |
@@ -794,6 +798,9 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
794 | mlog_errno(status); | 798 | mlog_errno(status); |
795 | 799 | ||
796 | bail_unlock_dir: | 800 | bail_unlock_dir: |
801 | if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR) | ||
802 | return status; | ||
803 | |||
797 | ocfs2_inode_unlock(orphan_dir_inode, 1); | 804 | ocfs2_inode_unlock(orphan_dir_inode, 1); |
798 | mutex_unlock(&orphan_dir_inode->i_mutex); | 805 | mutex_unlock(&orphan_dir_inode->i_mutex); |
799 | brelse(orphan_dir_bh); | 806 | brelse(orphan_dir_bh); |
@@ -889,7 +896,8 @@ static int ocfs2_query_inode_wipe(struct inode *inode, | |||
889 | 896 | ||
890 | /* Do some basic inode verification... */ | 897 | /* Do some basic inode verification... */ |
891 | di = (struct ocfs2_dinode *) di_bh->b_data; | 898 | di = (struct ocfs2_dinode *) di_bh->b_data; |
892 | if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) { | 899 | if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) && |
900 | !(oi->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) { | ||
893 | /* | 901 | /* |
894 | * Inodes in the orphan dir must have ORPHANED_FL. The only | 902 | * Inodes in the orphan dir must have ORPHANED_FL. The only |
895 | * inodes that come back out of the orphan dir are reflink | 903 | * inodes that come back out of the orphan dir are reflink |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index ba4fe07b293c..0b28e1921a39 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -100,6 +100,8 @@ struct ocfs2_inode_info | |||
100 | #define OCFS2_INODE_MAYBE_ORPHANED 0x00000020 | 100 | #define OCFS2_INODE_MAYBE_ORPHANED 0x00000020 |
101 | /* Does someone have the file open O_DIRECT */ | 101 | /* Does someone have the file open O_DIRECT */ |
102 | #define OCFS2_INODE_OPEN_DIRECT 0x00000040 | 102 | #define OCFS2_INODE_OPEN_DIRECT 0x00000040 |
103 | /* Tell the inode wipe code it's not in orphan dir */ | ||
104 | #define OCFS2_INODE_SKIP_ORPHAN_DIR 0x00000080 | ||
103 | 105 | ||
104 | static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) | 106 | static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) |
105 | { | 107 | { |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b1eb50ae4097..4cbb18f26c5f 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -408,23 +408,28 @@ static int ocfs2_mknod(struct inode *dir, | |||
408 | } | 408 | } |
409 | } | 409 | } |
410 | 410 | ||
411 | status = ocfs2_add_entry(handle, dentry, inode, | 411 | /* |
412 | OCFS2_I(inode)->ip_blkno, parent_fe_bh, | 412 | * Do this before adding the entry to the directory. We add |
413 | &lookup); | 413 | * also set d_op after success so that ->d_iput() will cleanup |
414 | if (status < 0) { | 414 | * the dentry lock even if ocfs2_add_entry() fails below. |
415 | */ | ||
416 | status = ocfs2_dentry_attach_lock(dentry, inode, | ||
417 | OCFS2_I(dir)->ip_blkno); | ||
418 | if (status) { | ||
415 | mlog_errno(status); | 419 | mlog_errno(status); |
416 | goto leave; | 420 | goto leave; |
417 | } | 421 | } |
422 | dentry->d_op = &ocfs2_dentry_ops; | ||
418 | 423 | ||
419 | status = ocfs2_dentry_attach_lock(dentry, inode, | 424 | status = ocfs2_add_entry(handle, dentry, inode, |
420 | OCFS2_I(dir)->ip_blkno); | 425 | OCFS2_I(inode)->ip_blkno, parent_fe_bh, |
421 | if (status) { | 426 | &lookup); |
427 | if (status < 0) { | ||
422 | mlog_errno(status); | 428 | mlog_errno(status); |
423 | goto leave; | 429 | goto leave; |
424 | } | 430 | } |
425 | 431 | ||
426 | insert_inode_hash(inode); | 432 | insert_inode_hash(inode); |
427 | dentry->d_op = &ocfs2_dentry_ops; | ||
428 | d_instantiate(dentry, inode); | 433 | d_instantiate(dentry, inode); |
429 | status = 0; | 434 | status = 0; |
430 | leave: | 435 | leave: |
@@ -445,11 +450,6 @@ leave: | |||
445 | 450 | ||
446 | ocfs2_free_dir_lookup_result(&lookup); | 451 | ocfs2_free_dir_lookup_result(&lookup); |
447 | 452 | ||
448 | if ((status < 0) && inode) { | ||
449 | clear_nlink(inode); | ||
450 | iput(inode); | ||
451 | } | ||
452 | |||
453 | if (inode_ac) | 453 | if (inode_ac) |
454 | ocfs2_free_alloc_context(inode_ac); | 454 | ocfs2_free_alloc_context(inode_ac); |
455 | 455 | ||
@@ -459,6 +459,17 @@ leave: | |||
459 | if (meta_ac) | 459 | if (meta_ac) |
460 | ocfs2_free_alloc_context(meta_ac); | 460 | ocfs2_free_alloc_context(meta_ac); |
461 | 461 | ||
462 | /* | ||
463 | * We should call iput after the i_mutex of the bitmap been | ||
464 | * unlocked in ocfs2_free_alloc_context, or the | ||
465 | * ocfs2_delete_inode will mutex_lock again. | ||
466 | */ | ||
467 | if ((status < 0) && inode) { | ||
468 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR; | ||
469 | clear_nlink(inode); | ||
470 | iput(inode); | ||
471 | } | ||
472 | |||
462 | mlog_exit(status); | 473 | mlog_exit(status); |
463 | 474 | ||
464 | return status; | 475 | return status; |
@@ -1771,22 +1782,27 @@ static int ocfs2_symlink(struct inode *dir, | |||
1771 | } | 1782 | } |
1772 | } | 1783 | } |
1773 | 1784 | ||
1774 | status = ocfs2_add_entry(handle, dentry, inode, | 1785 | /* |
1775 | le64_to_cpu(fe->i_blkno), parent_fe_bh, | 1786 | * Do this before adding the entry to the directory. We add |
1776 | &lookup); | 1787 | * also set d_op after success so that ->d_iput() will cleanup |
1777 | if (status < 0) { | 1788 | * the dentry lock even if ocfs2_add_entry() fails below. |
1789 | */ | ||
1790 | status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); | ||
1791 | if (status) { | ||
1778 | mlog_errno(status); | 1792 | mlog_errno(status); |
1779 | goto bail; | 1793 | goto bail; |
1780 | } | 1794 | } |
1795 | dentry->d_op = &ocfs2_dentry_ops; | ||
1781 | 1796 | ||
1782 | status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); | 1797 | status = ocfs2_add_entry(handle, dentry, inode, |
1783 | if (status) { | 1798 | le64_to_cpu(fe->i_blkno), parent_fe_bh, |
1799 | &lookup); | ||
1800 | if (status < 0) { | ||
1784 | mlog_errno(status); | 1801 | mlog_errno(status); |
1785 | goto bail; | 1802 | goto bail; |
1786 | } | 1803 | } |
1787 | 1804 | ||
1788 | insert_inode_hash(inode); | 1805 | insert_inode_hash(inode); |
1789 | dentry->d_op = &ocfs2_dentry_ops; | ||
1790 | d_instantiate(dentry, inode); | 1806 | d_instantiate(dentry, inode); |
1791 | bail: | 1807 | bail: |
1792 | if (status < 0 && did_quota) | 1808 | if (status < 0 && did_quota) |
@@ -1811,6 +1827,7 @@ bail: | |||
1811 | if (xattr_ac) | 1827 | if (xattr_ac) |
1812 | ocfs2_free_alloc_context(xattr_ac); | 1828 | ocfs2_free_alloc_context(xattr_ac); |
1813 | if ((status < 0) && inode) { | 1829 | if ((status < 0) && inode) { |
1830 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR; | ||
1814 | clear_nlink(inode); | 1831 | clear_nlink(inode); |
1815 | iput(inode); | 1832 | iput(inode); |
1816 | } | 1833 | } |
@@ -1976,6 +1993,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
1976 | } | 1993 | } |
1977 | 1994 | ||
1978 | le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); | 1995 | le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); |
1996 | OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR; | ||
1979 | 1997 | ||
1980 | /* Record which orphan dir our inode now resides | 1998 | /* Record which orphan dir our inode now resides |
1981 | * in. delete_inode will use this to determine which orphan | 1999 | * in. delete_inode will use this to determine which orphan |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index bd96f6c7877e..5cbcd0f008fc 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -4083,6 +4083,9 @@ static int ocfs2_complete_reflink(struct inode *s_inode, | |||
4083 | di->i_attr = s_di->i_attr; | 4083 | di->i_attr = s_di->i_attr; |
4084 | 4084 | ||
4085 | if (preserve) { | 4085 | if (preserve) { |
4086 | t_inode->i_uid = s_inode->i_uid; | ||
4087 | t_inode->i_gid = s_inode->i_gid; | ||
4088 | t_inode->i_mode = s_inode->i_mode; | ||
4086 | di->i_uid = s_di->i_uid; | 4089 | di->i_uid = s_di->i_uid; |
4087 | di->i_gid = s_di->i_gid; | 4090 | di->i_gid = s_di->i_gid; |
4088 | di->i_mode = s_di->i_mode; | 4091 | di->i_mode = s_di->i_mode; |
diff --git a/fs/proc/array.c b/fs/proc/array.c index e51f2ec2c5e5..885ab5513ac5 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -81,7 +81,6 @@ | |||
81 | #include <linux/pid_namespace.h> | 81 | #include <linux/pid_namespace.h> |
82 | #include <linux/ptrace.h> | 82 | #include <linux/ptrace.h> |
83 | #include <linux/tracehook.h> | 83 | #include <linux/tracehook.h> |
84 | #include <linux/swapops.h> | ||
85 | 84 | ||
86 | #include <asm/pgtable.h> | 85 | #include <asm/pgtable.h> |
87 | #include <asm/processor.h> | 86 | #include <asm/processor.h> |
@@ -495,7 +494,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
495 | rsslim, | 494 | rsslim, |
496 | mm ? mm->start_code : 0, | 495 | mm ? mm->start_code : 0, |
497 | mm ? mm->end_code : 0, | 496 | mm ? mm->end_code : 0, |
498 | (permitted && mm) ? task->stack_start : 0, | 497 | (permitted && mm) ? mm->start_stack : 0, |
499 | esp, | 498 | esp, |
500 | eip, | 499 | eip, |
501 | /* The signal information here is obsolete. | 500 | /* The signal information here is obsolete. |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 7621db800a74..8418fcc0a6ab 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -2909,7 +2909,7 @@ out_no_task: | |||
2909 | */ | 2909 | */ |
2910 | static const struct pid_entry tid_base_stuff[] = { | 2910 | static const struct pid_entry tid_base_stuff[] = { |
2911 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), | 2911 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), |
2912 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fd_operations), | 2912 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), |
2913 | REG("environ", S_IRUSR, proc_environ_operations), | 2913 | REG("environ", S_IRUSR, proc_environ_operations), |
2914 | INF("auxv", S_IRUSR, proc_pid_auxv), | 2914 | INF("auxv", S_IRUSR, proc_pid_auxv), |
2915 | ONE("status", S_IRUGO, proc_pid_status), | 2915 | ONE("status", S_IRUGO, proc_pid_status), |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 070553427dd5..47f5b145f56e 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -247,25 +247,6 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | |||
247 | } else if (vma->vm_start <= mm->start_stack && | 247 | } else if (vma->vm_start <= mm->start_stack && |
248 | vma->vm_end >= mm->start_stack) { | 248 | vma->vm_end >= mm->start_stack) { |
249 | name = "[stack]"; | 249 | name = "[stack]"; |
250 | } else { | ||
251 | unsigned long stack_start; | ||
252 | struct proc_maps_private *pmp; | ||
253 | |||
254 | pmp = m->private; | ||
255 | stack_start = pmp->task->stack_start; | ||
256 | |||
257 | if (vma->vm_start <= stack_start && | ||
258 | vma->vm_end >= stack_start) { | ||
259 | pad_len_spaces(m, len); | ||
260 | seq_printf(m, | ||
261 | "[threadstack:%08lx]", | ||
262 | #ifdef CONFIG_STACK_GROWSUP | ||
263 | vma->vm_end - stack_start | ||
264 | #else | ||
265 | stack_start - vma->vm_start | ||
266 | #endif | ||
267 | ); | ||
268 | } | ||
269 | } | 250 | } |
270 | } else { | 251 | } else { |
271 | name = "[vdso]"; | 252 | name = "[vdso]"; |
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig index dad7fb247ddc..3e21b1e2ad3a 100644 --- a/fs/quota/Kconfig +++ b/fs/quota/Kconfig | |||
@@ -33,6 +33,14 @@ config PRINT_QUOTA_WARNING | |||
33 | Note that this behavior is currently deprecated and may go away in | 33 | Note that this behavior is currently deprecated and may go away in |
34 | future. Please use notification via netlink socket instead. | 34 | future. Please use notification via netlink socket instead. |
35 | 35 | ||
36 | config QUOTA_DEBUG | ||
37 | bool "Additional quota sanity checks" | ||
38 | depends on QUOTA | ||
39 | default n | ||
40 | help | ||
41 | If you say Y here, quota subsystem will perform some additional | ||
42 | sanity checks of quota internal structures. If unsure, say N. | ||
43 | |||
36 | # Generic support for tree structured quota files. Selected when needed. | 44 | # Generic support for tree structured quota files. Selected when needed. |
37 | config QUOTA_TREE | 45 | config QUOTA_TREE |
38 | tristate | 46 | tristate |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index a0a9405b202a..788b5802a7ce 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -80,8 +80,6 @@ | |||
80 | 80 | ||
81 | #include <asm/uaccess.h> | 81 | #include <asm/uaccess.h> |
82 | 82 | ||
83 | #define __DQUOT_PARANOIA | ||
84 | |||
85 | /* | 83 | /* |
86 | * There are three quota SMP locks. dq_list_lock protects all lists with quotas | 84 | * There are three quota SMP locks. dq_list_lock protects all lists with quotas |
87 | * and quota formats, dqstats structure containing statistics about the lists | 85 | * and quota formats, dqstats structure containing statistics about the lists |
@@ -695,7 +693,7 @@ void dqput(struct dquot *dquot) | |||
695 | 693 | ||
696 | if (!dquot) | 694 | if (!dquot) |
697 | return; | 695 | return; |
698 | #ifdef __DQUOT_PARANOIA | 696 | #ifdef CONFIG_QUOTA_DEBUG |
699 | if (!atomic_read(&dquot->dq_count)) { | 697 | if (!atomic_read(&dquot->dq_count)) { |
700 | printk("VFS: dqput: trying to free free dquot\n"); | 698 | printk("VFS: dqput: trying to free free dquot\n"); |
701 | printk("VFS: device %s, dquot of %s %d\n", | 699 | printk("VFS: device %s, dquot of %s %d\n", |
@@ -748,7 +746,7 @@ we_slept: | |||
748 | goto we_slept; | 746 | goto we_slept; |
749 | } | 747 | } |
750 | atomic_dec(&dquot->dq_count); | 748 | atomic_dec(&dquot->dq_count); |
751 | #ifdef __DQUOT_PARANOIA | 749 | #ifdef CONFIG_QUOTA_DEBUG |
752 | /* sanity check */ | 750 | /* sanity check */ |
753 | BUG_ON(!list_empty(&dquot->dq_free)); | 751 | BUG_ON(!list_empty(&dquot->dq_free)); |
754 | #endif | 752 | #endif |
@@ -845,7 +843,7 @@ we_slept: | |||
845 | dquot = NULL; | 843 | dquot = NULL; |
846 | goto out; | 844 | goto out; |
847 | } | 845 | } |
848 | #ifdef __DQUOT_PARANOIA | 846 | #ifdef CONFIG_QUOTA_DEBUG |
849 | BUG_ON(!dquot->dq_sb); /* Has somebody invalidated entry under us? */ | 847 | BUG_ON(!dquot->dq_sb); /* Has somebody invalidated entry under us? */ |
850 | #endif | 848 | #endif |
851 | out: | 849 | out: |
@@ -874,7 +872,7 @@ static int dqinit_needed(struct inode *inode, int type) | |||
874 | static void add_dquot_ref(struct super_block *sb, int type) | 872 | static void add_dquot_ref(struct super_block *sb, int type) |
875 | { | 873 | { |
876 | struct inode *inode, *old_inode = NULL; | 874 | struct inode *inode, *old_inode = NULL; |
877 | #ifdef __DQUOT_PARANOIA | 875 | #ifdef CONFIG_QUOTA_DEBUG |
878 | int reserved = 0; | 876 | int reserved = 0; |
879 | #endif | 877 | #endif |
880 | 878 | ||
@@ -882,7 +880,7 @@ static void add_dquot_ref(struct super_block *sb, int type) | |||
882 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 880 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
883 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) | 881 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) |
884 | continue; | 882 | continue; |
885 | #ifdef __DQUOT_PARANOIA | 883 | #ifdef CONFIG_QUOTA_DEBUG |
886 | if (unlikely(inode_get_rsv_space(inode) > 0)) | 884 | if (unlikely(inode_get_rsv_space(inode) > 0)) |
887 | reserved = 1; | 885 | reserved = 1; |
888 | #endif | 886 | #endif |
@@ -907,7 +905,7 @@ static void add_dquot_ref(struct super_block *sb, int type) | |||
907 | spin_unlock(&inode_lock); | 905 | spin_unlock(&inode_lock); |
908 | iput(old_inode); | 906 | iput(old_inode); |
909 | 907 | ||
910 | #ifdef __DQUOT_PARANOIA | 908 | #ifdef CONFIG_QUOTA_DEBUG |
911 | if (reserved) { | 909 | if (reserved) { |
912 | printk(KERN_WARNING "VFS (%s): Writes happened before quota" | 910 | printk(KERN_WARNING "VFS (%s): Writes happened before quota" |
913 | " was turned on thus quota information is probably " | 911 | " was turned on thus quota information is probably " |
@@ -940,7 +938,7 @@ static int remove_inode_dquot_ref(struct inode *inode, int type, | |||
940 | inode->i_dquot[type] = NULL; | 938 | inode->i_dquot[type] = NULL; |
941 | if (dquot) { | 939 | if (dquot) { |
942 | if (dqput_blocks(dquot)) { | 940 | if (dqput_blocks(dquot)) { |
943 | #ifdef __DQUOT_PARANOIA | 941 | #ifdef CONFIG_QUOTA_DEBUG |
944 | if (atomic_read(&dquot->dq_count) != 1) | 942 | if (atomic_read(&dquot->dq_count) != 1) |
945 | printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", atomic_read(&dquot->dq_count)); | 943 | printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", atomic_read(&dquot->dq_count)); |
946 | #endif | 944 | #endif |
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index f8a6075abf50..07930449a958 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c | |||
@@ -46,8 +46,6 @@ static inline bool is_privroot_deh(struct dentry *dir, | |||
46 | struct reiserfs_de_head *deh) | 46 | struct reiserfs_de_head *deh) |
47 | { | 47 | { |
48 | struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; | 48 | struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; |
49 | if (reiserfs_expose_privroot(dir->d_sb)) | ||
50 | return 0; | ||
51 | return (dir == dir->d_parent && privroot->d_inode && | 49 | return (dir == dir->d_parent && privroot->d_inode && |
52 | deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); | 50 | deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); |
53 | } | 51 | } |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 4f9586bb7631..e7cc00e636dc 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -554,7 +554,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, | |||
554 | if (!err && new_size < i_size_read(dentry->d_inode)) { | 554 | if (!err && new_size < i_size_read(dentry->d_inode)) { |
555 | struct iattr newattrs = { | 555 | struct iattr newattrs = { |
556 | .ia_ctime = current_fs_time(inode->i_sb), | 556 | .ia_ctime = current_fs_time(inode->i_sb), |
557 | .ia_size = buffer_size, | 557 | .ia_size = new_size, |
558 | .ia_valid = ATTR_SIZE | ATTR_CTIME, | 558 | .ia_valid = ATTR_SIZE | ATTR_CTIME, |
559 | }; | 559 | }; |
560 | 560 | ||
@@ -973,21 +973,13 @@ int reiserfs_permission(struct inode *inode, int mask) | |||
973 | return generic_permission(inode, mask, NULL); | 973 | return generic_permission(inode, mask, NULL); |
974 | } | 974 | } |
975 | 975 | ||
976 | /* This will catch lookups from the fs root to .reiserfs_priv */ | 976 | static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) |
977 | static int | ||
978 | xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name) | ||
979 | { | 977 | { |
980 | struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root; | 978 | return -EPERM; |
981 | if (container_of(q1, struct dentry, d_name) == priv_root) | ||
982 | return -ENOENT; | ||
983 | if (q1->len == name->len && | ||
984 | !memcmp(q1->name, name->name, name->len)) | ||
985 | return 0; | ||
986 | return 1; | ||
987 | } | 979 | } |
988 | 980 | ||
989 | static const struct dentry_operations xattr_lookup_poison_ops = { | 981 | static const struct dentry_operations xattr_lookup_poison_ops = { |
990 | .d_compare = xattr_lookup_poison, | 982 | .d_revalidate = xattr_hide_revalidate, |
991 | }; | 983 | }; |
992 | 984 | ||
993 | int reiserfs_lookup_privroot(struct super_block *s) | 985 | int reiserfs_lookup_privroot(struct super_block *s) |
@@ -1001,8 +993,7 @@ int reiserfs_lookup_privroot(struct super_block *s) | |||
1001 | strlen(PRIVROOT_NAME)); | 993 | strlen(PRIVROOT_NAME)); |
1002 | if (!IS_ERR(dentry)) { | 994 | if (!IS_ERR(dentry)) { |
1003 | REISERFS_SB(s)->priv_root = dentry; | 995 | REISERFS_SB(s)->priv_root = dentry; |
1004 | if (!reiserfs_expose_privroot(s)) | 996 | dentry->d_op = &xattr_lookup_poison_ops; |
1005 | s->s_root->d_op = &xattr_lookup_poison_ops; | ||
1006 | if (dentry->d_inode) | 997 | if (dentry->d_inode) |
1007 | dentry->d_inode->i_flags |= S_PRIVATE; | 998 | dentry->d_inode->i_flags |= S_PRIVATE; |
1008 | } else | 999 | } else |
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 1c4c8f089970..dfa1d67f8fca 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c | |||
@@ -479,6 +479,7 @@ smb_put_super(struct super_block *sb) | |||
479 | if (server->conn_pid) | 479 | if (server->conn_pid) |
480 | kill_pid(server->conn_pid, SIGTERM, 1); | 480 | kill_pid(server->conn_pid, SIGTERM, 1); |
481 | 481 | ||
482 | bdi_destroy(&server->bdi); | ||
482 | kfree(server->ops); | 483 | kfree(server->ops); |
483 | smb_unload_nls(server); | 484 | smb_unload_nls(server); |
484 | sb->s_fs_info = NULL; | 485 | sb->s_fs_info = NULL; |
@@ -525,6 +526,11 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) | |||
525 | if (!server) | 526 | if (!server) |
526 | goto out_no_server; | 527 | goto out_no_server; |
527 | sb->s_fs_info = server; | 528 | sb->s_fs_info = server; |
529 | |||
530 | if (bdi_setup_and_register(&server->bdi, "smbfs", BDI_CAP_MAP_COPY)) | ||
531 | goto out_bdi; | ||
532 | |||
533 | sb->s_bdi = &server->bdi; | ||
528 | 534 | ||
529 | server->super_block = sb; | 535 | server->super_block = sb; |
530 | server->mnt = NULL; | 536 | server->mnt = NULL; |
@@ -624,6 +630,8 @@ out_no_smbiod: | |||
624 | out_bad_option: | 630 | out_bad_option: |
625 | kfree(mem); | 631 | kfree(mem); |
626 | out_no_mem: | 632 | out_no_mem: |
633 | bdi_destroy(&server->bdi); | ||
634 | out_bdi: | ||
627 | if (!server->mnt) | 635 | if (!server->mnt) |
628 | printk(KERN_ERR "smb_fill_super: allocation failure\n"); | 636 | printk(KERN_ERR "smb_fill_super: allocation failure\n"); |
629 | sb->s_fs_info = NULL; | 637 | sb->s_fs_info = NULL; |
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 1cb0d81b164b..653c030eb840 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c | |||
@@ -87,9 +87,8 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, | |||
87 | u64 cur_index = index >> msblk->devblksize_log2; | 87 | u64 cur_index = index >> msblk->devblksize_log2; |
88 | int bytes, compressed, b = 0, k = 0, page = 0, avail; | 88 | int bytes, compressed, b = 0, k = 0, page = 0, avail; |
89 | 89 | ||
90 | 90 | bh = kcalloc(((srclength + msblk->devblksize - 1) | |
91 | bh = kcalloc((msblk->block_size >> msblk->devblksize_log2) + 1, | 91 | >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); |
92 | sizeof(*bh), GFP_KERNEL); | ||
93 | if (bh == NULL) | 92 | if (bh == NULL) |
94 | return -ENOMEM; | 93 | return -ENOMEM; |
95 | 94 | ||
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 3550aec2f655..48b6f4a385a6 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
@@ -275,7 +275,8 @@ allocate_root: | |||
275 | 275 | ||
276 | err = squashfs_read_inode(root, root_inode); | 276 | err = squashfs_read_inode(root, root_inode); |
277 | if (err) { | 277 | if (err) { |
278 | iget_failed(root); | 278 | make_bad_inode(root); |
279 | iput(root); | ||
279 | goto failed_mount; | 280 | goto failed_mount; |
280 | } | 281 | } |
281 | insert_inode_hash(root); | 282 | insert_inode_hash(root); |
@@ -353,6 +354,7 @@ static void squashfs_put_super(struct super_block *sb) | |||
353 | kfree(sbi->id_table); | 354 | kfree(sbi->id_table); |
354 | kfree(sbi->fragment_index); | 355 | kfree(sbi->fragment_index); |
355 | kfree(sbi->meta_index); | 356 | kfree(sbi->meta_index); |
357 | kfree(sbi->inode_lookup_table); | ||
356 | kfree(sb->s_fs_info); | 358 | kfree(sb->s_fs_info); |
357 | sb->s_fs_info = NULL; | 359 | sb->s_fs_info = NULL; |
358 | } | 360 | } |
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 15a03d0fb9f3..7a603874e483 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c | |||
@@ -128,8 +128,9 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, | |||
128 | goto release_mutex; | 128 | goto release_mutex; |
129 | } | 129 | } |
130 | 130 | ||
131 | length = stream->total_out; | ||
131 | mutex_unlock(&msblk->read_data_mutex); | 132 | mutex_unlock(&msblk->read_data_mutex); |
132 | return stream->total_out; | 133 | return length; |
133 | 134 | ||
134 | release_mutex: | 135 | release_mutex: |
135 | mutex_unlock(&msblk->read_data_mutex); | 136 | mutex_unlock(&msblk->read_data_mutex); |
diff --git a/fs/super.c b/fs/super.c index f35ac6022109..1527e6a0ee35 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/kobject.h> | 37 | #include <linux/kobject.h> |
38 | #include <linux/mutex.h> | 38 | #include <linux/mutex.h> |
39 | #include <linux/file.h> | 39 | #include <linux/file.h> |
40 | #include <linux/backing-dev.h> | ||
40 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
41 | #include "internal.h" | 42 | #include "internal.h" |
42 | 43 | ||
@@ -693,6 +694,7 @@ int set_anon_super(struct super_block *s, void *data) | |||
693 | return -EMFILE; | 694 | return -EMFILE; |
694 | } | 695 | } |
695 | s->s_dev = MKDEV(0, dev & MINORMASK); | 696 | s->s_dev = MKDEV(0, dev & MINORMASK); |
697 | s->s_bdi = &noop_backing_dev_info; | ||
696 | return 0; | 698 | return 0; |
697 | } | 699 | } |
698 | 700 | ||
@@ -954,10 +956,11 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void | |||
954 | if (error < 0) | 956 | if (error < 0) |
955 | goto out_free_secdata; | 957 | goto out_free_secdata; |
956 | BUG_ON(!mnt->mnt_sb); | 958 | BUG_ON(!mnt->mnt_sb); |
959 | WARN_ON(!mnt->mnt_sb->s_bdi); | ||
957 | 960 | ||
958 | error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); | 961 | error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); |
959 | if (error) | 962 | if (error) |
960 | goto out_sb; | 963 | goto out_sb; |
961 | 964 | ||
962 | /* | 965 | /* |
963 | * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE | 966 | * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE |
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/pagemap.h> | 14 | #include <linux/pagemap.h> |
15 | #include <linux/quotaops.h> | 15 | #include <linux/quotaops.h> |
16 | #include <linux/buffer_head.h> | 16 | #include <linux/buffer_head.h> |
17 | #include <linux/backing-dev.h> | ||
17 | #include "internal.h" | 18 | #include "internal.h" |
18 | 19 | ||
19 | #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ | 20 | #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ |
@@ -32,7 +33,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) | |||
32 | * This should be safe, as we require bdi backing to actually | 33 | * This should be safe, as we require bdi backing to actually |
33 | * write out data in the first place | 34 | * write out data in the first place |
34 | */ | 35 | */ |
35 | if (!sb->s_bdi) | 36 | if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info) |
36 | return 0; | 37 | return 0; |
37 | 38 | ||
38 | if (sb->s_qcop && sb->s_qcop->quota_sync) | 39 | if (sb->s_qcop && sb->s_qcop->quota_sync) |
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 4e50286a4cc3..1dabed286b4c 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c | |||
@@ -164,8 +164,8 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_ | |||
164 | name, de->name)) | 164 | name, de->name)) |
165 | goto found; | 165 | goto found; |
166 | } | 166 | } |
167 | dir_put_page(page); | ||
167 | } | 168 | } |
168 | dir_put_page(page); | ||
169 | 169 | ||
170 | if (++n >= npages) | 170 | if (++n >= npages) |
171 | n = 0; | 171 | n = 0; |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 52e06b487ced..29f1edca76de 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -1209,6 +1209,7 @@ xfs_fs_put_super( | |||
1209 | 1209 | ||
1210 | xfs_unmountfs(mp); | 1210 | xfs_unmountfs(mp); |
1211 | xfs_freesb(mp); | 1211 | xfs_freesb(mp); |
1212 | xfs_inode_shrinker_unregister(mp); | ||
1212 | xfs_icsb_destroy_counters(mp); | 1213 | xfs_icsb_destroy_counters(mp); |
1213 | xfs_close_devices(mp); | 1214 | xfs_close_devices(mp); |
1214 | xfs_dmops_put(mp); | 1215 | xfs_dmops_put(mp); |
@@ -1622,6 +1623,8 @@ xfs_fs_fill_super( | |||
1622 | if (error) | 1623 | if (error) |
1623 | goto fail_vnrele; | 1624 | goto fail_vnrele; |
1624 | 1625 | ||
1626 | xfs_inode_shrinker_register(mp); | ||
1627 | |||
1625 | kfree(mtpt); | 1628 | kfree(mtpt); |
1626 | return 0; | 1629 | return 0; |
1627 | 1630 | ||
@@ -1867,6 +1870,7 @@ init_xfs_fs(void) | |||
1867 | goto out_cleanup_procfs; | 1870 | goto out_cleanup_procfs; |
1868 | 1871 | ||
1869 | vfs_initquota(); | 1872 | vfs_initquota(); |
1873 | xfs_inode_shrinker_init(); | ||
1870 | 1874 | ||
1871 | error = register_filesystem(&xfs_fs_type); | 1875 | error = register_filesystem(&xfs_fs_type); |
1872 | if (error) | 1876 | if (error) |
@@ -1894,6 +1898,7 @@ exit_xfs_fs(void) | |||
1894 | { | 1898 | { |
1895 | vfs_exitquota(); | 1899 | vfs_exitquota(); |
1896 | unregister_filesystem(&xfs_fs_type); | 1900 | unregister_filesystem(&xfs_fs_type); |
1901 | xfs_inode_shrinker_destroy(); | ||
1897 | xfs_sysctl_unregister(); | 1902 | xfs_sysctl_unregister(); |
1898 | xfs_cleanup_procfs(); | 1903 | xfs_cleanup_procfs(); |
1899 | xfs_buf_terminate(); | 1904 | xfs_buf_terminate(); |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 05cd85317f6f..a427c638d909 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -95,7 +95,8 @@ xfs_inode_ag_walk( | |||
95 | struct xfs_perag *pag, int flags), | 95 | struct xfs_perag *pag, int flags), |
96 | int flags, | 96 | int flags, |
97 | int tag, | 97 | int tag, |
98 | int exclusive) | 98 | int exclusive, |
99 | int *nr_to_scan) | ||
99 | { | 100 | { |
100 | uint32_t first_index; | 101 | uint32_t first_index; |
101 | int last_error = 0; | 102 | int last_error = 0; |
@@ -134,7 +135,7 @@ restart: | |||
134 | if (error == EFSCORRUPTED) | 135 | if (error == EFSCORRUPTED) |
135 | break; | 136 | break; |
136 | 137 | ||
137 | } while (1); | 138 | } while ((*nr_to_scan)--); |
138 | 139 | ||
139 | if (skipped) { | 140 | if (skipped) { |
140 | delay(1); | 141 | delay(1); |
@@ -150,12 +151,15 @@ xfs_inode_ag_iterator( | |||
150 | struct xfs_perag *pag, int flags), | 151 | struct xfs_perag *pag, int flags), |
151 | int flags, | 152 | int flags, |
152 | int tag, | 153 | int tag, |
153 | int exclusive) | 154 | int exclusive, |
155 | int *nr_to_scan) | ||
154 | { | 156 | { |
155 | int error = 0; | 157 | int error = 0; |
156 | int last_error = 0; | 158 | int last_error = 0; |
157 | xfs_agnumber_t ag; | 159 | xfs_agnumber_t ag; |
160 | int nr; | ||
158 | 161 | ||
162 | nr = nr_to_scan ? *nr_to_scan : INT_MAX; | ||
159 | for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { | 163 | for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { |
160 | struct xfs_perag *pag; | 164 | struct xfs_perag *pag; |
161 | 165 | ||
@@ -165,14 +169,18 @@ xfs_inode_ag_iterator( | |||
165 | continue; | 169 | continue; |
166 | } | 170 | } |
167 | error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, | 171 | error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, |
168 | exclusive); | 172 | exclusive, &nr); |
169 | xfs_perag_put(pag); | 173 | xfs_perag_put(pag); |
170 | if (error) { | 174 | if (error) { |
171 | last_error = error; | 175 | last_error = error; |
172 | if (error == EFSCORRUPTED) | 176 | if (error == EFSCORRUPTED) |
173 | break; | 177 | break; |
174 | } | 178 | } |
179 | if (nr <= 0) | ||
180 | break; | ||
175 | } | 181 | } |
182 | if (nr_to_scan) | ||
183 | *nr_to_scan = nr; | ||
176 | return XFS_ERROR(last_error); | 184 | return XFS_ERROR(last_error); |
177 | } | 185 | } |
178 | 186 | ||
@@ -291,7 +299,7 @@ xfs_sync_data( | |||
291 | ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); | 299 | ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); |
292 | 300 | ||
293 | error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, | 301 | error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, |
294 | XFS_ICI_NO_TAG, 0); | 302 | XFS_ICI_NO_TAG, 0, NULL); |
295 | if (error) | 303 | if (error) |
296 | return XFS_ERROR(error); | 304 | return XFS_ERROR(error); |
297 | 305 | ||
@@ -310,7 +318,7 @@ xfs_sync_attr( | |||
310 | ASSERT((flags & ~SYNC_WAIT) == 0); | 318 | ASSERT((flags & ~SYNC_WAIT) == 0); |
311 | 319 | ||
312 | return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, | 320 | return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, |
313 | XFS_ICI_NO_TAG, 0); | 321 | XFS_ICI_NO_TAG, 0, NULL); |
314 | } | 322 | } |
315 | 323 | ||
316 | STATIC int | 324 | STATIC int |
@@ -673,6 +681,7 @@ __xfs_inode_set_reclaim_tag( | |||
673 | radix_tree_tag_set(&pag->pag_ici_root, | 681 | radix_tree_tag_set(&pag->pag_ici_root, |
674 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), | 682 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), |
675 | XFS_ICI_RECLAIM_TAG); | 683 | XFS_ICI_RECLAIM_TAG); |
684 | pag->pag_ici_reclaimable++; | ||
676 | } | 685 | } |
677 | 686 | ||
678 | /* | 687 | /* |
@@ -705,6 +714,7 @@ __xfs_inode_clear_reclaim_tag( | |||
705 | { | 714 | { |
706 | radix_tree_tag_clear(&pag->pag_ici_root, | 715 | radix_tree_tag_clear(&pag->pag_ici_root, |
707 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); | 716 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); |
717 | pag->pag_ici_reclaimable--; | ||
708 | } | 718 | } |
709 | 719 | ||
710 | /* | 720 | /* |
@@ -820,10 +830,10 @@ xfs_reclaim_inode( | |||
820 | * call into reclaim to find it in a clean state instead of waiting for | 830 | * call into reclaim to find it in a clean state instead of waiting for |
821 | * it now. We also don't return errors here - if the error is transient | 831 | * it now. We also don't return errors here - if the error is transient |
822 | * then the next reclaim pass will flush the inode, and if the error | 832 | * then the next reclaim pass will flush the inode, and if the error |
823 | * is permanent then the next sync reclaim will relcaim the inode and | 833 | * is permanent then the next sync reclaim will reclaim the inode and |
824 | * pass on the error. | 834 | * pass on the error. |
825 | */ | 835 | */ |
826 | if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 836 | if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
827 | xfs_fs_cmn_err(CE_WARN, ip->i_mount, | 837 | xfs_fs_cmn_err(CE_WARN, ip->i_mount, |
828 | "inode 0x%llx background reclaim flush failed with %d", | 838 | "inode 0x%llx background reclaim flush failed with %d", |
829 | (long long)ip->i_ino, error); | 839 | (long long)ip->i_ino, error); |
@@ -854,5 +864,93 @@ xfs_reclaim_inodes( | |||
854 | int mode) | 864 | int mode) |
855 | { | 865 | { |
856 | return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, | 866 | return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, |
857 | XFS_ICI_RECLAIM_TAG, 1); | 867 | XFS_ICI_RECLAIM_TAG, 1, NULL); |
868 | } | ||
869 | |||
870 | /* | ||
871 | * Shrinker infrastructure. | ||
872 | * | ||
873 | * This is all far more complex than it needs to be. It adds a global list of | ||
874 | * mounts because the shrinkers can only call a global context. We need to make | ||
875 | * the shrinkers pass a context to avoid the need for global state. | ||
876 | */ | ||
877 | static LIST_HEAD(xfs_mount_list); | ||
878 | static struct rw_semaphore xfs_mount_list_lock; | ||
879 | |||
880 | static int | ||
881 | xfs_reclaim_inode_shrink( | ||
882 | int nr_to_scan, | ||
883 | gfp_t gfp_mask) | ||
884 | { | ||
885 | struct xfs_mount *mp; | ||
886 | struct xfs_perag *pag; | ||
887 | xfs_agnumber_t ag; | ||
888 | int reclaimable = 0; | ||
889 | |||
890 | if (nr_to_scan) { | ||
891 | if (!(gfp_mask & __GFP_FS)) | ||
892 | return -1; | ||
893 | |||
894 | down_read(&xfs_mount_list_lock); | ||
895 | list_for_each_entry(mp, &xfs_mount_list, m_mplist) { | ||
896 | xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, | ||
897 | XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); | ||
898 | if (nr_to_scan <= 0) | ||
899 | break; | ||
900 | } | ||
901 | up_read(&xfs_mount_list_lock); | ||
902 | } | ||
903 | |||
904 | down_read(&xfs_mount_list_lock); | ||
905 | list_for_each_entry(mp, &xfs_mount_list, m_mplist) { | ||
906 | for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { | ||
907 | |||
908 | pag = xfs_perag_get(mp, ag); | ||
909 | if (!pag->pag_ici_init) { | ||
910 | xfs_perag_put(pag); | ||
911 | continue; | ||
912 | } | ||
913 | reclaimable += pag->pag_ici_reclaimable; | ||
914 | xfs_perag_put(pag); | ||
915 | } | ||
916 | } | ||
917 | up_read(&xfs_mount_list_lock); | ||
918 | return reclaimable; | ||
919 | } | ||
920 | |||
921 | static struct shrinker xfs_inode_shrinker = { | ||
922 | .shrink = xfs_reclaim_inode_shrink, | ||
923 | .seeks = DEFAULT_SEEKS, | ||
924 | }; | ||
925 | |||
926 | void __init | ||
927 | xfs_inode_shrinker_init(void) | ||
928 | { | ||
929 | init_rwsem(&xfs_mount_list_lock); | ||
930 | register_shrinker(&xfs_inode_shrinker); | ||
931 | } | ||
932 | |||
933 | void | ||
934 | xfs_inode_shrinker_destroy(void) | ||
935 | { | ||
936 | ASSERT(list_empty(&xfs_mount_list)); | ||
937 | unregister_shrinker(&xfs_inode_shrinker); | ||
938 | } | ||
939 | |||
940 | void | ||
941 | xfs_inode_shrinker_register( | ||
942 | struct xfs_mount *mp) | ||
943 | { | ||
944 | down_write(&xfs_mount_list_lock); | ||
945 | list_add_tail(&mp->m_mplist, &xfs_mount_list); | ||
946 | up_write(&xfs_mount_list_lock); | ||
947 | } | ||
948 | |||
949 | void | ||
950 | xfs_inode_shrinker_unregister( | ||
951 | struct xfs_mount *mp) | ||
952 | { | ||
953 | down_write(&xfs_mount_list_lock); | ||
954 | list_del(&mp->m_mplist); | ||
955 | up_write(&xfs_mount_list_lock); | ||
858 | } | 956 | } |
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index d480c346cabb..cdcbaaca9880 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -53,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, | |||
53 | int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); | 53 | int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); |
54 | int xfs_inode_ag_iterator(struct xfs_mount *mp, | 54 | int xfs_inode_ag_iterator(struct xfs_mount *mp, |
55 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), | 55 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), |
56 | int flags, int tag, int write_lock); | 56 | int flags, int tag, int write_lock, int *nr_to_scan); |
57 | |||
58 | void xfs_inode_shrinker_init(void); | ||
59 | void xfs_inode_shrinker_destroy(void); | ||
60 | void xfs_inode_shrinker_register(struct xfs_mount *mp); | ||
61 | void xfs_inode_shrinker_unregister(struct xfs_mount *mp); | ||
57 | 62 | ||
58 | #endif | 63 | #endif |
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 5d0ee8d492db..50bee07d6b0e 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c | |||
@@ -891,7 +891,8 @@ xfs_qm_dqrele_all_inodes( | |||
891 | uint flags) | 891 | uint flags) |
892 | { | 892 | { |
893 | ASSERT(mp->m_quotainfo); | 893 | ASSERT(mp->m_quotainfo); |
894 | xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0); | 894 | xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, |
895 | XFS_ICI_NO_TAG, 0, NULL); | ||
895 | } | 896 | } |
896 | 897 | ||
897 | /*------------------------------------------------------------------------*/ | 898 | /*------------------------------------------------------------------------*/ |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index b1a5a1ff88ea..abb8222b88c9 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -223,6 +223,7 @@ typedef struct xfs_perag { | |||
223 | int pag_ici_init; /* incore inode cache initialised */ | 223 | int pag_ici_init; /* incore inode cache initialised */ |
224 | rwlock_t pag_ici_lock; /* incore inode lock */ | 224 | rwlock_t pag_ici_lock; /* incore inode lock */ |
225 | struct radix_tree_root pag_ici_root; /* incore inode cache root */ | 225 | struct radix_tree_root pag_ici_root; /* incore inode cache root */ |
226 | int pag_ici_reclaimable; /* reclaimable inodes */ | ||
226 | #endif | 227 | #endif |
227 | int pagb_count; /* pagb slots in use */ | 228 | int pagb_count; /* pagb slots in use */ |
228 | xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */ | 229 | xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */ |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index cd27c9d6c71f..5bba29a07812 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -177,16 +177,26 @@ xfs_swap_extents_check_format( | |||
177 | XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max) | 177 | XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max) |
178 | return EINVAL; | 178 | return EINVAL; |
179 | 179 | ||
180 | /* Check root block of temp in btree form to max in target */ | 180 | /* |
181 | * If we are in a btree format, check that the temp root block will fit | ||
182 | * in the target and that it has enough extents to be in btree format | ||
183 | * in the target. | ||
184 | * | ||
185 | * Note that we have to be careful to allow btree->extent conversions | ||
186 | * (a common defrag case) which will occur when the temp inode is in | ||
187 | * extent format... | ||
188 | */ | ||
181 | if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE && | 189 | if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE && |
182 | XFS_IFORK_BOFF(ip) && | 190 | ((XFS_IFORK_BOFF(ip) && |
183 | tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) | 191 | tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) || |
192 | XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max)) | ||
184 | return EINVAL; | 193 | return EINVAL; |
185 | 194 | ||
186 | /* Check root block of target in btree form to max in temp */ | 195 | /* Reciprocal target->temp btree format checks */ |
187 | if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && | 196 | if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && |
188 | XFS_IFORK_BOFF(tip) && | 197 | ((XFS_IFORK_BOFF(tip) && |
189 | ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) | 198 | ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) || |
199 | XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max)) | ||
190 | return EINVAL; | 200 | return EINVAL; |
191 | 201 | ||
192 | return 0; | 202 | return 0; |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index e8fba92d7cd9..2be019136287 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -745,9 +745,16 @@ xfs_log_move_tail(xfs_mount_t *mp, | |||
745 | 745 | ||
746 | /* | 746 | /* |
747 | * Determine if we have a transaction that has gone to disk | 747 | * Determine if we have a transaction that has gone to disk |
748 | * that needs to be covered. Log activity needs to be idle (no AIL and | 748 | * that needs to be covered. To begin the transition to the idle state |
749 | * nothing in the iclogs). And, we need to be in the right state indicating | 749 | * firstly the log needs to be idle (no AIL and nothing in the iclogs). |
750 | * something has gone out. | 750 | * If we are then in a state where covering is needed, the caller is informed |
751 | * that dummy transactions are required to move the log into the idle state. | ||
752 | * | ||
753 | * Because this is called as part of the sync process, we should also indicate | ||
754 | * that dummy transactions should be issued in anything but the covered or | ||
755 | * idle states. This ensures that the log tail is accurately reflected in | ||
756 | * the log at the end of the sync, hence if a crash occurrs avoids replay | ||
757 | * of transactions where the metadata is already on disk. | ||
751 | */ | 758 | */ |
752 | int | 759 | int |
753 | xfs_log_need_covered(xfs_mount_t *mp) | 760 | xfs_log_need_covered(xfs_mount_t *mp) |
@@ -759,17 +766,24 @@ xfs_log_need_covered(xfs_mount_t *mp) | |||
759 | return 0; | 766 | return 0; |
760 | 767 | ||
761 | spin_lock(&log->l_icloglock); | 768 | spin_lock(&log->l_icloglock); |
762 | if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || | 769 | switch (log->l_covered_state) { |
763 | (log->l_covered_state == XLOG_STATE_COVER_NEED2)) | 770 | case XLOG_STATE_COVER_DONE: |
764 | && !xfs_trans_ail_tail(log->l_ailp) | 771 | case XLOG_STATE_COVER_DONE2: |
765 | && xlog_iclogs_empty(log)) { | 772 | case XLOG_STATE_COVER_IDLE: |
766 | if (log->l_covered_state == XLOG_STATE_COVER_NEED) | 773 | break; |
767 | log->l_covered_state = XLOG_STATE_COVER_DONE; | 774 | case XLOG_STATE_COVER_NEED: |
768 | else { | 775 | case XLOG_STATE_COVER_NEED2: |
769 | ASSERT(log->l_covered_state == XLOG_STATE_COVER_NEED2); | 776 | if (!xfs_trans_ail_tail(log->l_ailp) && |
770 | log->l_covered_state = XLOG_STATE_COVER_DONE2; | 777 | xlog_iclogs_empty(log)) { |
778 | if (log->l_covered_state == XLOG_STATE_COVER_NEED) | ||
779 | log->l_covered_state = XLOG_STATE_COVER_DONE; | ||
780 | else | ||
781 | log->l_covered_state = XLOG_STATE_COVER_DONE2; | ||
771 | } | 782 | } |
783 | /* FALLTHRU */ | ||
784 | default: | ||
772 | needed = 1; | 785 | needed = 1; |
786 | break; | ||
773 | } | 787 | } |
774 | spin_unlock(&log->l_icloglock); | 788 | spin_unlock(&log->l_icloglock); |
775 | return needed; | 789 | return needed; |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 4fa0bc7b983e..9ff48a16a7ee 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -259,6 +259,7 @@ typedef struct xfs_mount { | |||
259 | wait_queue_head_t m_wait_single_sync_task; | 259 | wait_queue_head_t m_wait_single_sync_task; |
260 | __int64_t m_update_flags; /* sb flags we need to update | 260 | __int64_t m_update_flags; /* sb flags we need to update |
261 | on the next remount,rw */ | 261 | on the next remount,rw */ |
262 | struct list_head m_mplist; /* inode shrinker mount list */ | ||
262 | } xfs_mount_t; | 263 | } xfs_mount_t; |
263 | 264 | ||
264 | /* | 265 | /* |