aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorKeith Packard <keithp@keithp.com>2011-06-21 15:02:57 -0400
committerKeith Packard <keithp@keithp.com>2011-06-21 15:02:57 -0400
commit2cd1176bd9e92924242e779dcc5c8fc922f96659 (patch)
treee9daa254647af2e7d9f99694e19976df0ea2a14d /fs
parenta18711120764dd96ed2ee6a4d436c448542bad77 (diff)
parente92d03bff9a0d0bcbb812c9b1290ca96c9338d45 (diff)
Merge branch 'drm-intel-fixes' into drm-intel-next
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/dir.c8
-rw-r--r--fs/afs/fsclient.c3
-rw-r--r--fs/afs/inode.c10
-rw-r--r--fs/afs/super.c74
-rw-r--r--fs/afs/write.c21
-rw-r--r--fs/bad_inode.c3
-rw-r--r--fs/btrfs/ctree.c10
-rw-r--r--fs/btrfs/ctree.h15
-rw-r--r--fs/btrfs/delayed-inode.c32
-rw-r--r--fs/btrfs/delayed-inode.h5
-rw-r--r--fs/btrfs/disk-io.c17
-rw-r--r--fs/btrfs/extent-tree.c59
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/free-space-cache.c163
-rw-r--r--fs/btrfs/inode.c5
-rw-r--r--fs/btrfs/ioctl.c25
-rw-r--r--fs/btrfs/relocation.c30
-rw-r--r--fs/btrfs/scrub.c69
-rw-r--r--fs/btrfs/sysfs.c146
-rw-r--r--fs/btrfs/transaction.c121
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c8
-rw-r--r--fs/buffer.c4
-rw-r--r--fs/ceph/addr.c2
-rw-r--r--fs/ceph/caps.c10
-rw-r--r--fs/ceph/dir.c11
-rw-r--r--fs/ceph/export.c4
-rw-r--r--fs/ceph/file.c35
-rw-r--r--fs/ceph/inode.c18
-rw-r--r--fs/ceph/ioctl.c6
-rw-r--r--fs/ceph/locks.c29
-rw-r--r--fs/ceph/snap.c2
-rw-r--r--fs/ceph/xattr.c6
-rw-r--r--fs/cifs/Kconfig3
-rw-r--r--fs/cifs/cache.c6
-rw-r--r--fs/cifs/cifsencrypt.c2
-rw-r--r--fs/cifs/cifsfs.c36
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/connect.c29
-rw-r--r--fs/cifs/fscache.c51
-rw-r--r--fs/coda/pioctl.c2
-rw-r--r--fs/dcookies.c3
-rw-r--r--fs/exec.c7
-rw-r--r--fs/isofs/inode.c3
-rw-r--r--fs/logfs/dir.c8
-rw-r--r--fs/namei.c34
-rw-r--r--fs/nfsd/Kconfig1
-rw-r--r--fs/nfsd/nfsctl.c19
-rw-r--r--fs/nfsd/vfs.c19
-rw-r--r--fs/nilfs2/btree.c39
-rw-r--r--fs/nilfs2/inode.c7
-rw-r--r--fs/nilfs2/segment.c2
-rw-r--r--fs/proc/base.c6
-rw-r--r--fs/proc/namespaces.c9
-rw-r--r--fs/proc/proc_sysctl.c3
-rw-r--r--fs/proc/root.c11
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/sysfs/mount.c37
-rw-r--r--fs/sysfs/sysfs.h2
-rw-r--r--fs/timerfd.c5
-rw-r--r--fs/ubifs/super.c136
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c50
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c75
-rw-r--r--fs/xfs/xfs_log.c11
65 files changed, 833 insertions, 744 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 20c106f24927..1b0b19550015 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -584,11 +584,11 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
584 584
585success: 585success:
586 d_add(dentry, inode); 586 d_add(dentry, inode);
587 _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }", 587 _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%u }",
588 fid.vnode, 588 fid.vnode,
589 fid.unique, 589 fid.unique,
590 dentry->d_inode->i_ino, 590 dentry->d_inode->i_ino,
591 (unsigned long long)dentry->d_inode->i_version); 591 dentry->d_inode->i_generation);
592 592
593 return NULL; 593 return NULL;
594} 594}
@@ -671,10 +671,10 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
671 * been deleted and replaced, and the original vnode ID has 671 * been deleted and replaced, and the original vnode ID has
672 * been reused */ 672 * been reused */
673 if (fid.unique != vnode->fid.unique) { 673 if (fid.unique != vnode->fid.unique) {
674 _debug("%s: file deleted (uq %u -> %u I:%llu)", 674 _debug("%s: file deleted (uq %u -> %u I:%u)",
675 dentry->d_name.name, fid.unique, 675 dentry->d_name.name, fid.unique,
676 vnode->fid.unique, 676 vnode->fid.unique,
677 (unsigned long long)dentry->d_inode->i_version); 677 dentry->d_inode->i_generation);
678 spin_lock(&vnode->lock); 678 spin_lock(&vnode->lock);
679 set_bit(AFS_VNODE_DELETED, &vnode->flags); 679 set_bit(AFS_VNODE_DELETED, &vnode->flags);
680 spin_unlock(&vnode->lock); 680 spin_unlock(&vnode->lock);
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 4bd0218473a9..346e3289abd7 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -89,7 +89,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
89 i_size_write(&vnode->vfs_inode, size); 89 i_size_write(&vnode->vfs_inode, size);
90 vnode->vfs_inode.i_uid = status->owner; 90 vnode->vfs_inode.i_uid = status->owner;
91 vnode->vfs_inode.i_gid = status->group; 91 vnode->vfs_inode.i_gid = status->group;
92 vnode->vfs_inode.i_version = vnode->fid.unique; 92 vnode->vfs_inode.i_generation = vnode->fid.unique;
93 vnode->vfs_inode.i_nlink = status->nlink; 93 vnode->vfs_inode.i_nlink = status->nlink;
94 94
95 mode = vnode->vfs_inode.i_mode; 95 mode = vnode->vfs_inode.i_mode;
@@ -102,6 +102,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
102 vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server; 102 vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server;
103 vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime; 103 vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime;
104 vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; 104 vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime;
105 vnode->vfs_inode.i_version = data_version;
105 } 106 }
106 107
107 expected_version = status->data_version; 108 expected_version = status->data_version;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index db66c5201474..0fdab6e03d87 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -75,7 +75,8 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
75 inode->i_ctime.tv_nsec = 0; 75 inode->i_ctime.tv_nsec = 0;
76 inode->i_atime = inode->i_mtime = inode->i_ctime; 76 inode->i_atime = inode->i_mtime = inode->i_ctime;
77 inode->i_blocks = 0; 77 inode->i_blocks = 0;
78 inode->i_version = vnode->fid.unique; 78 inode->i_generation = vnode->fid.unique;
79 inode->i_version = vnode->status.data_version;
79 inode->i_mapping->a_ops = &afs_fs_aops; 80 inode->i_mapping->a_ops = &afs_fs_aops;
80 81
81 /* check to see whether a symbolic link is really a mountpoint */ 82 /* check to see whether a symbolic link is really a mountpoint */
@@ -100,7 +101,7 @@ static int afs_iget5_test(struct inode *inode, void *opaque)
100 struct afs_iget_data *data = opaque; 101 struct afs_iget_data *data = opaque;
101 102
102 return inode->i_ino == data->fid.vnode && 103 return inode->i_ino == data->fid.vnode &&
103 inode->i_version == data->fid.unique; 104 inode->i_generation == data->fid.unique;
104} 105}
105 106
106/* 107/*
@@ -122,7 +123,7 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
122 struct afs_vnode *vnode = AFS_FS_I(inode); 123 struct afs_vnode *vnode = AFS_FS_I(inode);
123 124
124 inode->i_ino = data->fid.vnode; 125 inode->i_ino = data->fid.vnode;
125 inode->i_version = data->fid.unique; 126 inode->i_generation = data->fid.unique;
126 vnode->fid = data->fid; 127 vnode->fid = data->fid;
127 vnode->volume = data->volume; 128 vnode->volume = data->volume;
128 129
@@ -380,8 +381,7 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
380 381
381 inode = dentry->d_inode; 382 inode = dentry->d_inode;
382 383
383 _enter("{ ino=%lu v=%llu }", inode->i_ino, 384 _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
384 (unsigned long long)inode->i_version);
385 385
386 generic_fillattr(inode, stat); 386 generic_fillattr(inode, stat);
387 return 0; 387 return 0;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index fb240e8766d6..356dcf0929e8 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -31,8 +31,8 @@
31static void afs_i_init_once(void *foo); 31static void afs_i_init_once(void *foo);
32static struct dentry *afs_mount(struct file_system_type *fs_type, 32static struct dentry *afs_mount(struct file_system_type *fs_type,
33 int flags, const char *dev_name, void *data); 33 int flags, const char *dev_name, void *data);
34static void afs_kill_super(struct super_block *sb);
34static struct inode *afs_alloc_inode(struct super_block *sb); 35static struct inode *afs_alloc_inode(struct super_block *sb);
35static void afs_put_super(struct super_block *sb);
36static void afs_destroy_inode(struct inode *inode); 36static void afs_destroy_inode(struct inode *inode);
37static int afs_statfs(struct dentry *dentry, struct kstatfs *buf); 37static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
38 38
@@ -40,7 +40,7 @@ struct file_system_type afs_fs_type = {
40 .owner = THIS_MODULE, 40 .owner = THIS_MODULE,
41 .name = "afs", 41 .name = "afs",
42 .mount = afs_mount, 42 .mount = afs_mount,
43 .kill_sb = kill_anon_super, 43 .kill_sb = afs_kill_super,
44 .fs_flags = 0, 44 .fs_flags = 0,
45}; 45};
46 46
@@ -50,7 +50,6 @@ static const struct super_operations afs_super_ops = {
50 .drop_inode = afs_drop_inode, 50 .drop_inode = afs_drop_inode,
51 .destroy_inode = afs_destroy_inode, 51 .destroy_inode = afs_destroy_inode,
52 .evict_inode = afs_evict_inode, 52 .evict_inode = afs_evict_inode,
53 .put_super = afs_put_super,
54 .show_options = generic_show_options, 53 .show_options = generic_show_options,
55}; 54};
56 55
@@ -282,19 +281,25 @@ static int afs_parse_device_name(struct afs_mount_params *params,
282 */ 281 */
283static int afs_test_super(struct super_block *sb, void *data) 282static int afs_test_super(struct super_block *sb, void *data)
284{ 283{
285 struct afs_mount_params *params = data; 284 struct afs_super_info *as1 = data;
286 struct afs_super_info *as = sb->s_fs_info; 285 struct afs_super_info *as = sb->s_fs_info;
287 286
288 return as->volume == params->volume; 287 return as->volume == as1->volume;
288}
289
290static int afs_set_super(struct super_block *sb, void *data)
291{
292 sb->s_fs_info = data;
293 return set_anon_super(sb, NULL);
289} 294}
290 295
291/* 296/*
292 * fill in the superblock 297 * fill in the superblock
293 */ 298 */
294static int afs_fill_super(struct super_block *sb, void *data) 299static int afs_fill_super(struct super_block *sb,
300 struct afs_mount_params *params)
295{ 301{
296 struct afs_mount_params *params = data; 302 struct afs_super_info *as = sb->s_fs_info;
297 struct afs_super_info *as = NULL;
298 struct afs_fid fid; 303 struct afs_fid fid;
299 struct dentry *root = NULL; 304 struct dentry *root = NULL;
300 struct inode *inode = NULL; 305 struct inode *inode = NULL;
@@ -302,23 +307,13 @@ static int afs_fill_super(struct super_block *sb, void *data)
302 307
303 _enter(""); 308 _enter("");
304 309
305 /* allocate a superblock info record */
306 as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
307 if (!as) {
308 _leave(" = -ENOMEM");
309 return -ENOMEM;
310 }
311
312 afs_get_volume(params->volume);
313 as->volume = params->volume;
314
315 /* fill in the superblock */ 310 /* fill in the superblock */
316 sb->s_blocksize = PAGE_CACHE_SIZE; 311 sb->s_blocksize = PAGE_CACHE_SIZE;
317 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 312 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
318 sb->s_magic = AFS_FS_MAGIC; 313 sb->s_magic = AFS_FS_MAGIC;
319 sb->s_op = &afs_super_ops; 314 sb->s_op = &afs_super_ops;
320 sb->s_fs_info = as;
321 sb->s_bdi = &as->volume->bdi; 315 sb->s_bdi = &as->volume->bdi;
316 strlcpy(sb->s_id, as->volume->vlocation->vldb.name, sizeof(sb->s_id));
322 317
323 /* allocate the root inode and dentry */ 318 /* allocate the root inode and dentry */
324 fid.vid = as->volume->vid; 319 fid.vid = as->volume->vid;
@@ -326,7 +321,7 @@ static int afs_fill_super(struct super_block *sb, void *data)
326 fid.unique = 1; 321 fid.unique = 1;
327 inode = afs_iget(sb, params->key, &fid, NULL, NULL); 322 inode = afs_iget(sb, params->key, &fid, NULL, NULL);
328 if (IS_ERR(inode)) 323 if (IS_ERR(inode))
329 goto error_inode; 324 return PTR_ERR(inode);
330 325
331 if (params->autocell) 326 if (params->autocell)
332 set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); 327 set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
@@ -342,16 +337,8 @@ static int afs_fill_super(struct super_block *sb, void *data)
342 _leave(" = 0"); 337 _leave(" = 0");
343 return 0; 338 return 0;
344 339
345error_inode:
346 ret = PTR_ERR(inode);
347 inode = NULL;
348error: 340error:
349 iput(inode); 341 iput(inode);
350 afs_put_volume(as->volume);
351 kfree(as);
352
353 sb->s_fs_info = NULL;
354
355 _leave(" = %d", ret); 342 _leave(" = %d", ret);
356 return ret; 343 return ret;
357} 344}
@@ -367,6 +354,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
367 struct afs_volume *vol; 354 struct afs_volume *vol;
368 struct key *key; 355 struct key *key;
369 char *new_opts = kstrdup(options, GFP_KERNEL); 356 char *new_opts = kstrdup(options, GFP_KERNEL);
357 struct afs_super_info *as;
370 int ret; 358 int ret;
371 359
372 _enter(",,%s,%p", dev_name, options); 360 _enter(",,%s,%p", dev_name, options);
@@ -399,12 +387,22 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
399 ret = PTR_ERR(vol); 387 ret = PTR_ERR(vol);
400 goto error; 388 goto error;
401 } 389 }
402 params.volume = vol; 390
391 /* allocate a superblock info record */
392 as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
393 if (!as) {
394 ret = -ENOMEM;
395 afs_put_volume(vol);
396 goto error;
397 }
398 as->volume = vol;
403 399
404 /* allocate a deviceless superblock */ 400 /* allocate a deviceless superblock */
405 sb = sget(fs_type, afs_test_super, set_anon_super, &params); 401 sb = sget(fs_type, afs_test_super, afs_set_super, as);
406 if (IS_ERR(sb)) { 402 if (IS_ERR(sb)) {
407 ret = PTR_ERR(sb); 403 ret = PTR_ERR(sb);
404 afs_put_volume(vol);
405 kfree(as);
408 goto error; 406 goto error;
409 } 407 }
410 408
@@ -422,16 +420,16 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
422 } else { 420 } else {
423 _debug("reuse"); 421 _debug("reuse");
424 ASSERTCMP(sb->s_flags, &, MS_ACTIVE); 422 ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
423 afs_put_volume(vol);
424 kfree(as);
425 } 425 }
426 426
427 afs_put_volume(params.volume);
428 afs_put_cell(params.cell); 427 afs_put_cell(params.cell);
429 kfree(new_opts); 428 kfree(new_opts);
430 _leave(" = 0 [%p]", sb); 429 _leave(" = 0 [%p]", sb);
431 return dget(sb->s_root); 430 return dget(sb->s_root);
432 431
433error: 432error:
434 afs_put_volume(params.volume);
435 afs_put_cell(params.cell); 433 afs_put_cell(params.cell);
436 key_put(params.key); 434 key_put(params.key);
437 kfree(new_opts); 435 kfree(new_opts);
@@ -439,18 +437,12 @@ error:
439 return ERR_PTR(ret); 437 return ERR_PTR(ret);
440} 438}
441 439
442/* 440static void afs_kill_super(struct super_block *sb)
443 * finish the unmounting process on the superblock
444 */
445static void afs_put_super(struct super_block *sb)
446{ 441{
447 struct afs_super_info *as = sb->s_fs_info; 442 struct afs_super_info *as = sb->s_fs_info;
448 443 kill_anon_super(sb);
449 _enter("");
450
451 afs_put_volume(as->volume); 444 afs_put_volume(as->volume);
452 445 kfree(as);
453 _leave("");
454} 446}
455 447
456/* 448/*
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 789b3afb3423..b806285ff853 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -84,23 +84,21 @@ void afs_put_writeback(struct afs_writeback *wb)
84 * partly or wholly fill a page that's under preparation for writing 84 * partly or wholly fill a page that's under preparation for writing
85 */ 85 */
86static int afs_fill_page(struct afs_vnode *vnode, struct key *key, 86static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
87 loff_t pos, unsigned len, struct page *page) 87 loff_t pos, struct page *page)
88{ 88{
89 loff_t i_size; 89 loff_t i_size;
90 unsigned eof;
91 int ret; 90 int ret;
91 int len;
92 92
93 _enter(",,%llu,%u", (unsigned long long)pos, len); 93 _enter(",,%llu", (unsigned long long)pos);
94
95 ASSERTCMP(len, <=, PAGE_CACHE_SIZE);
96 94
97 i_size = i_size_read(&vnode->vfs_inode); 95 i_size = i_size_read(&vnode->vfs_inode);
98 if (pos + len > i_size) 96 if (pos + PAGE_CACHE_SIZE > i_size)
99 eof = i_size; 97 len = i_size - pos;
100 else 98 else
101 eof = PAGE_CACHE_SIZE; 99 len = PAGE_CACHE_SIZE;
102 100
103 ret = afs_vnode_fetch_data(vnode, key, 0, eof, page); 101 ret = afs_vnode_fetch_data(vnode, key, pos, len, page);
104 if (ret < 0) { 102 if (ret < 0) {
105 if (ret == -ENOENT) { 103 if (ret == -ENOENT) {
106 _debug("got NOENT from server" 104 _debug("got NOENT from server"
@@ -153,9 +151,8 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
153 *pagep = page; 151 *pagep = page;
154 /* page won't leak in error case: it eventually gets cleaned off LRU */ 152 /* page won't leak in error case: it eventually gets cleaned off LRU */
155 153
156 if (!PageUptodate(page)) { 154 if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) {
157 _debug("not up to date"); 155 ret = afs_fill_page(vnode, key, index << PAGE_CACHE_SHIFT, page);
158 ret = afs_fill_page(vnode, key, pos, len, page);
159 if (ret < 0) { 156 if (ret < 0) {
160 kfree(candidate); 157 kfree(candidate);
161 _leave(" = %d [prep]", ret); 158 _leave(" = %d [prep]", ret);
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 9ad2369d9e35..bfcb18feb1df 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -231,9 +231,6 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
231 231
232static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags) 232static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags)
233{ 233{
234 if (flags & IPERM_FLAG_RCU)
235 return -ECHILD;
236
237 return -EIO; 234 return -EIO;
238} 235}
239 236
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d84089349c82..2e667868e0d2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1228,6 +1228,7 @@ static void reada_for_search(struct btrfs_root *root,
1228 u32 nr; 1228 u32 nr;
1229 u32 blocksize; 1229 u32 blocksize;
1230 u32 nscan = 0; 1230 u32 nscan = 0;
1231 bool map = true;
1231 1232
1232 if (level != 1) 1233 if (level != 1)
1233 return; 1234 return;
@@ -1249,8 +1250,11 @@ static void reada_for_search(struct btrfs_root *root,
1249 1250
1250 nritems = btrfs_header_nritems(node); 1251 nritems = btrfs_header_nritems(node);
1251 nr = slot; 1252 nr = slot;
1253 if (node->map_token || path->skip_locking)
1254 map = false;
1255
1252 while (1) { 1256 while (1) {
1253 if (!node->map_token) { 1257 if (map && !node->map_token) {
1254 unsigned long offset = btrfs_node_key_ptr_offset(nr); 1258 unsigned long offset = btrfs_node_key_ptr_offset(nr);
1255 map_private_extent_buffer(node, offset, 1259 map_private_extent_buffer(node, offset,
1256 sizeof(struct btrfs_key_ptr), 1260 sizeof(struct btrfs_key_ptr),
@@ -1277,7 +1281,7 @@ static void reada_for_search(struct btrfs_root *root,
1277 if ((search <= target && target - search <= 65536) || 1281 if ((search <= target && target - search <= 65536) ||
1278 (search > target && search - target <= 65536)) { 1282 (search > target && search - target <= 65536)) {
1279 gen = btrfs_node_ptr_generation(node, nr); 1283 gen = btrfs_node_ptr_generation(node, nr);
1280 if (node->map_token) { 1284 if (map && node->map_token) {
1281 unmap_extent_buffer(node, node->map_token, 1285 unmap_extent_buffer(node, node->map_token,
1282 KM_USER1); 1286 KM_USER1);
1283 node->map_token = NULL; 1287 node->map_token = NULL;
@@ -1289,7 +1293,7 @@ static void reada_for_search(struct btrfs_root *root,
1289 if ((nread > 65536 || nscan > 32)) 1293 if ((nread > 65536 || nscan > 32))
1290 break; 1294 break;
1291 } 1295 }
1292 if (node->map_token) { 1296 if (map && node->map_token) {
1293 unmap_extent_buffer(node, node->map_token, KM_USER1); 1297 unmap_extent_buffer(node, node->map_token, KM_USER1);
1294 node->map_token = NULL; 1298 node->map_token = NULL;
1295 } 1299 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 378b5b4443f3..300628795fdb 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -967,6 +967,12 @@ struct btrfs_fs_info {
967 struct srcu_struct subvol_srcu; 967 struct srcu_struct subvol_srcu;
968 968
969 spinlock_t trans_lock; 969 spinlock_t trans_lock;
970 /*
971 * the reloc mutex goes with the trans lock, it is taken
972 * during commit to protect us from the relocation code
973 */
974 struct mutex reloc_mutex;
975
970 struct list_head trans_list; 976 struct list_head trans_list;
971 struct list_head hashers; 977 struct list_head hashers;
972 struct list_head dead_roots; 978 struct list_head dead_roots;
@@ -1172,6 +1178,14 @@ struct btrfs_root {
1172 u32 type; 1178 u32 type;
1173 1179
1174 u64 highest_objectid; 1180 u64 highest_objectid;
1181
1182 /* btrfs_record_root_in_trans is a multi-step process,
1183 * and it can race with the balancing code. But the
1184 * race is very small, and only the first time the root
1185 * is added to each transaction. So in_trans_setup
1186 * is used to tell us when more checks are required
1187 */
1188 unsigned long in_trans_setup;
1175 int ref_cows; 1189 int ref_cows;
1176 int track_dirty; 1190 int track_dirty;
1177 int in_radix; 1191 int in_radix;
@@ -1181,7 +1195,6 @@ struct btrfs_root {
1181 struct btrfs_key defrag_max; 1195 struct btrfs_key defrag_max;
1182 int defrag_running; 1196 int defrag_running;
1183 char *name; 1197 char *name;
1184 int in_sysfs;
1185 1198
1186 /* the dirty list is only used by non-reference counted roots */ 1199 /* the dirty list is only used by non-reference counted roots */
1187 struct list_head dirty_list; 1200 struct list_head dirty_list;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 6462c29d2d37..f1cbd028f7b3 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -297,7 +297,6 @@ struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
297 item->data_len = data_len; 297 item->data_len = data_len;
298 item->ins_or_del = 0; 298 item->ins_or_del = 0;
299 item->bytes_reserved = 0; 299 item->bytes_reserved = 0;
300 item->block_rsv = NULL;
301 item->delayed_node = NULL; 300 item->delayed_node = NULL;
302 atomic_set(&item->refs, 1); 301 atomic_set(&item->refs, 1);
303 } 302 }
@@ -593,10 +592,8 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
593 592
594 num_bytes = btrfs_calc_trans_metadata_size(root, 1); 593 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
595 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); 594 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
596 if (!ret) { 595 if (!ret)
597 item->bytes_reserved = num_bytes; 596 item->bytes_reserved = num_bytes;
598 item->block_rsv = dst_rsv;
599 }
600 597
601 return ret; 598 return ret;
602} 599}
@@ -604,10 +601,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
604static void btrfs_delayed_item_release_metadata(struct btrfs_root *root, 601static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
605 struct btrfs_delayed_item *item) 602 struct btrfs_delayed_item *item)
606{ 603{
604 struct btrfs_block_rsv *rsv;
605
607 if (!item->bytes_reserved) 606 if (!item->bytes_reserved)
608 return; 607 return;
609 608
610 btrfs_block_rsv_release(root, item->block_rsv, 609 rsv = &root->fs_info->global_block_rsv;
610 btrfs_block_rsv_release(root, rsv,
611 item->bytes_reserved); 611 item->bytes_reserved);
612} 612}
613 613
@@ -1014,6 +1014,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1014 struct btrfs_delayed_root *delayed_root; 1014 struct btrfs_delayed_root *delayed_root;
1015 struct btrfs_delayed_node *curr_node, *prev_node; 1015 struct btrfs_delayed_node *curr_node, *prev_node;
1016 struct btrfs_path *path; 1016 struct btrfs_path *path;
1017 struct btrfs_block_rsv *block_rsv;
1017 int ret = 0; 1018 int ret = 0;
1018 1019
1019 path = btrfs_alloc_path(); 1020 path = btrfs_alloc_path();
@@ -1021,6 +1022,9 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1021 return -ENOMEM; 1022 return -ENOMEM;
1022 path->leave_spinning = 1; 1023 path->leave_spinning = 1;
1023 1024
1025 block_rsv = trans->block_rsv;
1026 trans->block_rsv = &root->fs_info->global_block_rsv;
1027
1024 delayed_root = btrfs_get_delayed_root(root); 1028 delayed_root = btrfs_get_delayed_root(root);
1025 1029
1026 curr_node = btrfs_first_delayed_node(delayed_root); 1030 curr_node = btrfs_first_delayed_node(delayed_root);
@@ -1045,6 +1049,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1045 } 1049 }
1046 1050
1047 btrfs_free_path(path); 1051 btrfs_free_path(path);
1052 trans->block_rsv = block_rsv;
1048 return ret; 1053 return ret;
1049} 1054}
1050 1055
@@ -1052,6 +1057,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1052 struct btrfs_delayed_node *node) 1057 struct btrfs_delayed_node *node)
1053{ 1058{
1054 struct btrfs_path *path; 1059 struct btrfs_path *path;
1060 struct btrfs_block_rsv *block_rsv;
1055 int ret; 1061 int ret;
1056 1062
1057 path = btrfs_alloc_path(); 1063 path = btrfs_alloc_path();
@@ -1059,6 +1065,9 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1059 return -ENOMEM; 1065 return -ENOMEM;
1060 path->leave_spinning = 1; 1066 path->leave_spinning = 1;
1061 1067
1068 block_rsv = trans->block_rsv;
1069 trans->block_rsv = &node->root->fs_info->global_block_rsv;
1070
1062 ret = btrfs_insert_delayed_items(trans, path, node->root, node); 1071 ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1063 if (!ret) 1072 if (!ret)
1064 ret = btrfs_delete_delayed_items(trans, path, node->root, node); 1073 ret = btrfs_delete_delayed_items(trans, path, node->root, node);
@@ -1066,6 +1075,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1066 ret = btrfs_update_delayed_inode(trans, node->root, path, node); 1075 ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1067 btrfs_free_path(path); 1076 btrfs_free_path(path);
1068 1077
1078 trans->block_rsv = block_rsv;
1069 return ret; 1079 return ret;
1070} 1080}
1071 1081
@@ -1116,6 +1126,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1116 struct btrfs_path *path; 1126 struct btrfs_path *path;
1117 struct btrfs_delayed_node *delayed_node = NULL; 1127 struct btrfs_delayed_node *delayed_node = NULL;
1118 struct btrfs_root *root; 1128 struct btrfs_root *root;
1129 struct btrfs_block_rsv *block_rsv;
1119 unsigned long nr = 0; 1130 unsigned long nr = 0;
1120 int need_requeue = 0; 1131 int need_requeue = 0;
1121 int ret; 1132 int ret;
@@ -1134,6 +1145,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1134 if (IS_ERR(trans)) 1145 if (IS_ERR(trans))
1135 goto free_path; 1146 goto free_path;
1136 1147
1148 block_rsv = trans->block_rsv;
1149 trans->block_rsv = &root->fs_info->global_block_rsv;
1150
1137 ret = btrfs_insert_delayed_items(trans, path, root, delayed_node); 1151 ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
1138 if (!ret) 1152 if (!ret)
1139 ret = btrfs_delete_delayed_items(trans, path, root, 1153 ret = btrfs_delete_delayed_items(trans, path, root,
@@ -1176,6 +1190,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1176 1190
1177 nr = trans->blocks_used; 1191 nr = trans->blocks_used;
1178 1192
1193 trans->block_rsv = block_rsv;
1179 btrfs_end_transaction_dmeta(trans, root); 1194 btrfs_end_transaction_dmeta(trans, root);
1180 __btrfs_btree_balance_dirty(root, nr); 1195 __btrfs_btree_balance_dirty(root, nr);
1181free_path: 1196free_path:
@@ -1222,6 +1237,13 @@ again:
1222 return 0; 1237 return 0;
1223} 1238}
1224 1239
1240void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
1241{
1242 struct btrfs_delayed_root *delayed_root;
1243 delayed_root = btrfs_get_delayed_root(root);
1244 WARN_ON(btrfs_first_delayed_node(delayed_root));
1245}
1246
1225void btrfs_balance_delayed_items(struct btrfs_root *root) 1247void btrfs_balance_delayed_items(struct btrfs_root *root)
1226{ 1248{
1227 struct btrfs_delayed_root *delayed_root; 1249 struct btrfs_delayed_root *delayed_root;
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index eb7d240aa648..d1a6a2915c66 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -75,7 +75,6 @@ struct btrfs_delayed_item {
75 struct list_head tree_list; /* used for batch insert/delete items */ 75 struct list_head tree_list; /* used for batch insert/delete items */
76 struct list_head readdir_list; /* used for readdir items */ 76 struct list_head readdir_list; /* used for readdir items */
77 u64 bytes_reserved; 77 u64 bytes_reserved;
78 struct btrfs_block_rsv *block_rsv;
79 struct btrfs_delayed_node *delayed_node; 78 struct btrfs_delayed_node *delayed_node;
80 atomic_t refs; 79 atomic_t refs;
81 int ins_or_del; 80 int ins_or_del;
@@ -138,4 +137,8 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
138/* for init */ 137/* for init */
139int __init btrfs_delayed_inode_init(void); 138int __init btrfs_delayed_inode_init(void);
140void btrfs_delayed_inode_exit(void); 139void btrfs_delayed_inode_exit(void);
140
141/* for debugging */
142void btrfs_assert_delayed_root_empty(struct btrfs_root *root);
143
141#endif 144#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a203d363184d..1ac8db5dc0a3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1044,7 +1044,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1044 root->last_trans = 0; 1044 root->last_trans = 0;
1045 root->highest_objectid = 0; 1045 root->highest_objectid = 0;
1046 root->name = NULL; 1046 root->name = NULL;
1047 root->in_sysfs = 0;
1048 root->inode_tree = RB_ROOT; 1047 root->inode_tree = RB_ROOT;
1049 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); 1048 INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
1050 root->block_rsv = NULL; 1049 root->block_rsv = NULL;
@@ -1300,19 +1299,21 @@ again:
1300 return root; 1299 return root;
1301 1300
1302 root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); 1301 root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
1303 if (!root->free_ino_ctl)
1304 goto fail;
1305 root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), 1302 root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
1306 GFP_NOFS); 1303 GFP_NOFS);
1307 if (!root->free_ino_pinned) 1304 if (!root->free_ino_pinned || !root->free_ino_ctl) {
1305 ret = -ENOMEM;
1308 goto fail; 1306 goto fail;
1307 }
1309 1308
1310 btrfs_init_free_ino_ctl(root); 1309 btrfs_init_free_ino_ctl(root);
1311 mutex_init(&root->fs_commit_mutex); 1310 mutex_init(&root->fs_commit_mutex);
1312 spin_lock_init(&root->cache_lock); 1311 spin_lock_init(&root->cache_lock);
1313 init_waitqueue_head(&root->cache_wait); 1312 init_waitqueue_head(&root->cache_wait);
1314 1313
1315 set_anon_super(&root->anon_super, NULL); 1314 ret = set_anon_super(&root->anon_super, NULL);
1315 if (ret)
1316 goto fail;
1316 1317
1317 if (btrfs_root_refs(&root->root_item) == 0) { 1318 if (btrfs_root_refs(&root->root_item) == 0) {
1318 ret = -ENOENT; 1319 ret = -ENOENT;
@@ -1618,6 +1619,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1618 spin_lock_init(&fs_info->fs_roots_radix_lock); 1619 spin_lock_init(&fs_info->fs_roots_radix_lock);
1619 spin_lock_init(&fs_info->delayed_iput_lock); 1620 spin_lock_init(&fs_info->delayed_iput_lock);
1620 spin_lock_init(&fs_info->defrag_inodes_lock); 1621 spin_lock_init(&fs_info->defrag_inodes_lock);
1622 mutex_init(&fs_info->reloc_mutex);
1621 1623
1622 init_completion(&fs_info->kobj_unregister); 1624 init_completion(&fs_info->kobj_unregister);
1623 fs_info->tree_root = tree_root; 1625 fs_info->tree_root = tree_root;
@@ -1668,8 +1670,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1668 init_waitqueue_head(&fs_info->scrub_pause_wait); 1670 init_waitqueue_head(&fs_info->scrub_pause_wait);
1669 init_rwsem(&fs_info->scrub_super_lock); 1671 init_rwsem(&fs_info->scrub_super_lock);
1670 fs_info->scrub_workers_refcnt = 0; 1672 fs_info->scrub_workers_refcnt = 0;
1671 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1672 fs_info->thread_pool_size, &fs_info->generic_worker);
1673 1673
1674 sb->s_blocksize = 4096; 1674 sb->s_blocksize = 4096;
1675 sb->s_blocksize_bits = blksize_bits(4096); 1675 sb->s_blocksize_bits = blksize_bits(4096);
@@ -2911,9 +2911,8 @@ static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
2911 2911
2912 INIT_LIST_HEAD(&splice); 2912 INIT_LIST_HEAD(&splice);
2913 2913
2914 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
2915
2916 spin_lock(&root->fs_info->delalloc_lock); 2914 spin_lock(&root->fs_info->delalloc_lock);
2915 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
2917 2916
2918 while (!list_empty(&splice)) { 2917 while (!list_empty(&splice)) {
2919 btrfs_inode = list_entry(splice.next, struct btrfs_inode, 2918 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5b9b6b6df242..1f61bf5b4960 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3089,6 +3089,13 @@ alloc:
3089 } 3089 }
3090 goto again; 3090 goto again;
3091 } 3091 }
3092
3093 /*
3094 * If we have less pinned bytes than we want to allocate then
3095 * don't bother committing the transaction, it won't help us.
3096 */
3097 if (data_sinfo->bytes_pinned < bytes)
3098 committed = 1;
3092 spin_unlock(&data_sinfo->lock); 3099 spin_unlock(&data_sinfo->lock);
3093 3100
3094 /* commit the current transaction and try again */ 3101 /* commit the current transaction and try again */
@@ -3307,10 +3314,6 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3307 if (reserved == 0) 3314 if (reserved == 0)
3308 return 0; 3315 return 0;
3309 3316
3310 /* nothing to shrink - nothing to reclaim */
3311 if (root->fs_info->delalloc_bytes == 0)
3312 return 0;
3313
3314 max_reclaim = min(reserved, to_reclaim); 3317 max_reclaim = min(reserved, to_reclaim);
3315 3318
3316 while (loops < 1024) { 3319 while (loops < 1024) {
@@ -5211,9 +5214,7 @@ loop:
5211 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 5214 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
5212 * again 5215 * again
5213 */ 5216 */
5214 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && 5217 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
5215 (found_uncached_bg || empty_size || empty_cluster ||
5216 allowed_chunk_alloc)) {
5217 index = 0; 5218 index = 0;
5218 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { 5219 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
5219 found_uncached_bg = false; 5220 found_uncached_bg = false;
@@ -5253,32 +5254,36 @@ loop:
5253 goto search; 5254 goto search;
5254 } 5255 }
5255 5256
5256 if (loop < LOOP_CACHING_WAIT) { 5257 loop++;
5257 loop++;
5258 goto search;
5259 }
5260 5258
5261 if (loop == LOOP_ALLOC_CHUNK) { 5259 if (loop == LOOP_ALLOC_CHUNK) {
5262 empty_size = 0; 5260 if (allowed_chunk_alloc) {
5263 empty_cluster = 0; 5261 ret = do_chunk_alloc(trans, root, num_bytes +
5264 } 5262 2 * 1024 * 1024, data,
5263 CHUNK_ALLOC_LIMITED);
5264 allowed_chunk_alloc = 0;
5265 if (ret == 1)
5266 done_chunk_alloc = 1;
5267 } else if (!done_chunk_alloc &&
5268 space_info->force_alloc ==
5269 CHUNK_ALLOC_NO_FORCE) {
5270 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5271 }
5265 5272
5266 if (allowed_chunk_alloc) { 5273 /*
5267 ret = do_chunk_alloc(trans, root, num_bytes + 5274 * We didn't allocate a chunk, go ahead and drop the
5268 2 * 1024 * 1024, data, 5275 * empty size and loop again.
5269 CHUNK_ALLOC_LIMITED); 5276 */
5270 allowed_chunk_alloc = 0; 5277 if (!done_chunk_alloc)
5271 done_chunk_alloc = 1; 5278 loop = LOOP_NO_EMPTY_SIZE;
5272 } else if (!done_chunk_alloc &&
5273 space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
5274 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5275 } 5279 }
5276 5280
5277 if (loop < LOOP_NO_EMPTY_SIZE) { 5281 if (loop == LOOP_NO_EMPTY_SIZE) {
5278 loop++; 5282 empty_size = 0;
5279 goto search; 5283 empty_cluster = 0;
5280 } 5284 }
5281 ret = -ENOSPC; 5285
5286 goto search;
5282 } else if (!ins->objectid) { 5287 } else if (!ins->objectid) {
5283 ret = -ENOSPC; 5288 ret = -ENOSPC;
5284 } else if (ins->objectid) { 5289 } else if (ins->objectid) {
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4e8445a4757c..a11a92ee2d30 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -126,9 +126,9 @@ struct extent_buffer {
126 unsigned long map_len; 126 unsigned long map_len;
127 struct page *first_page; 127 struct page *first_page;
128 unsigned long bflags; 128 unsigned long bflags;
129 atomic_t refs;
130 struct list_head leak_list; 129 struct list_head leak_list;
131 struct rcu_head rcu_head; 130 struct rcu_head rcu_head;
131 atomic_t refs;
132 132
133 /* the spinlock is used to protect most operations */ 133 /* the spinlock is used to protect most operations */
134 spinlock_t lock; 134 spinlock_t lock;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ad144736a5fd..9f985a429877 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -250,7 +250,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
250 pgoff_t index = 0; 250 pgoff_t index = 0;
251 unsigned long first_page_offset; 251 unsigned long first_page_offset;
252 int num_checksums; 252 int num_checksums;
253 int ret = 0, ret2; 253 int ret = 0;
254 254
255 INIT_LIST_HEAD(&bitmaps); 255 INIT_LIST_HEAD(&bitmaps);
256 256
@@ -421,11 +421,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
421 goto free_cache; 421 goto free_cache;
422 } 422 }
423 spin_lock(&ctl->tree_lock); 423 spin_lock(&ctl->tree_lock);
424 ret2 = link_free_space(ctl, e); 424 ret = link_free_space(ctl, e);
425 ctl->total_bitmaps++; 425 ctl->total_bitmaps++;
426 ctl->op->recalc_thresholds(ctl); 426 ctl->op->recalc_thresholds(ctl);
427 spin_unlock(&ctl->tree_lock); 427 spin_unlock(&ctl->tree_lock);
428 list_add_tail(&e->list, &bitmaps);
429 if (ret) { 428 if (ret) {
430 printk(KERN_ERR "Duplicate entries in " 429 printk(KERN_ERR "Duplicate entries in "
431 "free space cache, dumping\n"); 430 "free space cache, dumping\n");
@@ -434,6 +433,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
434 page_cache_release(page); 433 page_cache_release(page);
435 goto free_cache; 434 goto free_cache;
436 } 435 }
436 list_add_tail(&e->list, &bitmaps);
437 } 437 }
438 438
439 num_entries--; 439 num_entries--;
@@ -1417,6 +1417,23 @@ again:
1417 return 0; 1417 return 0;
1418} 1418}
1419 1419
1420static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1421 struct btrfs_free_space *info, u64 offset,
1422 u64 bytes)
1423{
1424 u64 bytes_to_set = 0;
1425 u64 end;
1426
1427 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1428
1429 bytes_to_set = min(end - offset, bytes);
1430
1431 bitmap_set_bits(ctl, info, offset, bytes_to_set);
1432
1433 return bytes_to_set;
1434
1435}
1436
1420static bool use_bitmap(struct btrfs_free_space_ctl *ctl, 1437static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1421 struct btrfs_free_space *info) 1438 struct btrfs_free_space *info)
1422{ 1439{
@@ -1453,12 +1470,18 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1453 return true; 1470 return true;
1454} 1471}
1455 1472
1473static struct btrfs_free_space_op free_space_op = {
1474 .recalc_thresholds = recalculate_thresholds,
1475 .use_bitmap = use_bitmap,
1476};
1477
1456static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 1478static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1457 struct btrfs_free_space *info) 1479 struct btrfs_free_space *info)
1458{ 1480{
1459 struct btrfs_free_space *bitmap_info; 1481 struct btrfs_free_space *bitmap_info;
1482 struct btrfs_block_group_cache *block_group = NULL;
1460 int added = 0; 1483 int added = 0;
1461 u64 bytes, offset, end; 1484 u64 bytes, offset, bytes_added;
1462 int ret; 1485 int ret;
1463 1486
1464 bytes = info->bytes; 1487 bytes = info->bytes;
@@ -1467,7 +1490,49 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1467 if (!ctl->op->use_bitmap(ctl, info)) 1490 if (!ctl->op->use_bitmap(ctl, info))
1468 return 0; 1491 return 0;
1469 1492
1493 if (ctl->op == &free_space_op)
1494 block_group = ctl->private;
1470again: 1495again:
1496 /*
1497 * Since we link bitmaps right into the cluster we need to see if we
1498 * have a cluster here, and if so and it has our bitmap we need to add
1499 * the free space to that bitmap.
1500 */
1501 if (block_group && !list_empty(&block_group->cluster_list)) {
1502 struct btrfs_free_cluster *cluster;
1503 struct rb_node *node;
1504 struct btrfs_free_space *entry;
1505
1506 cluster = list_entry(block_group->cluster_list.next,
1507 struct btrfs_free_cluster,
1508 block_group_list);
1509 spin_lock(&cluster->lock);
1510 node = rb_first(&cluster->root);
1511 if (!node) {
1512 spin_unlock(&cluster->lock);
1513 goto no_cluster_bitmap;
1514 }
1515
1516 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1517 if (!entry->bitmap) {
1518 spin_unlock(&cluster->lock);
1519 goto no_cluster_bitmap;
1520 }
1521
1522 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1523 bytes_added = add_bytes_to_bitmap(ctl, entry,
1524 offset, bytes);
1525 bytes -= bytes_added;
1526 offset += bytes_added;
1527 }
1528 spin_unlock(&cluster->lock);
1529 if (!bytes) {
1530 ret = 1;
1531 goto out;
1532 }
1533 }
1534
1535no_cluster_bitmap:
1471 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1536 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1472 1, 0); 1537 1, 0);
1473 if (!bitmap_info) { 1538 if (!bitmap_info) {
@@ -1475,19 +1540,10 @@ again:
1475 goto new_bitmap; 1540 goto new_bitmap;
1476 } 1541 }
1477 1542
1478 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); 1543 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1479 1544 bytes -= bytes_added;
1480 if (offset >= bitmap_info->offset && offset + bytes > end) { 1545 offset += bytes_added;
1481 bitmap_set_bits(ctl, bitmap_info, offset, end - offset); 1546 added = 0;
1482 bytes -= end - offset;
1483 offset = end;
1484 added = 0;
1485 } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
1486 bitmap_set_bits(ctl, bitmap_info, offset, bytes);
1487 bytes = 0;
1488 } else {
1489 BUG();
1490 }
1491 1547
1492 if (!bytes) { 1548 if (!bytes) {
1493 ret = 1; 1549 ret = 1;
@@ -1766,11 +1822,6 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1766 "\n", count); 1822 "\n", count);
1767} 1823}
1768 1824
1769static struct btrfs_free_space_op free_space_op = {
1770 .recalc_thresholds = recalculate_thresholds,
1771 .use_bitmap = use_bitmap,
1772};
1773
1774void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 1825void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1775{ 1826{
1776 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1827 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@@ -2142,9 +2193,11 @@ again:
2142/* 2193/*
2143 * This searches the block group for just extents to fill the cluster with. 2194 * This searches the block group for just extents to fill the cluster with.
2144 */ 2195 */
2145static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2196static noinline int
2146 struct btrfs_free_cluster *cluster, 2197setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2147 u64 offset, u64 bytes, u64 min_bytes) 2198 struct btrfs_free_cluster *cluster,
2199 struct list_head *bitmaps, u64 offset, u64 bytes,
2200 u64 min_bytes)
2148{ 2201{
2149 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2202 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2150 struct btrfs_free_space *first = NULL; 2203 struct btrfs_free_space *first = NULL;
@@ -2166,6 +2219,8 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2166 * extent entry. 2219 * extent entry.
2167 */ 2220 */
2168 while (entry->bitmap) { 2221 while (entry->bitmap) {
2222 if (list_empty(&entry->list))
2223 list_add_tail(&entry->list, bitmaps);
2169 node = rb_next(&entry->offset_index); 2224 node = rb_next(&entry->offset_index);
2170 if (!node) 2225 if (!node)
2171 return -ENOSPC; 2226 return -ENOSPC;
@@ -2185,8 +2240,12 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2185 return -ENOSPC; 2240 return -ENOSPC;
2186 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2241 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2187 2242
2188 if (entry->bitmap) 2243 if (entry->bitmap) {
2244 if (list_empty(&entry->list))
2245 list_add_tail(&entry->list, bitmaps);
2189 continue; 2246 continue;
2247 }
2248
2190 /* 2249 /*
2191 * we haven't filled the empty size and the window is 2250 * we haven't filled the empty size and the window is
2192 * very large. reset and try again 2251 * very large. reset and try again
@@ -2238,9 +2297,11 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2238 * This specifically looks for bitmaps that may work in the cluster, we assume 2297 * This specifically looks for bitmaps that may work in the cluster, we assume
2239 * that we have already failed to find extents that will work. 2298 * that we have already failed to find extents that will work.
2240 */ 2299 */
2241static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2300static noinline int
2242 struct btrfs_free_cluster *cluster, 2301setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2243 u64 offset, u64 bytes, u64 min_bytes) 2302 struct btrfs_free_cluster *cluster,
2303 struct list_head *bitmaps, u64 offset, u64 bytes,
2304 u64 min_bytes)
2244{ 2305{
2245 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2306 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2246 struct btrfs_free_space *entry; 2307 struct btrfs_free_space *entry;
@@ -2250,10 +2311,39 @@ static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2250 if (ctl->total_bitmaps == 0) 2311 if (ctl->total_bitmaps == 0)
2251 return -ENOSPC; 2312 return -ENOSPC;
2252 2313
2314 /*
2315 * First check our cached list of bitmaps and see if there is an entry
2316 * here that will work.
2317 */
2318 list_for_each_entry(entry, bitmaps, list) {
2319 if (entry->bytes < min_bytes)
2320 continue;
2321 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2322 bytes, min_bytes);
2323 if (!ret)
2324 return 0;
2325 }
2326
2327 /*
2328 * If we do have entries on our list and we are here then we didn't find
2329 * anything, so go ahead and get the next entry after the last entry in
2330 * this list and start the search from there.
2331 */
2332 if (!list_empty(bitmaps)) {
2333 entry = list_entry(bitmaps->prev, struct btrfs_free_space,
2334 list);
2335 node = rb_next(&entry->offset_index);
2336 if (!node)
2337 return -ENOSPC;
2338 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2339 goto search;
2340 }
2341
2253 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1); 2342 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
2254 if (!entry) 2343 if (!entry)
2255 return -ENOSPC; 2344 return -ENOSPC;
2256 2345
2346search:
2257 node = &entry->offset_index; 2347 node = &entry->offset_index;
2258 do { 2348 do {
2259 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2349 entry = rb_entry(node, struct btrfs_free_space, offset_index);
@@ -2284,6 +2374,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2284 u64 offset, u64 bytes, u64 empty_size) 2374 u64 offset, u64 bytes, u64 empty_size)
2285{ 2375{
2286 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2376 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2377 struct list_head bitmaps;
2378 struct btrfs_free_space *entry, *tmp;
2287 u64 min_bytes; 2379 u64 min_bytes;
2288 int ret; 2380 int ret;
2289 2381
@@ -2322,11 +2414,16 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2322 goto out; 2414 goto out;
2323 } 2415 }
2324 2416
2325 ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes, 2417 INIT_LIST_HEAD(&bitmaps);
2326 min_bytes); 2418 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2419 bytes, min_bytes);
2327 if (ret) 2420 if (ret)
2328 ret = setup_cluster_bitmap(block_group, cluster, offset, 2421 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2329 bytes, min_bytes); 2422 offset, bytes, min_bytes);
2423
2424 /* Clear our temporary list */
2425 list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2426 list_del_init(&entry->list);
2330 2427
2331 if (!ret) { 2428 if (!ret) {
2332 atomic_inc(&block_group->count); 2429 atomic_inc(&block_group->count);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ebf95f7a44d6..0a9b10c5b0a7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1986,7 +1986,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1986 } 1986 }
1987 1987
1988 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) 1988 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
1989 return 0; 1989 goto good;
1990 1990
1991 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && 1991 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
1992 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { 1992 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
@@ -3076,6 +3076,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
3076 ret = btrfs_update_inode(trans, root, dir); 3076 ret = btrfs_update_inode(trans, root, dir);
3077 BUG_ON(ret); 3077 BUG_ON(ret);
3078 3078
3079 btrfs_free_path(path);
3079 return 0; 3080 return 0;
3080} 3081}
3081 3082
@@ -3646,7 +3647,7 @@ void btrfs_evict_inode(struct inode *inode)
3646 btrfs_i_size_write(inode, 0); 3647 btrfs_i_size_write(inode, 0);
3647 3648
3648 while (1) { 3649 while (1) {
3649 trans = btrfs_start_transaction(root, 0); 3650 trans = btrfs_join_transaction(root);
3650 BUG_ON(IS_ERR(trans)); 3651 BUG_ON(IS_ERR(trans));
3651 trans->block_rsv = root->orphan_block_rsv; 3652 trans->block_rsv = root->orphan_block_rsv;
3652 3653
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ac37040e426a..a3c4751e07db 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -482,8 +482,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
482 ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); 482 ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
483 BUG_ON(ret); 483 BUG_ON(ret);
484 484
485 spin_lock(&root->fs_info->trans_lock);
485 list_add(&pending_snapshot->list, 486 list_add(&pending_snapshot->list,
486 &trans->transaction->pending_snapshots); 487 &trans->transaction->pending_snapshots);
488 spin_unlock(&root->fs_info->trans_lock);
487 if (async_transid) { 489 if (async_transid) {
488 *async_transid = trans->transid; 490 *async_transid = trans->transid;
489 ret = btrfs_commit_transaction_async(trans, 491 ret = btrfs_commit_transaction_async(trans,
@@ -2054,29 +2056,34 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
2054 2056
2055static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) 2057static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
2056{ 2058{
2057 struct btrfs_ioctl_fs_info_args fi_args; 2059 struct btrfs_ioctl_fs_info_args *fi_args;
2058 struct btrfs_device *device; 2060 struct btrfs_device *device;
2059 struct btrfs_device *next; 2061 struct btrfs_device *next;
2060 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 2062 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2063 int ret = 0;
2061 2064
2062 if (!capable(CAP_SYS_ADMIN)) 2065 if (!capable(CAP_SYS_ADMIN))
2063 return -EPERM; 2066 return -EPERM;
2064 2067
2065 fi_args.num_devices = fs_devices->num_devices; 2068 fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
2066 fi_args.max_id = 0; 2069 if (!fi_args)
2067 memcpy(&fi_args.fsid, root->fs_info->fsid, sizeof(fi_args.fsid)); 2070 return -ENOMEM;
2071
2072 fi_args->num_devices = fs_devices->num_devices;
2073 memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
2068 2074
2069 mutex_lock(&fs_devices->device_list_mutex); 2075 mutex_lock(&fs_devices->device_list_mutex);
2070 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 2076 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
2071 if (device->devid > fi_args.max_id) 2077 if (device->devid > fi_args->max_id)
2072 fi_args.max_id = device->devid; 2078 fi_args->max_id = device->devid;
2073 } 2079 }
2074 mutex_unlock(&fs_devices->device_list_mutex); 2080 mutex_unlock(&fs_devices->device_list_mutex);
2075 2081
2076 if (copy_to_user(arg, &fi_args, sizeof(fi_args))) 2082 if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
2077 return -EFAULT; 2083 ret = -EFAULT;
2078 2084
2079 return 0; 2085 kfree(fi_args);
2086 return ret;
2080} 2087}
2081 2088
2082static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) 2089static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b1ef27cc673b..5e0a3dc79a45 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
1368 int ret; 1368 int ret;
1369 1369
1370 if (!root->reloc_root) 1370 if (!root->reloc_root)
1371 return 0; 1371 goto out;
1372 1372
1373 reloc_root = root->reloc_root; 1373 reloc_root = root->reloc_root;
1374 root_item = &reloc_root->root_item; 1374 root_item = &reloc_root->root_item;
@@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
1390 ret = btrfs_update_root(trans, root->fs_info->tree_root, 1390 ret = btrfs_update_root(trans, root->fs_info->tree_root,
1391 &reloc_root->root_key, root_item); 1391 &reloc_root->root_key, root_item);
1392 BUG_ON(ret); 1392 BUG_ON(ret);
1393
1394out:
1393 return 0; 1395 return 0;
1394} 1396}
1395 1397
@@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err)
2142 u64 num_bytes = 0; 2144 u64 num_bytes = 0;
2143 int ret; 2145 int ret;
2144 2146
2145 spin_lock(&root->fs_info->trans_lock); 2147 mutex_lock(&root->fs_info->reloc_mutex);
2146 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2148 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2147 rc->merging_rsv_size += rc->nodes_relocated * 2; 2149 rc->merging_rsv_size += rc->nodes_relocated * 2;
2148 spin_unlock(&root->fs_info->trans_lock); 2150 mutex_unlock(&root->fs_info->reloc_mutex);
2151
2149again: 2152again:
2150 if (!err) { 2153 if (!err) {
2151 num_bytes = rc->merging_rsv_size; 2154 num_bytes = rc->merging_rsv_size;
@@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc)
2214 int ret; 2217 int ret;
2215again: 2218again:
2216 root = rc->extent_root; 2219 root = rc->extent_root;
2217 spin_lock(&root->fs_info->trans_lock); 2220
2221 /*
2222 * this serializes us with btrfs_record_root_in_transaction,
2223 * we have to make sure nobody is in the middle of
2224 * adding their roots to the list while we are
2225 * doing this splice
2226 */
2227 mutex_lock(&root->fs_info->reloc_mutex);
2218 list_splice_init(&rc->reloc_roots, &reloc_roots); 2228 list_splice_init(&rc->reloc_roots, &reloc_roots);
2219 spin_unlock(&root->fs_info->trans_lock); 2229 mutex_unlock(&root->fs_info->reloc_mutex);
2220 2230
2221 while (!list_empty(&reloc_roots)) { 2231 while (!list_empty(&reloc_roots)) {
2222 found = 1; 2232 found = 1;
@@ -3590,17 +3600,19 @@ next:
3590static void set_reloc_control(struct reloc_control *rc) 3600static void set_reloc_control(struct reloc_control *rc)
3591{ 3601{
3592 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3602 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3593 spin_lock(&fs_info->trans_lock); 3603
3604 mutex_lock(&fs_info->reloc_mutex);
3594 fs_info->reloc_ctl = rc; 3605 fs_info->reloc_ctl = rc;
3595 spin_unlock(&fs_info->trans_lock); 3606 mutex_unlock(&fs_info->reloc_mutex);
3596} 3607}
3597 3608
3598static void unset_reloc_control(struct reloc_control *rc) 3609static void unset_reloc_control(struct reloc_control *rc)
3599{ 3610{
3600 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3611 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3601 spin_lock(&fs_info->trans_lock); 3612
3613 mutex_lock(&fs_info->reloc_mutex);
3602 fs_info->reloc_ctl = NULL; 3614 fs_info->reloc_ctl = NULL;
3603 spin_unlock(&fs_info->trans_lock); 3615 mutex_unlock(&fs_info->reloc_mutex);
3604} 3616}
3605 3617
3606static int check_extent_flags(u64 flags) 3618static int check_extent_flags(u64 flags)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index df50fd1eca8f..a8d03d5efb5d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -16,13 +16,7 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h> 19#include <linux/blkdev.h>
23#include <linux/rbtree.h>
24#include <linux/slab.h>
25#include <linux/workqueue.h>
26#include "ctree.h" 20#include "ctree.h"
27#include "volumes.h" 21#include "volumes.h"
28#include "disk-io.h" 22#include "disk-io.h"
@@ -804,18 +798,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
804 798
805 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 799 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
806 if (ret < 0) 800 if (ret < 0)
807 goto out; 801 goto out_noplug;
808
809 l = path->nodes[0];
810 slot = path->slots[0];
811 btrfs_item_key_to_cpu(l, &key, slot);
812 if (key.objectid != logical) {
813 ret = btrfs_previous_item(root, path, 0,
814 BTRFS_EXTENT_ITEM_KEY);
815 if (ret < 0)
816 goto out;
817 }
818 802
803 /*
804 * we might miss half an extent here, but that doesn't matter,
805 * as it's only the prefetch
806 */
819 while (1) { 807 while (1) {
820 l = path->nodes[0]; 808 l = path->nodes[0];
821 slot = path->slots[0]; 809 slot = path->slots[0];
@@ -824,7 +812,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
824 if (ret == 0) 812 if (ret == 0)
825 continue; 813 continue;
826 if (ret < 0) 814 if (ret < 0)
827 goto out; 815 goto out_noplug;
828 816
829 break; 817 break;
830 } 818 }
@@ -906,15 +894,20 @@ again:
906 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 894 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
907 if (ret < 0) 895 if (ret < 0)
908 goto out; 896 goto out;
909 897 if (ret > 0) {
910 l = path->nodes[0];
911 slot = path->slots[0];
912 btrfs_item_key_to_cpu(l, &key, slot);
913 if (key.objectid != logical) {
914 ret = btrfs_previous_item(root, path, 0, 898 ret = btrfs_previous_item(root, path, 0,
915 BTRFS_EXTENT_ITEM_KEY); 899 BTRFS_EXTENT_ITEM_KEY);
916 if (ret < 0) 900 if (ret < 0)
917 goto out; 901 goto out;
902 if (ret > 0) {
903 /* there's no smaller item, so stick with the
904 * larger one */
905 btrfs_release_path(path);
906 ret = btrfs_search_slot(NULL, root, &key,
907 path, 0, 0);
908 if (ret < 0)
909 goto out;
910 }
918 } 911 }
919 912
920 while (1) { 913 while (1) {
@@ -989,6 +982,7 @@ next:
989 982
990out: 983out:
991 blk_finish_plug(&plug); 984 blk_finish_plug(&plug);
985out_noplug:
992 btrfs_free_path(path); 986 btrfs_free_path(path);
993 return ret < 0 ? ret : 0; 987 return ret < 0 ? ret : 0;
994} 988}
@@ -1064,8 +1058,15 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1064 while (1) { 1058 while (1) {
1065 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1059 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1066 if (ret < 0) 1060 if (ret < 0)
1067 goto out; 1061 break;
1068 ret = 0; 1062 if (ret > 0) {
1063 if (path->slots[0] >=
1064 btrfs_header_nritems(path->nodes[0])) {
1065 ret = btrfs_next_leaf(root, path);
1066 if (ret)
1067 break;
1068 }
1069 }
1069 1070
1070 l = path->nodes[0]; 1071 l = path->nodes[0];
1071 slot = path->slots[0]; 1072 slot = path->slots[0];
@@ -1075,7 +1076,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1075 if (found_key.objectid != sdev->dev->devid) 1076 if (found_key.objectid != sdev->dev->devid)
1076 break; 1077 break;
1077 1078
1078 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) 1079 if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
1079 break; 1080 break;
1080 1081
1081 if (found_key.offset >= end) 1082 if (found_key.offset >= end)
@@ -1104,7 +1105,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1104 cache = btrfs_lookup_block_group(fs_info, chunk_offset); 1105 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
1105 if (!cache) { 1106 if (!cache) {
1106 ret = -ENOENT; 1107 ret = -ENOENT;
1107 goto out; 1108 break;
1108 } 1109 }
1109 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, 1110 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
1110 chunk_offset, length); 1111 chunk_offset, length);
@@ -1116,9 +1117,13 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1116 btrfs_release_path(path); 1117 btrfs_release_path(path);
1117 } 1118 }
1118 1119
1119out:
1120 btrfs_free_path(path); 1120 btrfs_free_path(path);
1121 return ret; 1121
1122 /*
1123 * ret can still be 1 from search_slot or next_leaf,
1124 * that's not an error
1125 */
1126 return ret < 0 ? ret : 0;
1122} 1127}
1123 1128
1124static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) 1129static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
@@ -1155,8 +1160,12 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1155 struct btrfs_fs_info *fs_info = root->fs_info; 1160 struct btrfs_fs_info *fs_info = root->fs_info;
1156 1161
1157 mutex_lock(&fs_info->scrub_lock); 1162 mutex_lock(&fs_info->scrub_lock);
1158 if (fs_info->scrub_workers_refcnt == 0) 1163 if (fs_info->scrub_workers_refcnt == 0) {
1164 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1165 fs_info->thread_pool_size, &fs_info->generic_worker);
1166 fs_info->scrub_workers.idle_thresh = 4;
1159 btrfs_start_workers(&fs_info->scrub_workers, 1); 1167 btrfs_start_workers(&fs_info->scrub_workers, 1);
1168 }
1160 ++fs_info->scrub_workers_refcnt; 1169 ++fs_info->scrub_workers_refcnt;
1161 mutex_unlock(&fs_info->scrub_lock); 1170 mutex_unlock(&fs_info->scrub_lock);
1162 1171
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c3c223ae6691..daac9ae6d731 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -28,152 +28,6 @@
28#include "disk-io.h" 28#include "disk-io.h"
29#include "transaction.h" 29#include "transaction.h"
30 30
31static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf)
32{
33 return snprintf(buf, PAGE_SIZE, "%llu\n",
34 (unsigned long long)btrfs_root_used(&root->root_item));
35}
36
37static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf)
38{
39 return snprintf(buf, PAGE_SIZE, "%llu\n",
40 (unsigned long long)btrfs_root_limit(&root->root_item));
41}
42
43static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf)
44{
45
46 return snprintf(buf, PAGE_SIZE, "%llu\n",
47 (unsigned long long)btrfs_super_bytes_used(&fs->super_copy));
48}
49
50static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf)
51{
52 return snprintf(buf, PAGE_SIZE, "%llu\n",
53 (unsigned long long)btrfs_super_total_bytes(&fs->super_copy));
54}
55
56static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf)
57{
58 return snprintf(buf, PAGE_SIZE, "%llu\n",
59 (unsigned long long)btrfs_super_sectorsize(&fs->super_copy));
60}
61
62/* this is for root attrs (subvols/snapshots) */
63struct btrfs_root_attr {
64 struct attribute attr;
65 ssize_t (*show)(struct btrfs_root *, char *);
66 ssize_t (*store)(struct btrfs_root *, const char *, size_t);
67};
68
69#define ROOT_ATTR(name, mode, show, store) \
70static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \
71 show, store)
72
73ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL);
74ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL);
75
76static struct attribute *btrfs_root_attrs[] = {
77 &btrfs_root_attr_blocks_used.attr,
78 &btrfs_root_attr_block_limit.attr,
79 NULL,
80};
81
82/* this is for super attrs (actual full fs) */
83struct btrfs_super_attr {
84 struct attribute attr;
85 ssize_t (*show)(struct btrfs_fs_info *, char *);
86 ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t);
87};
88
89#define SUPER_ATTR(name, mode, show, store) \
90static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \
91 show, store)
92
93SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL);
94SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL);
95SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL);
96
97static struct attribute *btrfs_super_attrs[] = {
98 &btrfs_super_attr_blocks_used.attr,
99 &btrfs_super_attr_total_blocks.attr,
100 &btrfs_super_attr_blocksize.attr,
101 NULL,
102};
103
104static ssize_t btrfs_super_attr_show(struct kobject *kobj,
105 struct attribute *attr, char *buf)
106{
107 struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
108 super_kobj);
109 struct btrfs_super_attr *a = container_of(attr,
110 struct btrfs_super_attr,
111 attr);
112
113 return a->show ? a->show(fs, buf) : 0;
114}
115
116static ssize_t btrfs_super_attr_store(struct kobject *kobj,
117 struct attribute *attr,
118 const char *buf, size_t len)
119{
120 struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
121 super_kobj);
122 struct btrfs_super_attr *a = container_of(attr,
123 struct btrfs_super_attr,
124 attr);
125
126 return a->store ? a->store(fs, buf, len) : 0;
127}
128
129static ssize_t btrfs_root_attr_show(struct kobject *kobj,
130 struct attribute *attr, char *buf)
131{
132 struct btrfs_root *root = container_of(kobj, struct btrfs_root,
133 root_kobj);
134 struct btrfs_root_attr *a = container_of(attr,
135 struct btrfs_root_attr,
136 attr);
137
138 return a->show ? a->show(root, buf) : 0;
139}
140
141static ssize_t btrfs_root_attr_store(struct kobject *kobj,
142 struct attribute *attr,
143 const char *buf, size_t len)
144{
145 struct btrfs_root *root = container_of(kobj, struct btrfs_root,
146 root_kobj);
147 struct btrfs_root_attr *a = container_of(attr,
148 struct btrfs_root_attr,
149 attr);
150 return a->store ? a->store(root, buf, len) : 0;
151}
152
153static void btrfs_super_release(struct kobject *kobj)
154{
155 struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
156 super_kobj);
157 complete(&fs->kobj_unregister);
158}
159
160static void btrfs_root_release(struct kobject *kobj)
161{
162 struct btrfs_root *root = container_of(kobj, struct btrfs_root,
163 root_kobj);
164 complete(&root->kobj_unregister);
165}
166
167static const struct sysfs_ops btrfs_super_attr_ops = {
168 .show = btrfs_super_attr_show,
169 .store = btrfs_super_attr_store,
170};
171
172static const struct sysfs_ops btrfs_root_attr_ops = {
173 .show = btrfs_root_attr_show,
174 .store = btrfs_root_attr_store,
175};
176
177/* /sys/fs/btrfs/ entry */ 31/* /sys/fs/btrfs/ entry */
178static struct kset *btrfs_kset; 32static struct kset *btrfs_kset;
179 33
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index dd719662340e..51dcec86757f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -126,28 +126,85 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
126 * to make sure the old root from before we joined the transaction is deleted 126 * to make sure the old root from before we joined the transaction is deleted
127 * when the transaction commits 127 * when the transaction commits
128 */ 128 */
129int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 129static int record_root_in_trans(struct btrfs_trans_handle *trans,
130 struct btrfs_root *root) 130 struct btrfs_root *root)
131{ 131{
132 if (root->ref_cows && root->last_trans < trans->transid) { 132 if (root->ref_cows && root->last_trans < trans->transid) {
133 WARN_ON(root == root->fs_info->extent_root); 133 WARN_ON(root == root->fs_info->extent_root);
134 WARN_ON(root->commit_root != root->node); 134 WARN_ON(root->commit_root != root->node);
135 135
136 /*
137 * see below for in_trans_setup usage rules
138 * we have the reloc mutex held now, so there
139 * is only one writer in this function
140 */
141 root->in_trans_setup = 1;
142
143 /* make sure readers find in_trans_setup before
144 * they find our root->last_trans update
145 */
146 smp_wmb();
147
136 spin_lock(&root->fs_info->fs_roots_radix_lock); 148 spin_lock(&root->fs_info->fs_roots_radix_lock);
137 if (root->last_trans == trans->transid) { 149 if (root->last_trans == trans->transid) {
138 spin_unlock(&root->fs_info->fs_roots_radix_lock); 150 spin_unlock(&root->fs_info->fs_roots_radix_lock);
139 return 0; 151 return 0;
140 } 152 }
141 root->last_trans = trans->transid;
142 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 153 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
143 (unsigned long)root->root_key.objectid, 154 (unsigned long)root->root_key.objectid,
144 BTRFS_ROOT_TRANS_TAG); 155 BTRFS_ROOT_TRANS_TAG);
145 spin_unlock(&root->fs_info->fs_roots_radix_lock); 156 spin_unlock(&root->fs_info->fs_roots_radix_lock);
157 root->last_trans = trans->transid;
158
159 /* this is pretty tricky. We don't want to
160 * take the relocation lock in btrfs_record_root_in_trans
161 * unless we're really doing the first setup for this root in
162 * this transaction.
163 *
164 * Normally we'd use root->last_trans as a flag to decide
165 * if we want to take the expensive mutex.
166 *
167 * But, we have to set root->last_trans before we
168 * init the relocation root, otherwise, we trip over warnings
169 * in ctree.c. The solution used here is to flag ourselves
170 * with root->in_trans_setup. When this is 1, we're still
171 * fixing up the reloc trees and everyone must wait.
172 *
173 * When this is zero, they can trust root->last_trans and fly
174 * through btrfs_record_root_in_trans without having to take the
175 * lock. smp_wmb() makes sure that all the writes above are
176 * done before we pop in the zero below
177 */
146 btrfs_init_reloc_root(trans, root); 178 btrfs_init_reloc_root(trans, root);
179 smp_wmb();
180 root->in_trans_setup = 0;
147 } 181 }
148 return 0; 182 return 0;
149} 183}
150 184
185
186int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
187 struct btrfs_root *root)
188{
189 if (!root->ref_cows)
190 return 0;
191
192 /*
193 * see record_root_in_trans for comments about in_trans_setup usage
194 * and barriers
195 */
196 smp_rmb();
197 if (root->last_trans == trans->transid &&
198 !root->in_trans_setup)
199 return 0;
200
201 mutex_lock(&root->fs_info->reloc_mutex);
202 record_root_in_trans(trans, root);
203 mutex_unlock(&root->fs_info->reloc_mutex);
204
205 return 0;
206}
207
151/* wait for commit against the current transaction to become unblocked 208/* wait for commit against the current transaction to become unblocked
152 * when this is done, it is safe to start a new transaction, but the current 209 * when this is done, it is safe to start a new transaction, but the current
153 * transaction might not be fully on disk. 210 * transaction might not be fully on disk.
@@ -349,7 +406,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
349 list) { 406 list) {
350 if (t->in_commit) { 407 if (t->in_commit) {
351 if (t->commit_done) 408 if (t->commit_done)
352 goto out; 409 break;
353 cur_trans = t; 410 cur_trans = t;
354 atomic_inc(&cur_trans->use_count); 411 atomic_inc(&cur_trans->use_count);
355 break; 412 break;
@@ -882,7 +939,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
882 parent = dget_parent(dentry); 939 parent = dget_parent(dentry);
883 parent_inode = parent->d_inode; 940 parent_inode = parent->d_inode;
884 parent_root = BTRFS_I(parent_inode)->root; 941 parent_root = BTRFS_I(parent_inode)->root;
885 btrfs_record_root_in_trans(trans, parent_root); 942 record_root_in_trans(trans, parent_root);
886 943
887 /* 944 /*
888 * insert the directory item 945 * insert the directory item
@@ -900,7 +957,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
900 ret = btrfs_update_inode(trans, parent_root, parent_inode); 957 ret = btrfs_update_inode(trans, parent_root, parent_inode);
901 BUG_ON(ret); 958 BUG_ON(ret);
902 959
903 btrfs_record_root_in_trans(trans, root); 960 /*
961 * pull in the delayed directory update
962 * and the delayed inode item
963 * otherwise we corrupt the FS during
964 * snapshot
965 */
966 ret = btrfs_run_delayed_items(trans, root);
967 BUG_ON(ret);
968
969 record_root_in_trans(trans, root);
904 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 970 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
905 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 971 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
906 btrfs_check_and_init_root_item(new_root_item); 972 btrfs_check_and_init_root_item(new_root_item);
@@ -961,14 +1027,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
961 int ret; 1027 int ret;
962 1028
963 list_for_each_entry(pending, head, list) { 1029 list_for_each_entry(pending, head, list) {
964 /*
965 * We must deal with the delayed items before creating
966 * snapshots, or we will create a snapthot with inconsistent
967 * information.
968 */
969 ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
970 BUG_ON(ret);
971
972 ret = create_pending_snapshot(trans, fs_info, pending); 1030 ret = create_pending_snapshot(trans, fs_info, pending);
973 BUG_ON(ret); 1031 BUG_ON(ret);
974 } 1032 }
@@ -1118,8 +1176,11 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1118 wait_current_trans_commit_start_and_unblock(root, cur_trans); 1176 wait_current_trans_commit_start_and_unblock(root, cur_trans);
1119 else 1177 else
1120 wait_current_trans_commit_start(root, cur_trans); 1178 wait_current_trans_commit_start(root, cur_trans);
1121 put_transaction(cur_trans);
1122 1179
1180 if (current->journal_info == trans)
1181 current->journal_info = NULL;
1182
1183 put_transaction(cur_trans);
1123 return 0; 1184 return 0;
1124} 1185}
1125 1186
@@ -1238,21 +1299,42 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1238 schedule_timeout(1); 1299 schedule_timeout(1);
1239 1300
1240 finish_wait(&cur_trans->writer_wait, &wait); 1301 finish_wait(&cur_trans->writer_wait, &wait);
1241 spin_lock(&root->fs_info->trans_lock);
1242 root->fs_info->trans_no_join = 1;
1243 spin_unlock(&root->fs_info->trans_lock);
1244 } while (atomic_read(&cur_trans->num_writers) > 1 || 1302 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1245 (should_grow && cur_trans->num_joined != joined)); 1303 (should_grow && cur_trans->num_joined != joined));
1246 1304
1247 ret = create_pending_snapshots(trans, root->fs_info); 1305 /*
1248 BUG_ON(ret); 1306 * Ok now we need to make sure to block out any other joins while we
1307 * commit the transaction. We could have started a join before setting
1308 * no_join so make sure to wait for num_writers to == 1 again.
1309 */
1310 spin_lock(&root->fs_info->trans_lock);
1311 root->fs_info->trans_no_join = 1;
1312 spin_unlock(&root->fs_info->trans_lock);
1313 wait_event(cur_trans->writer_wait,
1314 atomic_read(&cur_trans->num_writers) == 1);
1315
1316 /*
1317 * the reloc mutex makes sure that we stop
1318 * the balancing code from coming in and moving
1319 * extents around in the middle of the commit
1320 */
1321 mutex_lock(&root->fs_info->reloc_mutex);
1249 1322
1250 ret = btrfs_run_delayed_items(trans, root); 1323 ret = btrfs_run_delayed_items(trans, root);
1251 BUG_ON(ret); 1324 BUG_ON(ret);
1252 1325
1326 ret = create_pending_snapshots(trans, root->fs_info);
1327 BUG_ON(ret);
1328
1253 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1329 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1254 BUG_ON(ret); 1330 BUG_ON(ret);
1255 1331
1332 /*
1333 * make sure none of the code above managed to slip in a
1334 * delayed item
1335 */
1336 btrfs_assert_delayed_root_empty(root);
1337
1256 WARN_ON(cur_trans != trans->transaction); 1338 WARN_ON(cur_trans != trans->transaction);
1257 1339
1258 btrfs_scrub_pause(root); 1340 btrfs_scrub_pause(root);
@@ -1309,6 +1391,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1309 root->fs_info->running_transaction = NULL; 1391 root->fs_info->running_transaction = NULL;
1310 root->fs_info->trans_no_join = 0; 1392 root->fs_info->trans_no_join = 0;
1311 spin_unlock(&root->fs_info->trans_lock); 1393 spin_unlock(&root->fs_info->trans_lock);
1394 mutex_unlock(&root->fs_info->reloc_mutex);
1312 1395
1313 wake_up(&root->fs_info->transaction_wait); 1396 wake_up(&root->fs_info->transaction_wait);
1314 1397
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 592396c6dc47..4ce8a9f41d1e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3177,7 +3177,7 @@ again:
3177 tmp_key.offset = (u64)-1; 3177 tmp_key.offset = (u64)-1;
3178 3178
3179 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); 3179 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
3180 BUG_ON(!wc.replay_dest); 3180 BUG_ON(IS_ERR_OR_NULL(wc.replay_dest));
3181 3181
3182 wc.replay_dest->log_root = log; 3182 wc.replay_dest->log_root = log;
3183 btrfs_record_root_in_trans(trans, wc.replay_dest); 3183 btrfs_record_root_in_trans(trans, wc.replay_dest);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index da541dfca2e3..1efa56e18f9b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -689,12 +689,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
689 transid = btrfs_super_generation(disk_super); 689 transid = btrfs_super_generation(disk_super);
690 if (disk_super->label[0]) 690 if (disk_super->label[0])
691 printk(KERN_INFO "device label %s ", disk_super->label); 691 printk(KERN_INFO "device label %s ", disk_super->label);
692 else { 692 else
693 /* FIXME, make a readl uuid parser */ 693 printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
694 printk(KERN_INFO "device fsid %llx-%llx ",
695 *(unsigned long long *)disk_super->fsid,
696 *(unsigned long long *)(disk_super->fsid + 8));
697 }
698 printk(KERN_CONT "devid %llu transid %llu %s\n", 694 printk(KERN_CONT "devid %llu transid %llu %s\n",
699 (unsigned long long)devid, (unsigned long long)transid, path); 695 (unsigned long long)devid, (unsigned long long)transid, path);
700 ret = device_list_add(path, disk_super, devid, fs_devices_ret); 696 ret = device_list_add(path, disk_super, devid, fs_devices_ret);
diff --git a/fs/buffer.c b/fs/buffer.c
index 49c9aada0374..1a80b048ade8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1902,10 +1902,8 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1902 if (!buffer_uptodate(*wait_bh)) 1902 if (!buffer_uptodate(*wait_bh))
1903 err = -EIO; 1903 err = -EIO;
1904 } 1904 }
1905 if (unlikely(err)) { 1905 if (unlikely(err))
1906 page_zero_new_buffers(page, from, to); 1906 page_zero_new_buffers(page, from, to);
1907 ClearPageUptodate(page);
1908 }
1909 return err; 1907 return err;
1910} 1908}
1911EXPORT_SYMBOL(__block_write_begin); 1909EXPORT_SYMBOL(__block_write_begin);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 33da49dc3cc6..5a3953db8118 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -453,7 +453,7 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
453 int err; 453 int err;
454 struct inode *inode = page->mapping->host; 454 struct inode *inode = page->mapping->host;
455 BUG_ON(!inode); 455 BUG_ON(!inode);
456 igrab(inode); 456 ihold(inode);
457 err = writepage_nounlock(page, wbc); 457 err = writepage_nounlock(page, wbc);
458 unlock_page(page); 458 unlock_page(page);
459 iput(inode); 459 iput(inode);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 1f72b00447c4..f605753c8fe9 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2940,14 +2940,12 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
2940 while (!list_empty(&mdsc->cap_dirty)) { 2940 while (!list_empty(&mdsc->cap_dirty)) {
2941 ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info, 2941 ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
2942 i_dirty_item); 2942 i_dirty_item);
2943 inode = igrab(&ci->vfs_inode); 2943 inode = &ci->vfs_inode;
2944 ihold(inode);
2944 dout("flush_dirty_caps %p\n", inode); 2945 dout("flush_dirty_caps %p\n", inode);
2945 spin_unlock(&mdsc->cap_dirty_lock); 2946 spin_unlock(&mdsc->cap_dirty_lock);
2946 if (inode) { 2947 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, NULL);
2947 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, 2948 iput(inode);
2948 NULL);
2949 iput(inode);
2950 }
2951 spin_lock(&mdsc->cap_dirty_lock); 2949 spin_lock(&mdsc->cap_dirty_lock);
2952 } 2950 }
2953 spin_unlock(&mdsc->cap_dirty_lock); 2951 spin_unlock(&mdsc->cap_dirty_lock);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 33729e822bb9..ef8f08c343e8 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -308,7 +308,8 @@ more:
308 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 308 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
309 if (IS_ERR(req)) 309 if (IS_ERR(req))
310 return PTR_ERR(req); 310 return PTR_ERR(req);
311 req->r_inode = igrab(inode); 311 req->r_inode = inode;
312 ihold(inode);
312 req->r_dentry = dget(filp->f_dentry); 313 req->r_dentry = dget(filp->f_dentry);
313 /* hints to request -> mds selection code */ 314 /* hints to request -> mds selection code */
314 req->r_direct_mode = USE_AUTH_MDS; 315 req->r_direct_mode = USE_AUTH_MDS;
@@ -787,10 +788,12 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
787 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 788 req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
788 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 789 req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
789 err = ceph_mdsc_do_request(mdsc, dir, req); 790 err = ceph_mdsc_do_request(mdsc, dir, req);
790 if (err) 791 if (err) {
791 d_drop(dentry); 792 d_drop(dentry);
792 else if (!req->r_reply_info.head->is_dentry) 793 } else if (!req->r_reply_info.head->is_dentry) {
793 d_instantiate(dentry, igrab(old_dentry->d_inode)); 794 ihold(old_dentry->d_inode);
795 d_instantiate(dentry, old_dentry->d_inode);
796 }
794 ceph_mdsc_put_request(req); 797 ceph_mdsc_put_request(req);
795 return err; 798 return err;
796} 799}
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index a610d3d67488..f67b687550de 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -109,7 +109,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
109 err = ceph_mdsc_do_request(mdsc, NULL, req); 109 err = ceph_mdsc_do_request(mdsc, NULL, req);
110 inode = req->r_target_inode; 110 inode = req->r_target_inode;
111 if (inode) 111 if (inode)
112 igrab(inode); 112 ihold(inode);
113 ceph_mdsc_put_request(req); 113 ceph_mdsc_put_request(req);
114 if (!inode) 114 if (!inode)
115 return ERR_PTR(-ESTALE); 115 return ERR_PTR(-ESTALE);
@@ -167,7 +167,7 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
167 err = ceph_mdsc_do_request(mdsc, NULL, req); 167 err = ceph_mdsc_do_request(mdsc, NULL, req);
168 inode = req->r_target_inode; 168 inode = req->r_target_inode;
169 if (inode) 169 if (inode)
170 igrab(inode); 170 ihold(inode);
171 ceph_mdsc_put_request(req); 171 ceph_mdsc_put_request(req);
172 if (!inode) 172 if (!inode)
173 return ERR_PTR(err ? err : -ESTALE); 173 return ERR_PTR(err ? err : -ESTALE);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 203252d88d9f..9542f07d0b93 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -191,7 +191,8 @@ int ceph_open(struct inode *inode, struct file *file)
191 err = PTR_ERR(req); 191 err = PTR_ERR(req);
192 goto out; 192 goto out;
193 } 193 }
194 req->r_inode = igrab(inode); 194 req->r_inode = inode;
195 ihold(inode);
195 req->r_num_caps = 1; 196 req->r_num_caps = 1;
196 err = ceph_mdsc_do_request(mdsc, parent_inode, req); 197 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
197 if (!err) 198 if (!err)
@@ -282,7 +283,7 @@ int ceph_release(struct inode *inode, struct file *file)
282static int striped_read(struct inode *inode, 283static int striped_read(struct inode *inode,
283 u64 off, u64 len, 284 u64 off, u64 len,
284 struct page **pages, int num_pages, 285 struct page **pages, int num_pages,
285 int *checkeof, bool align_to_pages, 286 int *checkeof, bool o_direct,
286 unsigned long buf_align) 287 unsigned long buf_align)
287{ 288{
288 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 289 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
@@ -307,7 +308,7 @@ static int striped_read(struct inode *inode,
307 io_align = off & ~PAGE_MASK; 308 io_align = off & ~PAGE_MASK;
308 309
309more: 310more:
310 if (align_to_pages) 311 if (o_direct)
311 page_align = (pos - io_align + buf_align) & ~PAGE_MASK; 312 page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
312 else 313 else
313 page_align = pos & ~PAGE_MASK; 314 page_align = pos & ~PAGE_MASK;
@@ -317,10 +318,10 @@ more:
317 ci->i_truncate_seq, 318 ci->i_truncate_seq,
318 ci->i_truncate_size, 319 ci->i_truncate_size,
319 page_pos, pages_left, page_align); 320 page_pos, pages_left, page_align);
320 hit_stripe = this_len < left;
321 was_short = ret >= 0 && ret < this_len;
322 if (ret == -ENOENT) 321 if (ret == -ENOENT)
323 ret = 0; 322 ret = 0;
323 hit_stripe = this_len < left;
324 was_short = ret >= 0 && ret < this_len;
324 dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read, 325 dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read,
325 ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); 326 ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
326 327
@@ -345,20 +346,22 @@ more:
345 } 346 }
346 347
347 if (was_short) { 348 if (was_short) {
348 /* was original extent fully inside i_size? */ 349 /* did we bounce off eof? */
349 if (pos + left <= inode->i_size) { 350 if (pos + left > inode->i_size)
350 dout("zero tail\n"); 351 *checkeof = 1;
351 ceph_zero_page_vector_range(page_off + read, len - read, 352
353 /* zero trailing bytes (inside i_size) */
354 if (left > 0 && pos < inode->i_size) {
355 if (pos + left > inode->i_size)
356 left = inode->i_size - pos;
357
358 dout("zero tail %d\n", left);
359 ceph_zero_page_vector_range(page_off + read, left,
352 pages); 360 pages);
353 read = len; 361 read += left;
354 goto out;
355 } 362 }
356
357 /* check i_size */
358 *checkeof = 1;
359 } 363 }
360 364
361out:
362 if (ret >= 0) 365 if (ret >= 0)
363 ret = read; 366 ret = read;
364 dout("striped_read returns %d\n", ret); 367 dout("striped_read returns %d\n", ret);
@@ -658,7 +661,7 @@ out:
658 661
659 /* hit EOF or hole? */ 662 /* hit EOF or hole? */
660 if (statret == 0 && *ppos < inode->i_size) { 663 if (statret == 0 && *ppos < inode->i_size) {
661 dout("aio_read sync_read hit hole, reading more\n"); 664 dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size);
662 read += ret; 665 read += ret;
663 base += ret; 666 base += ret;
664 len -= ret; 667 len -= ret;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 70b6a4839c38..d8858e96ab18 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1101,10 +1101,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1101 goto done; 1101 goto done;
1102 } 1102 }
1103 req->r_dentry = dn; /* may have spliced */ 1103 req->r_dentry = dn; /* may have spliced */
1104 igrab(in); 1104 ihold(in);
1105 } else if (ceph_ino(in) == vino.ino && 1105 } else if (ceph_ino(in) == vino.ino &&
1106 ceph_snap(in) == vino.snap) { 1106 ceph_snap(in) == vino.snap) {
1107 igrab(in); 1107 ihold(in);
1108 } else { 1108 } else {
1109 dout(" %p links to %p %llx.%llx, not %llx.%llx\n", 1109 dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
1110 dn, in, ceph_ino(in), ceph_snap(in), 1110 dn, in, ceph_ino(in), ceph_snap(in),
@@ -1144,7 +1144,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1144 goto done; 1144 goto done;
1145 } 1145 }
1146 req->r_dentry = dn; /* may have spliced */ 1146 req->r_dentry = dn; /* may have spliced */
1147 igrab(in); 1147 ihold(in);
1148 rinfo->head->is_dentry = 1; /* fool notrace handlers */ 1148 rinfo->head->is_dentry = 1; /* fool notrace handlers */
1149 } 1149 }
1150 1150
@@ -1328,7 +1328,7 @@ void ceph_queue_writeback(struct inode *inode)
1328 if (queue_work(ceph_inode_to_client(inode)->wb_wq, 1328 if (queue_work(ceph_inode_to_client(inode)->wb_wq,
1329 &ceph_inode(inode)->i_wb_work)) { 1329 &ceph_inode(inode)->i_wb_work)) {
1330 dout("ceph_queue_writeback %p\n", inode); 1330 dout("ceph_queue_writeback %p\n", inode);
1331 igrab(inode); 1331 ihold(inode);
1332 } else { 1332 } else {
1333 dout("ceph_queue_writeback %p failed\n", inode); 1333 dout("ceph_queue_writeback %p failed\n", inode);
1334 } 1334 }
@@ -1353,7 +1353,7 @@ void ceph_queue_invalidate(struct inode *inode)
1353 if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, 1353 if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
1354 &ceph_inode(inode)->i_pg_inv_work)) { 1354 &ceph_inode(inode)->i_pg_inv_work)) {
1355 dout("ceph_queue_invalidate %p\n", inode); 1355 dout("ceph_queue_invalidate %p\n", inode);
1356 igrab(inode); 1356 ihold(inode);
1357 } else { 1357 } else {
1358 dout("ceph_queue_invalidate %p failed\n", inode); 1358 dout("ceph_queue_invalidate %p failed\n", inode);
1359 } 1359 }
@@ -1477,7 +1477,7 @@ void ceph_queue_vmtruncate(struct inode *inode)
1477 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, 1477 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
1478 &ci->i_vmtruncate_work)) { 1478 &ci->i_vmtruncate_work)) {
1479 dout("ceph_queue_vmtruncate %p\n", inode); 1479 dout("ceph_queue_vmtruncate %p\n", inode);
1480 igrab(inode); 1480 ihold(inode);
1481 } else { 1481 } else {
1482 dout("ceph_queue_vmtruncate %p failed, pending=%d\n", 1482 dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
1483 inode, ci->i_truncate_pending); 1483 inode, ci->i_truncate_pending);
@@ -1738,7 +1738,8 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1738 __mark_inode_dirty(inode, inode_dirty_flags); 1738 __mark_inode_dirty(inode, inode_dirty_flags);
1739 1739
1740 if (mask) { 1740 if (mask) {
1741 req->r_inode = igrab(inode); 1741 req->r_inode = inode;
1742 ihold(inode);
1742 req->r_inode_drop = release; 1743 req->r_inode_drop = release;
1743 req->r_args.setattr.mask = cpu_to_le32(mask); 1744 req->r_args.setattr.mask = cpu_to_le32(mask);
1744 req->r_num_caps = 1; 1745 req->r_num_caps = 1;
@@ -1779,7 +1780,8 @@ int ceph_do_getattr(struct inode *inode, int mask)
1779 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 1780 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
1780 if (IS_ERR(req)) 1781 if (IS_ERR(req))
1781 return PTR_ERR(req); 1782 return PTR_ERR(req);
1782 req->r_inode = igrab(inode); 1783 req->r_inode = inode;
1784 ihold(inode);
1783 req->r_num_caps = 1; 1785 req->r_num_caps = 1;
1784 req->r_args.getattr.mask = cpu_to_le32(mask); 1786 req->r_args.getattr.mask = cpu_to_le32(mask);
1785 err = ceph_mdsc_do_request(mdsc, NULL, req); 1787 err = ceph_mdsc_do_request(mdsc, NULL, req);
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 8888c9ba68db..ef0b5f48e13a 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -73,7 +73,8 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
73 USE_AUTH_MDS); 73 USE_AUTH_MDS);
74 if (IS_ERR(req)) 74 if (IS_ERR(req))
75 return PTR_ERR(req); 75 return PTR_ERR(req);
76 req->r_inode = igrab(inode); 76 req->r_inode = inode;
77 ihold(inode);
77 req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL; 78 req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL;
78 79
79 req->r_args.setlayout.layout.fl_stripe_unit = 80 req->r_args.setlayout.layout.fl_stripe_unit =
@@ -135,7 +136,8 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
135 136
136 if (IS_ERR(req)) 137 if (IS_ERR(req))
137 return PTR_ERR(req); 138 return PTR_ERR(req);
138 req->r_inode = igrab(inode); 139 req->r_inode = inode;
140 ihold(inode);
139 141
140 req->r_args.setlayout.layout.fl_stripe_unit = 142 req->r_args.setlayout.layout.fl_stripe_unit =
141 cpu_to_le32(l.stripe_unit); 143 cpu_to_le32(l.stripe_unit);
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 476b329867d4..80576d05d687 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -23,7 +23,8 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); 23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
24 if (IS_ERR(req)) 24 if (IS_ERR(req))
25 return PTR_ERR(req); 25 return PTR_ERR(req);
26 req->r_inode = igrab(inode); 26 req->r_inode = inode;
27 ihold(inode);
27 28
28 /* mds requires start and length rather than start and end */ 29 /* mds requires start and length rather than start and end */
29 if (LLONG_MAX == fl->fl_end) 30 if (LLONG_MAX == fl->fl_end)
@@ -32,11 +33,10 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
32 length = fl->fl_end - fl->fl_start + 1; 33 length = fl->fl_end - fl->fl_start + 1;
33 34
34 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 35 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
35 "length: %llu, wait: %d, type`: %d", (int)lock_type, 36 "length: %llu, wait: %d, type: %d", (int)lock_type,
36 (int)operation, (u64)fl->fl_pid, fl->fl_start, 37 (int)operation, (u64)fl->fl_pid, fl->fl_start,
37 length, wait, fl->fl_type); 38 length, wait, fl->fl_type);
38 39
39
40 req->r_args.filelock_change.rule = lock_type; 40 req->r_args.filelock_change.rule = lock_type;
41 req->r_args.filelock_change.type = cmd; 41 req->r_args.filelock_change.type = cmd;
42 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); 42 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
@@ -70,7 +70,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
70 } 70 }
71 ceph_mdsc_put_request(req); 71 ceph_mdsc_put_request(req);
72 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 72 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
73 "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type, 73 "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type,
74 (int)operation, (u64)fl->fl_pid, fl->fl_start, 74 (int)operation, (u64)fl->fl_pid, fl->fl_start,
75 length, wait, fl->fl_type, err); 75 length, wait, fl->fl_type, err);
76 return err; 76 return err;
@@ -109,16 +109,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
109 dout("mds locked, locking locally"); 109 dout("mds locked, locking locally");
110 err = posix_lock_file(file, fl, NULL); 110 err = posix_lock_file(file, fl, NULL);
111 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { 111 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
112 /* undo! This should only happen if the kernel detects 112 /* undo! This should only happen if
113 * local deadlock. */ 113 * the kernel detects local
114 * deadlock. */
114 ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 115 ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
115 CEPH_LOCK_UNLOCK, 0, fl); 116 CEPH_LOCK_UNLOCK, 0, fl);
116 dout("got %d on posix_lock_file, undid lock", err); 117 dout("got %d on posix_lock_file, undid lock",
118 err);
117 } 119 }
118 } 120 }
119 121
120 } else { 122 } else if (err == -ERESTARTSYS) {
121 dout("mds returned error code %d", err); 123 dout("undoing lock\n");
124 ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
125 CEPH_LOCK_UNLOCK, 0, fl);
122 } 126 }
123 return err; 127 return err;
124} 128}
@@ -155,8 +159,11 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
155 file, CEPH_LOCK_UNLOCK, 0, fl); 159 file, CEPH_LOCK_UNLOCK, 0, fl);
156 dout("got %d on flock_lock_file_wait, undid lock", err); 160 dout("got %d on flock_lock_file_wait, undid lock", err);
157 } 161 }
158 } else { 162 } else if (err == -ERESTARTSYS) {
159 dout("mds error code %d", err); 163 dout("undoing lock\n");
164 ceph_lock_message(CEPH_LOCK_FLOCK,
165 CEPH_MDS_OP_SETFILELOCK,
166 file, CEPH_LOCK_UNLOCK, 0, fl);
160 } 167 }
161 return err; 168 return err;
162} 169}
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 24067d68a554..54b14de2e729 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -722,7 +722,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
722 ci = list_first_entry(&mdsc->snap_flush_list, 722 ci = list_first_entry(&mdsc->snap_flush_list,
723 struct ceph_inode_info, i_snap_flush_item); 723 struct ceph_inode_info, i_snap_flush_item);
724 inode = &ci->vfs_inode; 724 inode = &ci->vfs_inode;
725 igrab(inode); 725 ihold(inode);
726 spin_unlock(&mdsc->snap_flush_lock); 726 spin_unlock(&mdsc->snap_flush_lock);
727 spin_lock(&inode->i_lock); 727 spin_lock(&inode->i_lock);
728 __ceph_flush_snaps(ci, &session, 0); 728 __ceph_flush_snaps(ci, &session, 0);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index f2b628696180..f42d730f1b66 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -665,7 +665,8 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
665 err = PTR_ERR(req); 665 err = PTR_ERR(req);
666 goto out; 666 goto out;
667 } 667 }
668 req->r_inode = igrab(inode); 668 req->r_inode = inode;
669 ihold(inode);
669 req->r_inode_drop = CEPH_CAP_XATTR_SHARED; 670 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
670 req->r_num_caps = 1; 671 req->r_num_caps = 1;
671 req->r_args.setxattr.flags = cpu_to_le32(flags); 672 req->r_args.setxattr.flags = cpu_to_le32(flags);
@@ -795,7 +796,8 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
795 USE_AUTH_MDS); 796 USE_AUTH_MDS);
796 if (IS_ERR(req)) 797 if (IS_ERR(req))
797 return PTR_ERR(req); 798 return PTR_ERR(req);
798 req->r_inode = igrab(inode); 799 req->r_inode = inode;
800 ihold(inode);
799 req->r_inode_drop = CEPH_CAP_XATTR_SHARED; 801 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
800 req->r_num_caps = 1; 802 req->r_num_caps = 1;
801 req->r_path2 = kstrdup(name, GFP_NOFS); 803 req->r_path2 = kstrdup(name, GFP_NOFS);
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 1cd4c3a1862d..53ed1ad2c112 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -7,6 +7,7 @@ config CIFS
7 select CRYPTO_MD5 7 select CRYPTO_MD5
8 select CRYPTO_HMAC 8 select CRYPTO_HMAC
9 select CRYPTO_ARC4 9 select CRYPTO_ARC4
10 select CRYPTO_ECB
10 select CRYPTO_DES 11 select CRYPTO_DES
11 help 12 help
12 This is the client VFS module for the Common Internet File System 13 This is the client VFS module for the Common Internet File System
@@ -148,7 +149,7 @@ config CIFS_FSCACHE
148 149
149config CIFS_ACL 150config CIFS_ACL
150 bool "Provide CIFS ACL support (EXPERIMENTAL)" 151 bool "Provide CIFS ACL support (EXPERIMENTAL)"
151 depends on EXPERIMENTAL && CIFS_XATTR 152 depends on EXPERIMENTAL && CIFS_XATTR && KEYS
152 help 153 help
153 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob 154 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob
154 is handed over to the application/caller. 155 is handed over to the application/caller.
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index dd8584d35a14..545509c3313b 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -92,7 +92,7 @@ static uint16_t cifs_server_get_key(const void *cookie_netfs_data,
92 break; 92 break;
93 93
94 default: 94 default:
95 cERROR(1, "CIFS: Unknown network family '%d'", sa->sa_family); 95 cERROR(1, "Unknown network family '%d'", sa->sa_family);
96 key_len = 0; 96 key_len = 0;
97 break; 97 break;
98 } 98 }
@@ -152,7 +152,7 @@ static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer,
152 152
153 sharename = extract_sharename(tcon->treeName); 153 sharename = extract_sharename(tcon->treeName);
154 if (IS_ERR(sharename)) { 154 if (IS_ERR(sharename)) {
155 cFYI(1, "CIFS: couldn't extract sharename\n"); 155 cFYI(1, "%s: couldn't extract sharename\n", __func__);
156 sharename = NULL; 156 sharename = NULL;
157 return 0; 157 return 0;
158 } 158 }
@@ -302,7 +302,7 @@ static void cifs_fscache_inode_now_uncached(void *cookie_netfs_data)
302 pagevec_init(&pvec, 0); 302 pagevec_init(&pvec, 0);
303 first = 0; 303 first = 0;
304 304
305 cFYI(1, "cifs inode 0x%p now uncached", cifsi); 305 cFYI(1, "%s: cifs inode 0x%p now uncached", __func__, cifsi);
306 306
307 for (;;) { 307 for (;;) {
308 nr_pages = pagevec_lookup(&pvec, 308 nr_pages = pagevec_lookup(&pvec,
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index dfbd9f1f373d..5a0ee7f2af06 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -184,7 +184,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
184 if (cifs_pdu == NULL || server == NULL) 184 if (cifs_pdu == NULL || server == NULL)
185 return -EINVAL; 185 return -EINVAL;
186 186
187 if (cifs_pdu->Command == SMB_COM_NEGOTIATE) 187 if (!server->session_estab)
188 return 0; 188 return 0;
189 189
190 if (cifs_pdu->Command == SMB_COM_LOCKING_ANDX) { 190 if (cifs_pdu->Command == SMB_COM_LOCKING_ANDX) {
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 989442dcfb45..2f0c58646c10 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -257,9 +257,6 @@ static int cifs_permission(struct inode *inode, int mask, unsigned int flags)
257{ 257{
258 struct cifs_sb_info *cifs_sb; 258 struct cifs_sb_info *cifs_sb;
259 259
260 if (flags & IPERM_FLAG_RCU)
261 return -ECHILD;
262
263 cifs_sb = CIFS_SB(inode->i_sb); 260 cifs_sb = CIFS_SB(inode->i_sb);
264 261
265 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { 262 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) {
@@ -352,6 +349,37 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
352 } 349 }
353} 350}
354 351
352static void
353cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server)
354{
355 seq_printf(s, ",sec=");
356
357 switch (server->secType) {
358 case LANMAN:
359 seq_printf(s, "lanman");
360 break;
361 case NTLMv2:
362 seq_printf(s, "ntlmv2");
363 break;
364 case NTLM:
365 seq_printf(s, "ntlm");
366 break;
367 case Kerberos:
368 seq_printf(s, "krb5");
369 break;
370 case RawNTLMSSP:
371 seq_printf(s, "ntlmssp");
372 break;
373 default:
374 /* shouldn't ever happen */
375 seq_printf(s, "unknown");
376 break;
377 }
378
379 if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
380 seq_printf(s, "i");
381}
382
355/* 383/*
356 * cifs_show_options() is for displaying mount options in /proc/mounts. 384 * cifs_show_options() is for displaying mount options in /proc/mounts.
357 * Not all settable options are displayed but most of the important 385 * Not all settable options are displayed but most of the important
@@ -365,6 +393,8 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
365 struct sockaddr *srcaddr; 393 struct sockaddr *srcaddr;
366 srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; 394 srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr;
367 395
396 cifs_show_security(s, tcon->ses->server);
397
368 seq_printf(s, ",unc=%s", tcon->treeName); 398 seq_printf(s, ",unc=%s", tcon->treeName);
369 399
370 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) 400 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 64313f778ebf..0900e1658c96 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -129,5 +129,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
129extern const struct export_operations cifs_export_ops; 129extern const struct export_operations cifs_export_ops;
130#endif /* CIFS_NFSD_EXPORT */ 130#endif /* CIFS_NFSD_EXPORT */
131 131
132#define CIFS_VERSION "1.72" 132#define CIFS_VERSION "1.73"
133#endif /* _CIFSFS_H */ 133#endif /* _CIFSFS_H */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 6d88b82537c3..12cf72dd0c42 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -152,7 +152,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
152 mid_entry->callback(mid_entry); 152 mid_entry->callback(mid_entry);
153 } 153 }
154 154
155 while (server->tcpStatus == CifsNeedReconnect) { 155 do {
156 try_to_freeze(); 156 try_to_freeze();
157 157
158 /* we should try only the port we connected to before */ 158 /* we should try only the port we connected to before */
@@ -167,7 +167,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
167 server->tcpStatus = CifsNeedNegotiate; 167 server->tcpStatus = CifsNeedNegotiate;
168 spin_unlock(&GlobalMid_Lock); 168 spin_unlock(&GlobalMid_Lock);
169 } 169 }
170 } 170 } while (server->tcpStatus == CifsNeedReconnect);
171 171
172 return rc; 172 return rc;
173} 173}
@@ -784,7 +784,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
784 struct smb_vol *vol) 784 struct smb_vol *vol)
785{ 785{
786 char *value, *data, *end; 786 char *value, *data, *end;
787 char *mountdata_copy, *options; 787 char *mountdata_copy = NULL, *options;
788 unsigned int temp_len, i, j; 788 unsigned int temp_len, i, j;
789 char separator[2]; 789 char separator[2];
790 short int override_uid = -1; 790 short int override_uid = -1;
@@ -1391,7 +1391,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1391 "/proc/fs/cifs/LookupCacheEnabled to 0\n"); 1391 "/proc/fs/cifs/LookupCacheEnabled to 0\n");
1392 } else if (strnicmp(data, "fsc", 3) == 0) { 1392 } else if (strnicmp(data, "fsc", 3) == 0) {
1393#ifndef CONFIG_CIFS_FSCACHE 1393#ifndef CONFIG_CIFS_FSCACHE
1394 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE" 1394 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE "
1395 "kernel config option set"); 1395 "kernel config option set");
1396 goto cifs_parse_mount_err; 1396 goto cifs_parse_mount_err;
1397#endif 1397#endif
@@ -1976,7 +1976,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1976 warned_on_ntlm = true; 1976 warned_on_ntlm = true;
1977 cERROR(1, "default security mechanism requested. The default " 1977 cERROR(1, "default security mechanism requested. The default "
1978 "security mechanism will be upgraded from ntlm to " 1978 "security mechanism will be upgraded from ntlm to "
1979 "ntlmv2 in kernel release 2.6.41"); 1979 "ntlmv2 in kernel release 3.1");
1980 } 1980 }
1981 ses->overrideSecFlg = volume_info->secFlg; 1981 ses->overrideSecFlg = volume_info->secFlg;
1982 1982
@@ -2149,7 +2149,10 @@ cifs_put_tlink(struct tcon_link *tlink)
2149} 2149}
2150 2150
2151static inline struct tcon_link * 2151static inline struct tcon_link *
2152cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb); 2152cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb)
2153{
2154 return cifs_sb->master_tlink;
2155}
2153 2156
2154static int 2157static int
2155compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data) 2158compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
@@ -3171,6 +3174,10 @@ out:
3171 return rc; 3174 return rc;
3172} 3175}
3173 3176
3177/*
3178 * Issue a TREE_CONNECT request. Note that for IPC$ shares, that the tcon
3179 * pointer may be NULL.
3180 */
3174int 3181int
3175CIFSTCon(unsigned int xid, struct cifs_ses *ses, 3182CIFSTCon(unsigned int xid, struct cifs_ses *ses,
3176 const char *tree, struct cifs_tcon *tcon, 3183 const char *tree, struct cifs_tcon *tcon,
@@ -3205,7 +3212,7 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses,
3205 pSMB->AndXCommand = 0xFF; 3212 pSMB->AndXCommand = 0xFF;
3206 pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO); 3213 pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO);
3207 bcc_ptr = &pSMB->Password[0]; 3214 bcc_ptr = &pSMB->Password[0];
3208 if ((ses->server->sec_mode) & SECMODE_USER) { 3215 if (!tcon || (ses->server->sec_mode & SECMODE_USER)) {
3209 pSMB->PasswordLength = cpu_to_le16(1); /* minimum */ 3216 pSMB->PasswordLength = cpu_to_le16(1); /* minimum */
3210 *bcc_ptr = 0; /* password is null byte */ 3217 *bcc_ptr = 0; /* password is null byte */
3211 bcc_ptr++; /* skip password */ 3218 bcc_ptr++; /* skip password */
@@ -3371,7 +3378,7 @@ int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses)
3371 } 3378 }
3372 if (rc == 0) { 3379 if (rc == 0) {
3373 spin_lock(&GlobalMid_Lock); 3380 spin_lock(&GlobalMid_Lock);
3374 if (server->tcpStatus != CifsExiting) 3381 if (server->tcpStatus == CifsNeedNegotiate)
3375 server->tcpStatus = CifsGood; 3382 server->tcpStatus = CifsGood;
3376 else 3383 else
3377 rc = -EHOSTDOWN; 3384 rc = -EHOSTDOWN;
@@ -3484,12 +3491,6 @@ out:
3484 return tcon; 3491 return tcon;
3485} 3492}
3486 3493
3487static inline struct tcon_link *
3488cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb)
3489{
3490 return cifs_sb->master_tlink;
3491}
3492
3493struct cifs_tcon * 3494struct cifs_tcon *
3494cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb) 3495cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
3495{ 3496{
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index d368a47ba5eb..816696621ec9 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -28,14 +28,14 @@ void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
28 server->fscache = 28 server->fscache =
29 fscache_acquire_cookie(cifs_fscache_netfs.primary_index, 29 fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
30 &cifs_fscache_server_index_def, server); 30 &cifs_fscache_server_index_def, server);
31 cFYI(1, "CIFS: get client cookie (0x%p/0x%p)", server, 31 cFYI(1, "%s: (0x%p/0x%p)", __func__, server,
32 server->fscache); 32 server->fscache);
33} 33}
34 34
35void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) 35void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server)
36{ 36{
37 cFYI(1, "CIFS: release client cookie (0x%p/0x%p)", server, 37 cFYI(1, "%s: (0x%p/0x%p)", __func__, server,
38 server->fscache); 38 server->fscache);
39 fscache_relinquish_cookie(server->fscache, 0); 39 fscache_relinquish_cookie(server->fscache, 0);
40 server->fscache = NULL; 40 server->fscache = NULL;
41} 41}
@@ -47,13 +47,13 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
47 tcon->fscache = 47 tcon->fscache =
48 fscache_acquire_cookie(server->fscache, 48 fscache_acquire_cookie(server->fscache,
49 &cifs_fscache_super_index_def, tcon); 49 &cifs_fscache_super_index_def, tcon);
50 cFYI(1, "CIFS: get superblock cookie (0x%p/0x%p)", 50 cFYI(1, "%s: (0x%p/0x%p)", __func__, server->fscache,
51 server->fscache, tcon->fscache); 51 tcon->fscache);
52} 52}
53 53
54void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) 54void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon)
55{ 55{
56 cFYI(1, "CIFS: releasing superblock cookie (0x%p)", tcon->fscache); 56 cFYI(1, "%s: (0x%p)", __func__, tcon->fscache);
57 fscache_relinquish_cookie(tcon->fscache, 0); 57 fscache_relinquish_cookie(tcon->fscache, 0);
58 tcon->fscache = NULL; 58 tcon->fscache = NULL;
59} 59}
@@ -70,8 +70,8 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode)
70 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) { 70 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) {
71 cifsi->fscache = fscache_acquire_cookie(tcon->fscache, 71 cifsi->fscache = fscache_acquire_cookie(tcon->fscache,
72 &cifs_fscache_inode_object_def, cifsi); 72 &cifs_fscache_inode_object_def, cifsi);
73 cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", tcon->fscache, 73 cFYI(1, "%s: got FH cookie (0x%p/0x%p)", __func__,
74 cifsi->fscache); 74 tcon->fscache, cifsi->fscache);
75 } 75 }
76} 76}
77 77
@@ -80,8 +80,7 @@ void cifs_fscache_release_inode_cookie(struct inode *inode)
80 struct cifsInodeInfo *cifsi = CIFS_I(inode); 80 struct cifsInodeInfo *cifsi = CIFS_I(inode);
81 81
82 if (cifsi->fscache) { 82 if (cifsi->fscache) {
83 cFYI(1, "CIFS releasing inode cookie (0x%p)", 83 cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache);
84 cifsi->fscache);
85 fscache_relinquish_cookie(cifsi->fscache, 0); 84 fscache_relinquish_cookie(cifsi->fscache, 0);
86 cifsi->fscache = NULL; 85 cifsi->fscache = NULL;
87 } 86 }
@@ -92,8 +91,7 @@ static void cifs_fscache_disable_inode_cookie(struct inode *inode)
92 struct cifsInodeInfo *cifsi = CIFS_I(inode); 91 struct cifsInodeInfo *cifsi = CIFS_I(inode);
93 92
94 if (cifsi->fscache) { 93 if (cifsi->fscache) {
95 cFYI(1, "CIFS disabling inode cookie (0x%p)", 94 cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache);
96 cifsi->fscache);
97 fscache_relinquish_cookie(cifsi->fscache, 1); 95 fscache_relinquish_cookie(cifsi->fscache, 1);
98 cifsi->fscache = NULL; 96 cifsi->fscache = NULL;
99 } 97 }
@@ -121,8 +119,8 @@ void cifs_fscache_reset_inode_cookie(struct inode *inode)
121 cifs_sb_master_tcon(cifs_sb)->fscache, 119 cifs_sb_master_tcon(cifs_sb)->fscache,
122 &cifs_fscache_inode_object_def, 120 &cifs_fscache_inode_object_def,
123 cifsi); 121 cifsi);
124 cFYI(1, "CIFS: new cookie 0x%p oldcookie 0x%p", 122 cFYI(1, "%s: new cookie 0x%p oldcookie 0x%p",
125 cifsi->fscache, old); 123 __func__, cifsi->fscache, old);
126 } 124 }
127} 125}
128 126
@@ -132,8 +130,8 @@ int cifs_fscache_release_page(struct page *page, gfp_t gfp)
132 struct inode *inode = page->mapping->host; 130 struct inode *inode = page->mapping->host;
133 struct cifsInodeInfo *cifsi = CIFS_I(inode); 131 struct cifsInodeInfo *cifsi = CIFS_I(inode);
134 132
135 cFYI(1, "CIFS: fscache release page (0x%p/0x%p)", 133 cFYI(1, "%s: (0x%p/0x%p)", __func__, page,
136 page, cifsi->fscache); 134 cifsi->fscache);
137 if (!fscache_maybe_release_page(cifsi->fscache, page, gfp)) 135 if (!fscache_maybe_release_page(cifsi->fscache, page, gfp))
138 return 0; 136 return 0;
139 } 137 }
@@ -144,8 +142,7 @@ int cifs_fscache_release_page(struct page *page, gfp_t gfp)
144static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx, 142static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx,
145 int error) 143 int error)
146{ 144{
147 cFYI(1, "CFS: readpage_from_fscache_complete (0x%p/%d)", 145 cFYI(1, "%s: (0x%p/%d)", __func__, page, error);
148 page, error);
149 if (!error) 146 if (!error)
150 SetPageUptodate(page); 147 SetPageUptodate(page);
151 unlock_page(page); 148 unlock_page(page);
@@ -158,7 +155,7 @@ int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
158{ 155{
159 int ret; 156 int ret;
160 157
161 cFYI(1, "CIFS: readpage_from_fscache(fsc:%p, p:%p, i:0x%p", 158 cFYI(1, "%s: (fsc:%p, p:%p, i:0x%p", __func__,
162 CIFS_I(inode)->fscache, page, inode); 159 CIFS_I(inode)->fscache, page, inode);
163 ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page, 160 ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page,
164 cifs_readpage_from_fscache_complete, 161 cifs_readpage_from_fscache_complete,
@@ -167,11 +164,11 @@ int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
167 switch (ret) { 164 switch (ret) {
168 165
169 case 0: /* page found in fscache, read submitted */ 166 case 0: /* page found in fscache, read submitted */
170 cFYI(1, "CIFS: readpage_from_fscache: submitted"); 167 cFYI(1, "%s: submitted", __func__);
171 return ret; 168 return ret;
172 case -ENOBUFS: /* page won't be cached */ 169 case -ENOBUFS: /* page won't be cached */
173 case -ENODATA: /* page not in cache */ 170 case -ENODATA: /* page not in cache */
174 cFYI(1, "CIFS: readpage_from_fscache %d", ret); 171 cFYI(1, "%s: %d", __func__, ret);
175 return 1; 172 return 1;
176 173
177 default: 174 default:
@@ -190,7 +187,7 @@ int __cifs_readpages_from_fscache(struct inode *inode,
190{ 187{
191 int ret; 188 int ret;
192 189
193 cFYI(1, "CIFS: __cifs_readpages_from_fscache (0x%p/%u/0x%p)", 190 cFYI(1, "%s: (0x%p/%u/0x%p)", __func__,
194 CIFS_I(inode)->fscache, *nr_pages, inode); 191 CIFS_I(inode)->fscache, *nr_pages, inode);
195 ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping, 192 ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping,
196 pages, nr_pages, 193 pages, nr_pages,
@@ -199,12 +196,12 @@ int __cifs_readpages_from_fscache(struct inode *inode,
199 mapping_gfp_mask(mapping)); 196 mapping_gfp_mask(mapping));
200 switch (ret) { 197 switch (ret) {
201 case 0: /* read submitted to the cache for all pages */ 198 case 0: /* read submitted to the cache for all pages */
202 cFYI(1, "CIFS: readpages_from_fscache: submitted"); 199 cFYI(1, "%s: submitted", __func__);
203 return ret; 200 return ret;
204 201
205 case -ENOBUFS: /* some pages are not cached and can't be */ 202 case -ENOBUFS: /* some pages are not cached and can't be */
206 case -ENODATA: /* some pages are not cached */ 203 case -ENODATA: /* some pages are not cached */
207 cFYI(1, "CIFS: readpages_from_fscache: no page"); 204 cFYI(1, "%s: no page", __func__);
208 return 1; 205 return 1;
209 206
210 default: 207 default:
@@ -218,7 +215,7 @@ void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
218{ 215{
219 int ret; 216 int ret;
220 217
221 cFYI(1, "CIFS: readpage_to_fscache(fsc: %p, p: %p, i: %p", 218 cFYI(1, "%s: (fsc: %p, p: %p, i: %p)", __func__,
222 CIFS_I(inode)->fscache, page, inode); 219 CIFS_I(inode)->fscache, page, inode);
223 ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL); 220 ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL);
224 if (ret != 0) 221 if (ret != 0)
@@ -230,7 +227,7 @@ void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
230 struct cifsInodeInfo *cifsi = CIFS_I(inode); 227 struct cifsInodeInfo *cifsi = CIFS_I(inode);
231 struct fscache_cookie *cookie = cifsi->fscache; 228 struct fscache_cookie *cookie = cifsi->fscache;
232 229
233 cFYI(1, "CIFS: fscache invalidatepage (0x%p/0x%p)", page, cookie); 230 cFYI(1, "%s: (0x%p/0x%p)", __func__, page, cookie);
234 fscache_wait_on_page_write(cookie, page); 231 fscache_wait_on_page_write(cookie, page);
235 fscache_uncache_page(cookie, page); 232 fscache_uncache_page(cookie, page);
236} 233}
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 6cbb3afb36dc..cb140ef293e4 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -43,8 +43,6 @@ const struct file_operations coda_ioctl_operations = {
43/* the coda pioctl inode ops */ 43/* the coda pioctl inode ops */
44static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags) 44static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags)
45{ 45{
46 if (flags & IPERM_FLAG_RCU)
47 return -ECHILD;
48 return (mask & MAY_EXEC) ? -EACCES : 0; 46 return (mask & MAY_EXEC) ? -EACCES : 0;
49} 47}
50 48
diff --git a/fs/dcookies.c b/fs/dcookies.c
index a21cabdbd87b..dda0dc702d1b 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -178,6 +178,8 @@ SYSCALL_DEFINE(lookup_dcookie)(u64 cookie64, char __user * buf, size_t len)
178 /* FIXME: (deleted) ? */ 178 /* FIXME: (deleted) ? */
179 path = d_path(&dcs->path, kbuf, PAGE_SIZE); 179 path = d_path(&dcs->path, kbuf, PAGE_SIZE);
180 180
181 mutex_unlock(&dcookie_mutex);
182
181 if (IS_ERR(path)) { 183 if (IS_ERR(path)) {
182 err = PTR_ERR(path); 184 err = PTR_ERR(path);
183 goto out_free; 185 goto out_free;
@@ -194,6 +196,7 @@ SYSCALL_DEFINE(lookup_dcookie)(u64 cookie64, char __user * buf, size_t len)
194 196
195out_free: 197out_free:
196 kfree(kbuf); 198 kfree(kbuf);
199 return err;
197out: 200out:
198 mutex_unlock(&dcookie_mutex); 201 mutex_unlock(&dcookie_mutex);
199 return err; 202 return err;
diff --git a/fs/exec.c b/fs/exec.c
index ea5f748906a8..6075a1e727ae 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1093,6 +1093,7 @@ int flush_old_exec(struct linux_binprm * bprm)
1093 1093
1094 bprm->mm = NULL; /* We're using it now */ 1094 bprm->mm = NULL; /* We're using it now */
1095 1095
1096 set_fs(USER_DS);
1096 current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); 1097 current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD);
1097 flush_thread(); 1098 flush_thread();
1098 current->personality &= ~bprm->per_clear; 1099 current->personality &= ~bprm->per_clear;
@@ -1357,10 +1358,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1357 if (retval) 1358 if (retval)
1358 return retval; 1359 return retval;
1359 1360
1360 /* kernel module loader fixup */
1361 /* so we don't try to load run modprobe in kernel space. */
1362 set_fs(USER_DS);
1363
1364 retval = audit_bprm(bprm); 1361 retval = audit_bprm(bprm);
1365 if (retval) 1362 if (retval)
1366 return retval; 1363 return retval;
@@ -1999,7 +1996,7 @@ static void wait_for_dump_helpers(struct file *file)
1999 * is a special value that we use to trap recursive 1996 * is a special value that we use to trap recursive
2000 * core dumps 1997 * core dumps
2001 */ 1998 */
2002static int umh_pipe_setup(struct subprocess_info *info) 1999static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
2003{ 2000{
2004 struct file *rp, *wp; 2001 struct file *rp, *wp;
2005 struct fdtable *fdt; 2002 struct fdtable *fdt;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 3db5ba4568fc..b3cc8586984e 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -974,7 +974,7 @@ out_no_inode:
974out_no_read: 974out_no_read:
975 printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n", 975 printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n",
976 __func__, s->s_id, iso_blknum, block); 976 __func__, s->s_id, iso_blknum, block);
977 goto out_freesbi; 977 goto out_freebh;
978out_bad_zone_size: 978out_bad_zone_size:
979 printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n", 979 printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n",
980 sbi->s_log_zone_size); 980 sbi->s_log_zone_size);
@@ -989,6 +989,7 @@ out_unknown_format:
989 989
990out_freebh: 990out_freebh:
991 brelse(bh); 991 brelse(bh);
992 brelse(pri_bh);
992out_freesbi: 993out_freesbi:
993 kfree(opt.iocharset); 994 kfree(opt.iocharset);
994 kfree(sbi); 995 kfree(sbi);
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 9ed89d1663f8..1afae26cf236 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -555,13 +555,6 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry,
555 return __logfs_create(dir, dentry, inode, target, destlen); 555 return __logfs_create(dir, dentry, inode, target, destlen);
556} 556}
557 557
558static int logfs_permission(struct inode *inode, int mask, unsigned int flags)
559{
560 if (flags & IPERM_FLAG_RCU)
561 return -ECHILD;
562 return generic_permission(inode, mask, flags, NULL);
563}
564
565static int logfs_link(struct dentry *old_dentry, struct inode *dir, 558static int logfs_link(struct dentry *old_dentry, struct inode *dir,
566 struct dentry *dentry) 559 struct dentry *dentry)
567{ 560{
@@ -820,7 +813,6 @@ const struct inode_operations logfs_dir_iops = {
820 .mknod = logfs_mknod, 813 .mknod = logfs_mknod,
821 .rename = logfs_rename, 814 .rename = logfs_rename,
822 .rmdir = logfs_rmdir, 815 .rmdir = logfs_rmdir,
823 .permission = logfs_permission,
824 .symlink = logfs_symlink, 816 .symlink = logfs_symlink,
825 .unlink = logfs_unlink, 817 .unlink = logfs_unlink,
826}; 818};
diff --git a/fs/namei.c b/fs/namei.c
index 9802345df5e7..0223c41fb114 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -238,7 +238,8 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
238 238
239 /* 239 /*
240 * Read/write DACs are always overridable. 240 * Read/write DACs are always overridable.
241 * Executable DACs are overridable if at least one exec bit is set. 241 * Executable DACs are overridable for all directories and
242 * for non-directories that have least one exec bit set.
242 */ 243 */
243 if (!(mask & MAY_EXEC) || execute_ok(inode)) 244 if (!(mask & MAY_EXEC) || execute_ok(inode))
244 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE)) 245 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
@@ -812,6 +813,11 @@ static int follow_automount(struct path *path, unsigned flags,
812 if (!mnt) /* mount collision */ 813 if (!mnt) /* mount collision */
813 return 0; 814 return 0;
814 815
816 if (!*need_mntput) {
817 /* lock_mount() may release path->mnt on error */
818 mntget(path->mnt);
819 *need_mntput = true;
820 }
815 err = finish_automount(mnt, path); 821 err = finish_automount(mnt, path);
816 822
817 switch (err) { 823 switch (err) {
@@ -819,12 +825,9 @@ static int follow_automount(struct path *path, unsigned flags,
819 /* Someone else made a mount here whilst we were busy */ 825 /* Someone else made a mount here whilst we were busy */
820 return 0; 826 return 0;
821 case 0: 827 case 0:
822 dput(path->dentry); 828 path_put(path);
823 if (*need_mntput)
824 mntput(path->mnt);
825 path->mnt = mnt; 829 path->mnt = mnt;
826 path->dentry = dget(mnt->mnt_root); 830 path->dentry = dget(mnt->mnt_root);
827 *need_mntput = true;
828 return 0; 831 return 0;
829 default: 832 default:
830 return err; 833 return err;
@@ -844,9 +847,10 @@ static int follow_automount(struct path *path, unsigned flags,
844 */ 847 */
845static int follow_managed(struct path *path, unsigned flags) 848static int follow_managed(struct path *path, unsigned flags)
846{ 849{
850 struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
847 unsigned managed; 851 unsigned managed;
848 bool need_mntput = false; 852 bool need_mntput = false;
849 int ret; 853 int ret = 0;
850 854
851 /* Given that we're not holding a lock here, we retain the value in a 855 /* Given that we're not holding a lock here, we retain the value in a
852 * local variable for each dentry as we look at it so that we don't see 856 * local variable for each dentry as we look at it so that we don't see
@@ -861,7 +865,7 @@ static int follow_managed(struct path *path, unsigned flags)
861 BUG_ON(!path->dentry->d_op->d_manage); 865 BUG_ON(!path->dentry->d_op->d_manage);
862 ret = path->dentry->d_op->d_manage(path->dentry, false); 866 ret = path->dentry->d_op->d_manage(path->dentry, false);
863 if (ret < 0) 867 if (ret < 0)
864 return ret == -EISDIR ? 0 : ret; 868 break;
865 } 869 }
866 870
867 /* Transit to a mounted filesystem. */ 871 /* Transit to a mounted filesystem. */
@@ -887,14 +891,19 @@ static int follow_managed(struct path *path, unsigned flags)
887 if (managed & DCACHE_NEED_AUTOMOUNT) { 891 if (managed & DCACHE_NEED_AUTOMOUNT) {
888 ret = follow_automount(path, flags, &need_mntput); 892 ret = follow_automount(path, flags, &need_mntput);
889 if (ret < 0) 893 if (ret < 0)
890 return ret == -EISDIR ? 0 : ret; 894 break;
891 continue; 895 continue;
892 } 896 }
893 897
894 /* We didn't change the current path point */ 898 /* We didn't change the current path point */
895 break; 899 break;
896 } 900 }
897 return 0; 901
902 if (need_mntput && path->mnt == mnt)
903 mntput(path->mnt);
904 if (ret == -EISDIR)
905 ret = 0;
906 return ret;
898} 907}
899 908
900int follow_down_one(struct path *path) 909int follow_down_one(struct path *path)
@@ -1003,9 +1012,6 @@ failed:
1003 * Follow down to the covering mount currently visible to userspace. At each 1012 * Follow down to the covering mount currently visible to userspace. At each
1004 * point, the filesystem owning that dentry may be queried as to whether the 1013 * point, the filesystem owning that dentry may be queried as to whether the
1005 * caller is permitted to proceed or not. 1014 * caller is permitted to proceed or not.
1006 *
1007 * Care must be taken as namespace_sem may be held (indicated by mounting_here
1008 * being true).
1009 */ 1015 */
1010int follow_down(struct path *path) 1016int follow_down(struct path *path)
1011{ 1017{
@@ -2713,8 +2719,10 @@ static long do_unlinkat(int dfd, const char __user *pathname)
2713 error = PTR_ERR(dentry); 2719 error = PTR_ERR(dentry);
2714 if (!IS_ERR(dentry)) { 2720 if (!IS_ERR(dentry)) {
2715 /* Why not before? Because we want correct error value */ 2721 /* Why not before? Because we want correct error value */
2722 if (nd.last.name[nd.last.len])
2723 goto slashes;
2716 inode = dentry->d_inode; 2724 inode = dentry->d_inode;
2717 if (nd.last.name[nd.last.len] || !inode) 2725 if (!inode)
2718 goto slashes; 2726 goto slashes;
2719 ihold(inode); 2727 ihold(inode);
2720 error = mnt_want_write(nd.path.mnt); 2728 error = mnt_want_write(nd.path.mnt);
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 18b3e8975fe0..fbb2a5ef5817 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -82,6 +82,7 @@ config NFSD_V4
82 select NFSD_V3 82 select NFSD_V3
83 select FS_POSIX_ACL 83 select FS_POSIX_ACL
84 select SUNRPC_GSS 84 select SUNRPC_GSS
85 select CRYPTO
85 help 86 help
86 This option enables support in your system's NFS server for 87 This option enables support in your system's NFS server for
87 version 4 of the NFS protocol (RFC 3530). 88 version 4 of the NFS protocol (RFC 3530).
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1f5eae40f34e..2b1449dd2f49 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -13,6 +13,7 @@
13#include <linux/lockd/lockd.h> 13#include <linux/lockd/lockd.h>
14#include <linux/sunrpc/clnt.h> 14#include <linux/sunrpc/clnt.h>
15#include <linux/sunrpc/gss_api.h> 15#include <linux/sunrpc/gss_api.h>
16#include <linux/sunrpc/gss_krb5_enctypes.h>
16 17
17#include "idmap.h" 18#include "idmap.h"
18#include "nfsd.h" 19#include "nfsd.h"
@@ -189,18 +190,10 @@ static struct file_operations export_features_operations = {
189 .release = single_release, 190 .release = single_release,
190}; 191};
191 192
192#ifdef CONFIG_SUNRPC_GSS 193#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
193static int supported_enctypes_show(struct seq_file *m, void *v) 194static int supported_enctypes_show(struct seq_file *m, void *v)
194{ 195{
195 struct gss_api_mech *k5mech; 196 seq_printf(m, KRB5_SUPPORTED_ENCTYPES);
196
197 k5mech = gss_mech_get_by_name("krb5");
198 if (k5mech == NULL)
199 goto out;
200 if (k5mech->gm_upcall_enctypes != NULL)
201 seq_printf(m, k5mech->gm_upcall_enctypes);
202 gss_mech_put(k5mech);
203out:
204 return 0; 197 return 0;
205} 198}
206 199
@@ -215,7 +208,7 @@ static struct file_operations supported_enctypes_ops = {
215 .llseek = seq_lseek, 208 .llseek = seq_lseek,
216 .release = single_release, 209 .release = single_release,
217}; 210};
218#endif /* CONFIG_SUNRPC_GSS */ 211#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
219 212
220extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); 213extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
221extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); 214extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
@@ -1427,9 +1420,9 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1427 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, 1420 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
1428 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, 1421 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
1429 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, 1422 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
1430#ifdef CONFIG_SUNRPC_GSS 1423#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
1431 [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, 1424 [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
1432#endif /* CONFIG_SUNRPC_GSS */ 1425#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
1433#ifdef CONFIG_NFSD_V4 1426#ifdef CONFIG_NFSD_V4
1434 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, 1427 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
1435 [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, 1428 [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d5718273bb32..fd0acca5370a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -696,7 +696,15 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
696} 696}
697#endif /* CONFIG_NFSD_V3 */ 697#endif /* CONFIG_NFSD_V3 */
698 698
699static int nfsd_open_break_lease(struct inode *inode, int access)
700{
701 unsigned int mode;
699 702
703 if (access & NFSD_MAY_NOT_BREAK_LEASE)
704 return 0;
705 mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY;
706 return break_lease(inode, mode | O_NONBLOCK);
707}
700 708
701/* 709/*
702 * Open an existing file or directory. 710 * Open an existing file or directory.
@@ -744,12 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
744 if (!inode->i_fop) 752 if (!inode->i_fop)
745 goto out; 753 goto out;
746 754
747 /* 755 host_err = nfsd_open_break_lease(inode, access);
748 * Check to see if there are any leases on this file.
749 * This may block while leases are broken.
750 */
751 if (!(access & NFSD_MAY_NOT_BREAK_LEASE))
752 host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0));
753 if (host_err) /* NOMEM or WOULDBLOCK */ 756 if (host_err) /* NOMEM or WOULDBLOCK */
754 goto out_nfserr; 757 goto out_nfserr;
755 758
@@ -1660,8 +1663,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1660 if (!dold->d_inode) 1663 if (!dold->d_inode)
1661 goto out_drop_write; 1664 goto out_drop_write;
1662 host_err = nfsd_break_lease(dold->d_inode); 1665 host_err = nfsd_break_lease(dold->d_inode);
1663 if (host_err) 1666 if (host_err) {
1667 err = nfserrno(host_err);
1664 goto out_drop_write; 1668 goto out_drop_write;
1669 }
1665 host_err = vfs_link(dold, dirp, dnew); 1670 host_err = vfs_link(dold, dirp, dnew);
1666 if (!host_err) { 1671 if (!host_err) {
1667 err = nfserrno(commit_metadata(ffhp)); 1672 err = nfserrno(commit_metadata(ffhp));
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 7eafe468a29c..b2e3ff347620 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -1346,6 +1346,11 @@ static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1346 path[level].bp_bh = NULL; 1346 path[level].bp_bh = NULL;
1347} 1347}
1348 1348
1349static void nilfs_btree_nop(struct nilfs_bmap *btree,
1350 struct nilfs_btree_path *path,
1351 int level, __u64 *keyp, __u64 *ptrp)
1352{
1353}
1349 1354
1350static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, 1355static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1351 struct nilfs_btree_path *path, 1356 struct nilfs_btree_path *path,
@@ -1356,20 +1361,19 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1356 struct buffer_head *bh; 1361 struct buffer_head *bh;
1357 struct nilfs_btree_node *node, *parent, *sib; 1362 struct nilfs_btree_node *node, *parent, *sib;
1358 __u64 sibptr; 1363 __u64 sibptr;
1359 int pindex, level, ncmin, ncmax, ncblk, ret; 1364 int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1360 1365
1361 ret = 0; 1366 ret = 0;
1362 stats->bs_nblocks = 0; 1367 stats->bs_nblocks = 0;
1363 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); 1368 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1364 ncblk = nilfs_btree_nchildren_per_block(btree); 1369 ncblk = nilfs_btree_nchildren_per_block(btree);
1365 1370
1366 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 1371 for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1367 level < nilfs_btree_height(btree) - 1; 1372 level < nilfs_btree_height(btree) - 1;
1368 level++) { 1373 level++) {
1369 node = nilfs_btree_get_nonroot_node(path, level); 1374 node = nilfs_btree_get_nonroot_node(path, level);
1370 path[level].bp_oldreq.bpr_ptr = 1375 path[level].bp_oldreq.bpr_ptr =
1371 nilfs_btree_node_get_ptr(node, path[level].bp_index, 1376 nilfs_btree_node_get_ptr(node, dindex, ncblk);
1372 ncblk);
1373 ret = nilfs_bmap_prepare_end_ptr(btree, 1377 ret = nilfs_bmap_prepare_end_ptr(btree,
1374 &path[level].bp_oldreq, dat); 1378 &path[level].bp_oldreq, dat);
1375 if (ret < 0) 1379 if (ret < 0)
@@ -1383,6 +1387,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1383 1387
1384 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1388 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1385 pindex = path[level + 1].bp_index; 1389 pindex = path[level + 1].bp_index;
1390 dindex = pindex;
1386 1391
1387 if (pindex > 0) { 1392 if (pindex > 0) {
1388 /* left sibling */ 1393 /* left sibling */
@@ -1421,6 +1426,14 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1421 path[level].bp_sib_bh = bh; 1426 path[level].bp_sib_bh = bh;
1422 path[level].bp_op = nilfs_btree_concat_right; 1427 path[level].bp_op = nilfs_btree_concat_right;
1423 stats->bs_nblocks++; 1428 stats->bs_nblocks++;
1429 /*
1430 * When merging right sibling node
1431 * into the current node, pointer to
1432 * the right sibling node must be
1433 * terminated instead. The adjustment
1434 * below is required for that.
1435 */
1436 dindex = pindex + 1;
1424 /* continue; */ 1437 /* continue; */
1425 } 1438 }
1426 } else { 1439 } else {
@@ -1431,29 +1444,31 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1431 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1444 NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1432 path[level].bp_op = nilfs_btree_shrink; 1445 path[level].bp_op = nilfs_btree_shrink;
1433 stats->bs_nblocks += 2; 1446 stats->bs_nblocks += 2;
1447 level++;
1448 path[level].bp_op = nilfs_btree_nop;
1449 goto shrink_root_child;
1434 } else { 1450 } else {
1435 path[level].bp_op = nilfs_btree_do_delete; 1451 path[level].bp_op = nilfs_btree_do_delete;
1436 stats->bs_nblocks++; 1452 stats->bs_nblocks++;
1453 goto out;
1437 } 1454 }
1438
1439 goto out;
1440
1441 } 1455 }
1442 } 1456 }
1443 1457
1458 /* child of the root node is deleted */
1459 path[level].bp_op = nilfs_btree_do_delete;
1460 stats->bs_nblocks++;
1461
1462shrink_root_child:
1444 node = nilfs_btree_get_root(btree); 1463 node = nilfs_btree_get_root(btree);
1445 path[level].bp_oldreq.bpr_ptr = 1464 path[level].bp_oldreq.bpr_ptr =
1446 nilfs_btree_node_get_ptr(node, path[level].bp_index, 1465 nilfs_btree_node_get_ptr(node, dindex,
1447 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1466 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1448 1467
1449 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); 1468 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1450 if (ret < 0) 1469 if (ret < 0)
1451 goto err_out_child_node; 1470 goto err_out_child_node;
1452 1471
1453 /* child of the root node is deleted */
1454 path[level].bp_op = nilfs_btree_do_delete;
1455 stats->bs_nblocks++;
1456
1457 /* success */ 1472 /* success */
1458 out: 1473 out:
1459 *levelp = level; 1474 *levelp = level;
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index b954878ad6ce..b9b45fc2903e 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -801,12 +801,7 @@ out_err:
801 801
802int nilfs_permission(struct inode *inode, int mask, unsigned int flags) 802int nilfs_permission(struct inode *inode, int mask, unsigned int flags)
803{ 803{
804 struct nilfs_root *root; 804 struct nilfs_root *root = NILFS_I(inode)->i_root;
805
806 if (flags & IPERM_FLAG_RCU)
807 return -ECHILD;
808
809 root = NILFS_I(inode)->i_root;
810 if ((mask & MAY_WRITE) && root && 805 if ((mask & MAY_WRITE) && root &&
811 root->cno != NILFS_CPTREE_CURRENT_CNO) 806 root->cno != NILFS_CPTREE_CURRENT_CNO)
812 return -EROFS; /* snapshot is not writable */ 807 return -EROFS; /* snapshot is not writable */
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 141646e88fb5..bb24ab6c282f 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2573,7 +2573,7 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2573 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; 2573 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK;
2574 2574
2575 if (nilfs->ns_interval) 2575 if (nilfs->ns_interval)
2576 sci->sc_interval = nilfs->ns_interval; 2576 sci->sc_interval = HZ * nilfs->ns_interval;
2577 if (nilfs->ns_watermark) 2577 if (nilfs->ns_watermark)
2578 sci->sc_watermark = nilfs->ns_watermark; 2578 sci->sc_watermark = nilfs->ns_watermark;
2579 return sci; 2579 return sci;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 14def991d9dd..8a84210ca080 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2169,11 +2169,7 @@ static const struct file_operations proc_fd_operations = {
2169 */ 2169 */
2170static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags) 2170static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags)
2171{ 2171{
2172 int rv; 2172 int rv = generic_permission(inode, mask, flags, NULL);
2173
2174 if (flags & IPERM_FLAG_RCU)
2175 return -ECHILD;
2176 rv = generic_permission(inode, mask, flags, NULL);
2177 if (rv == 0) 2173 if (rv == 0)
2178 return 0; 2174 return 0;
2179 if (task_pid(current) == proc_pid(inode)) 2175 if (task_pid(current) == proc_pid(inode))
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 781dec5bd682..be177f702acb 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -38,18 +38,21 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
38 struct inode *inode; 38 struct inode *inode;
39 struct proc_inode *ei; 39 struct proc_inode *ei;
40 struct dentry *error = ERR_PTR(-ENOENT); 40 struct dentry *error = ERR_PTR(-ENOENT);
41 void *ns;
41 42
42 inode = proc_pid_make_inode(dir->i_sb, task); 43 inode = proc_pid_make_inode(dir->i_sb, task);
43 if (!inode) 44 if (!inode)
44 goto out; 45 goto out;
45 46
47 ns = ns_ops->get(task);
48 if (!ns)
49 goto out_iput;
50
46 ei = PROC_I(inode); 51 ei = PROC_I(inode);
47 inode->i_mode = S_IFREG|S_IRUSR; 52 inode->i_mode = S_IFREG|S_IRUSR;
48 inode->i_fop = &ns_file_operations; 53 inode->i_fop = &ns_file_operations;
49 ei->ns_ops = ns_ops; 54 ei->ns_ops = ns_ops;
50 ei->ns = ns_ops->get(task); 55 ei->ns = ns;
51 if (!ei->ns)
52 goto out_iput;
53 56
54 dentry->d_op = &pid_dentry_operations; 57 dentry->d_op = &pid_dentry_operations;
55 d_add(dentry, inode); 58 d_add(dentry, inode);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index f50133c11c24..d167de365a8d 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -304,9 +304,6 @@ static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags)
304 struct ctl_table *table; 304 struct ctl_table *table;
305 int error; 305 int error;
306 306
307 if (flags & IPERM_FLAG_RCU)
308 return -ECHILD;
309
310 /* Executable files are not allowed under /proc/sys/ */ 307 /* Executable files are not allowed under /proc/sys/ */
311 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) 308 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
312 return -EACCES; 309 return -EACCES;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index a9000e9cfee5..d6c3b416529b 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -28,11 +28,12 @@ static int proc_test_super(struct super_block *sb, void *data)
28 28
29static int proc_set_super(struct super_block *sb, void *data) 29static int proc_set_super(struct super_block *sb, void *data)
30{ 30{
31 struct pid_namespace *ns; 31 int err = set_anon_super(sb, NULL);
32 32 if (!err) {
33 ns = (struct pid_namespace *)data; 33 struct pid_namespace *ns = (struct pid_namespace *)data;
34 sb->s_fs_info = get_pid_ns(ns); 34 sb->s_fs_info = get_pid_ns(ns);
35 return set_anon_super(sb, NULL); 35 }
36 return err;
36} 37}
37 38
38static struct dentry *proc_mount(struct file_system_type *fs_type, 39static struct dentry *proc_mount(struct file_system_type *fs_type,
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index e8a62f41b458..d78089690965 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -954,8 +954,6 @@ static int xattr_mount_check(struct super_block *s)
954 954
955int reiserfs_permission(struct inode *inode, int mask, unsigned int flags) 955int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
956{ 956{
957 if (flags & IPERM_FLAG_RCU)
958 return -ECHILD;
959 /* 957 /*
960 * We don't do permission checks on the internal objects. 958 * We don't do permission checks on the internal objects.
961 * Permissions are determined by the "owning" object. 959 * Permissions are determined by the "owning" object.
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 266895783b47..e34f0d99ea4e 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -95,6 +95,14 @@ static int sysfs_set_super(struct super_block *sb, void *data)
95 return error; 95 return error;
96} 96}
97 97
98static void free_sysfs_super_info(struct sysfs_super_info *info)
99{
100 int type;
101 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
102 kobj_ns_drop(type, info->ns[type]);
103 kfree(info);
104}
105
98static struct dentry *sysfs_mount(struct file_system_type *fs_type, 106static struct dentry *sysfs_mount(struct file_system_type *fs_type,
99 int flags, const char *dev_name, void *data) 107 int flags, const char *dev_name, void *data)
100{ 108{
@@ -108,11 +116,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
108 return ERR_PTR(-ENOMEM); 116 return ERR_PTR(-ENOMEM);
109 117
110 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) 118 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
111 info->ns[type] = kobj_ns_current(type); 119 info->ns[type] = kobj_ns_grab_current(type);
112 120
113 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); 121 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
114 if (IS_ERR(sb) || sb->s_fs_info != info) 122 if (IS_ERR(sb) || sb->s_fs_info != info)
115 kfree(info); 123 free_sysfs_super_info(info);
116 if (IS_ERR(sb)) 124 if (IS_ERR(sb))
117 return ERR_CAST(sb); 125 return ERR_CAST(sb);
118 if (!sb->s_root) { 126 if (!sb->s_root) {
@@ -131,12 +139,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
131static void sysfs_kill_sb(struct super_block *sb) 139static void sysfs_kill_sb(struct super_block *sb)
132{ 140{
133 struct sysfs_super_info *info = sysfs_info(sb); 141 struct sysfs_super_info *info = sysfs_info(sb);
134
135 /* Remove the superblock from fs_supers/s_instances 142 /* Remove the superblock from fs_supers/s_instances
136 * so we can't find it, before freeing sysfs_super_info. 143 * so we can't find it, before freeing sysfs_super_info.
137 */ 144 */
138 kill_anon_super(sb); 145 kill_anon_super(sb);
139 kfree(info); 146 free_sysfs_super_info(info);
140} 147}
141 148
142static struct file_system_type sysfs_fs_type = { 149static struct file_system_type sysfs_fs_type = {
@@ -145,28 +152,6 @@ static struct file_system_type sysfs_fs_type = {
145 .kill_sb = sysfs_kill_sb, 152 .kill_sb = sysfs_kill_sb,
146}; 153};
147 154
148void sysfs_exit_ns(enum kobj_ns_type type, const void *ns)
149{
150 struct super_block *sb;
151
152 mutex_lock(&sysfs_mutex);
153 spin_lock(&sb_lock);
154 list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
155 struct sysfs_super_info *info = sysfs_info(sb);
156 /*
157 * If we see a superblock on the fs_supers/s_instances
158 * list the unmount has not completed and sb->s_fs_info
159 * points to a valid struct sysfs_super_info.
160 */
161 /* Ignore superblocks with the wrong ns */
162 if (info->ns[type] != ns)
163 continue;
164 info->ns[type] = NULL;
165 }
166 spin_unlock(&sb_lock);
167 mutex_unlock(&sysfs_mutex);
168}
169
170int __init sysfs_init(void) 155int __init sysfs_init(void)
171{ 156{
172 int err = -ENOMEM; 157 int err = -ENOMEM;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 3d28af31d863..2ed2404f3113 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -136,7 +136,7 @@ struct sysfs_addrm_cxt {
136 * instance). 136 * instance).
137 */ 137 */
138struct sysfs_super_info { 138struct sysfs_super_info {
139 const void *ns[KOBJ_NS_TYPES]; 139 void *ns[KOBJ_NS_TYPES];
140}; 140};
141#define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) 141#define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info))
142extern struct sysfs_dirent sysfs_root; 142extern struct sysfs_dirent sysfs_root;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index f67acbdda5e8..dffeb3795af1 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -61,7 +61,9 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
61 61
62/* 62/*
63 * Called when the clock was set to cancel the timers in the cancel 63 * Called when the clock was set to cancel the timers in the cancel
64 * list. 64 * list. This will wake up processes waiting on these timers. The
65 * wake-up requires ctx->ticks to be non zero, therefore we increment
66 * it before calling wake_up_locked().
65 */ 67 */
66void timerfd_clock_was_set(void) 68void timerfd_clock_was_set(void)
67{ 69{
@@ -76,6 +78,7 @@ void timerfd_clock_was_set(void)
76 spin_lock_irqsave(&ctx->wqh.lock, flags); 78 spin_lock_irqsave(&ctx->wqh.lock, flags);
77 if (ctx->moffs.tv64 != moffs.tv64) { 79 if (ctx->moffs.tv64 != moffs.tv64) {
78 ctx->moffs.tv64 = KTIME_MAX; 80 ctx->moffs.tv64 = KTIME_MAX;
81 ctx->ticks++;
79 wake_up_locked(&ctx->wqh); 82 wake_up_locked(&ctx->wqh);
80 } 83 }
81 spin_unlock_irqrestore(&ctx->wqh.lock, flags); 84 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index b5aeb5a8ebed..529be0582029 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1848,7 +1848,6 @@ static void ubifs_put_super(struct super_block *sb)
1848 bdi_destroy(&c->bdi); 1848 bdi_destroy(&c->bdi);
1849 ubi_close_volume(c->ubi); 1849 ubi_close_volume(c->ubi);
1850 mutex_unlock(&c->umount_mutex); 1850 mutex_unlock(&c->umount_mutex);
1851 kfree(c);
1852} 1851}
1853 1852
1854static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) 1853static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
@@ -1971,61 +1970,65 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode)
1971 return ERR_PTR(-EINVAL); 1970 return ERR_PTR(-EINVAL);
1972} 1971}
1973 1972
1974static int ubifs_fill_super(struct super_block *sb, void *data, int silent) 1973static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
1975{ 1974{
1976 struct ubi_volume_desc *ubi = sb->s_fs_info;
1977 struct ubifs_info *c; 1975 struct ubifs_info *c;
1978 struct inode *root;
1979 int err;
1980 1976
1981 c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL); 1977 c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL);
1982 if (!c) 1978 if (c) {
1983 return -ENOMEM; 1979 spin_lock_init(&c->cnt_lock);
1980 spin_lock_init(&c->cs_lock);
1981 spin_lock_init(&c->buds_lock);
1982 spin_lock_init(&c->space_lock);
1983 spin_lock_init(&c->orphan_lock);
1984 init_rwsem(&c->commit_sem);
1985 mutex_init(&c->lp_mutex);
1986 mutex_init(&c->tnc_mutex);
1987 mutex_init(&c->log_mutex);
1988 mutex_init(&c->mst_mutex);
1989 mutex_init(&c->umount_mutex);
1990 mutex_init(&c->bu_mutex);
1991 mutex_init(&c->write_reserve_mutex);
1992 init_waitqueue_head(&c->cmt_wq);
1993 c->buds = RB_ROOT;
1994 c->old_idx = RB_ROOT;
1995 c->size_tree = RB_ROOT;
1996 c->orph_tree = RB_ROOT;
1997 INIT_LIST_HEAD(&c->infos_list);
1998 INIT_LIST_HEAD(&c->idx_gc);
1999 INIT_LIST_HEAD(&c->replay_list);
2000 INIT_LIST_HEAD(&c->replay_buds);
2001 INIT_LIST_HEAD(&c->uncat_list);
2002 INIT_LIST_HEAD(&c->empty_list);
2003 INIT_LIST_HEAD(&c->freeable_list);
2004 INIT_LIST_HEAD(&c->frdi_idx_list);
2005 INIT_LIST_HEAD(&c->unclean_leb_list);
2006 INIT_LIST_HEAD(&c->old_buds);
2007 INIT_LIST_HEAD(&c->orph_list);
2008 INIT_LIST_HEAD(&c->orph_new);
2009 c->no_chk_data_crc = 1;
2010
2011 c->highest_inum = UBIFS_FIRST_INO;
2012 c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
2013
2014 ubi_get_volume_info(ubi, &c->vi);
2015 ubi_get_device_info(c->vi.ubi_num, &c->di);
2016 }
2017 return c;
2018}
1984 2019
1985 spin_lock_init(&c->cnt_lock); 2020static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1986 spin_lock_init(&c->cs_lock); 2021{
1987 spin_lock_init(&c->buds_lock); 2022 struct ubifs_info *c = sb->s_fs_info;
1988 spin_lock_init(&c->space_lock); 2023 struct inode *root;
1989 spin_lock_init(&c->orphan_lock); 2024 int err;
1990 init_rwsem(&c->commit_sem);
1991 mutex_init(&c->lp_mutex);
1992 mutex_init(&c->tnc_mutex);
1993 mutex_init(&c->log_mutex);
1994 mutex_init(&c->mst_mutex);
1995 mutex_init(&c->umount_mutex);
1996 mutex_init(&c->bu_mutex);
1997 mutex_init(&c->write_reserve_mutex);
1998 init_waitqueue_head(&c->cmt_wq);
1999 c->buds = RB_ROOT;
2000 c->old_idx = RB_ROOT;
2001 c->size_tree = RB_ROOT;
2002 c->orph_tree = RB_ROOT;
2003 INIT_LIST_HEAD(&c->infos_list);
2004 INIT_LIST_HEAD(&c->idx_gc);
2005 INIT_LIST_HEAD(&c->replay_list);
2006 INIT_LIST_HEAD(&c->replay_buds);
2007 INIT_LIST_HEAD(&c->uncat_list);
2008 INIT_LIST_HEAD(&c->empty_list);
2009 INIT_LIST_HEAD(&c->freeable_list);
2010 INIT_LIST_HEAD(&c->frdi_idx_list);
2011 INIT_LIST_HEAD(&c->unclean_leb_list);
2012 INIT_LIST_HEAD(&c->old_buds);
2013 INIT_LIST_HEAD(&c->orph_list);
2014 INIT_LIST_HEAD(&c->orph_new);
2015 c->no_chk_data_crc = 1;
2016 2025
2017 c->vfs_sb = sb; 2026 c->vfs_sb = sb;
2018 c->highest_inum = UBIFS_FIRST_INO;
2019 c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
2020
2021 ubi_get_volume_info(ubi, &c->vi);
2022 ubi_get_device_info(c->vi.ubi_num, &c->di);
2023
2024 /* Re-open the UBI device in read-write mode */ 2027 /* Re-open the UBI device in read-write mode */
2025 c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE); 2028 c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE);
2026 if (IS_ERR(c->ubi)) { 2029 if (IS_ERR(c->ubi)) {
2027 err = PTR_ERR(c->ubi); 2030 err = PTR_ERR(c->ubi);
2028 goto out_free; 2031 goto out;
2029 } 2032 }
2030 2033
2031 /* 2034 /*
@@ -2091,24 +2094,29 @@ out_bdi:
2091 bdi_destroy(&c->bdi); 2094 bdi_destroy(&c->bdi);
2092out_close: 2095out_close:
2093 ubi_close_volume(c->ubi); 2096 ubi_close_volume(c->ubi);
2094out_free: 2097out:
2095 kfree(c);
2096 return err; 2098 return err;
2097} 2099}
2098 2100
2099static int sb_test(struct super_block *sb, void *data) 2101static int sb_test(struct super_block *sb, void *data)
2100{ 2102{
2101 dev_t *dev = data; 2103 struct ubifs_info *c1 = data;
2102 struct ubifs_info *c = sb->s_fs_info; 2104 struct ubifs_info *c = sb->s_fs_info;
2103 2105
2104 return c->vi.cdev == *dev; 2106 return c->vi.cdev == c1->vi.cdev;
2107}
2108
2109static int sb_set(struct super_block *sb, void *data)
2110{
2111 sb->s_fs_info = data;
2112 return set_anon_super(sb, NULL);
2105} 2113}
2106 2114
2107static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, 2115static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
2108 const char *name, void *data) 2116 const char *name, void *data)
2109{ 2117{
2110 struct ubi_volume_desc *ubi; 2118 struct ubi_volume_desc *ubi;
2111 struct ubi_volume_info vi; 2119 struct ubifs_info *c;
2112 struct super_block *sb; 2120 struct super_block *sb;
2113 int err; 2121 int err;
2114 2122
@@ -2125,19 +2133,25 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
2125 name, (int)PTR_ERR(ubi)); 2133 name, (int)PTR_ERR(ubi));
2126 return ERR_CAST(ubi); 2134 return ERR_CAST(ubi);
2127 } 2135 }
2128 ubi_get_volume_info(ubi, &vi);
2129 2136
2130 dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); 2137 c = alloc_ubifs_info(ubi);
2138 if (!c) {
2139 err = -ENOMEM;
2140 goto out_close;
2141 }
2142
2143 dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
2131 2144
2132 sb = sget(fs_type, &sb_test, &set_anon_super, &vi.cdev); 2145 sb = sget(fs_type, sb_test, sb_set, c);
2133 if (IS_ERR(sb)) { 2146 if (IS_ERR(sb)) {
2134 err = PTR_ERR(sb); 2147 err = PTR_ERR(sb);
2148 kfree(c);
2135 goto out_close; 2149 goto out_close;
2136 } 2150 }
2137 2151
2138 if (sb->s_root) { 2152 if (sb->s_root) {
2139 struct ubifs_info *c1 = sb->s_fs_info; 2153 struct ubifs_info *c1 = sb->s_fs_info;
2140 2154 kfree(c);
2141 /* A new mount point for already mounted UBIFS */ 2155 /* A new mount point for already mounted UBIFS */
2142 dbg_gen("this ubi volume is already mounted"); 2156 dbg_gen("this ubi volume is already mounted");
2143 if (!!(flags & MS_RDONLY) != c1->ro_mount) { 2157 if (!!(flags & MS_RDONLY) != c1->ro_mount) {
@@ -2146,11 +2160,6 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
2146 } 2160 }
2147 } else { 2161 } else {
2148 sb->s_flags = flags; 2162 sb->s_flags = flags;
2149 /*
2150 * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is
2151 * replaced by 'c'.
2152 */
2153 sb->s_fs_info = ubi;
2154 err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 2163 err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
2155 if (err) 2164 if (err)
2156 goto out_deact; 2165 goto out_deact;
@@ -2170,11 +2179,18 @@ out_close:
2170 return ERR_PTR(err); 2179 return ERR_PTR(err);
2171} 2180}
2172 2181
2182static void kill_ubifs_super(struct super_block *s)
2183{
2184 struct ubifs_info *c = s->s_fs_info;
2185 kill_anon_super(s);
2186 kfree(c);
2187}
2188
2173static struct file_system_type ubifs_fs_type = { 2189static struct file_system_type ubifs_fs_type = {
2174 .name = "ubifs", 2190 .name = "ubifs",
2175 .owner = THIS_MODULE, 2191 .owner = THIS_MODULE,
2176 .mount = ubifs_mount, 2192 .mount = ubifs_mount,
2177 .kill_sb = kill_anon_super, 2193 .kill_sb = kill_ubifs_super,
2178}; 2194};
2179 2195
2180/* 2196/*
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index f4213ba1ff85..7f782af286bf 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -131,19 +131,34 @@ xfs_file_fsync(
131{ 131{
132 struct inode *inode = file->f_mapping->host; 132 struct inode *inode = file->f_mapping->host;
133 struct xfs_inode *ip = XFS_I(inode); 133 struct xfs_inode *ip = XFS_I(inode);
134 struct xfs_mount *mp = ip->i_mount;
134 struct xfs_trans *tp; 135 struct xfs_trans *tp;
135 int error = 0; 136 int error = 0;
136 int log_flushed = 0; 137 int log_flushed = 0;
137 138
138 trace_xfs_file_fsync(ip); 139 trace_xfs_file_fsync(ip);
139 140
140 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 141 if (XFS_FORCED_SHUTDOWN(mp))
141 return -XFS_ERROR(EIO); 142 return -XFS_ERROR(EIO);
142 143
143 xfs_iflags_clear(ip, XFS_ITRUNCATED); 144 xfs_iflags_clear(ip, XFS_ITRUNCATED);
144 145
145 xfs_ioend_wait(ip); 146 xfs_ioend_wait(ip);
146 147
148 if (mp->m_flags & XFS_MOUNT_BARRIER) {
149 /*
150 * If we have an RT and/or log subvolume we need to make sure
151 * to flush the write cache the device used for file data
152 * first. This is to ensure newly written file data make
153 * it to disk before logging the new inode size in case of
154 * an extending write.
155 */
156 if (XFS_IS_REALTIME_INODE(ip))
157 xfs_blkdev_issue_flush(mp->m_rtdev_targp);
158 else if (mp->m_logdev_targp != mp->m_ddev_targp)
159 xfs_blkdev_issue_flush(mp->m_ddev_targp);
160 }
161
147 /* 162 /*
148 * We always need to make sure that the required inode state is safe on 163 * We always need to make sure that the required inode state is safe on
149 * disk. The inode might be clean but we still might need to force the 164 * disk. The inode might be clean but we still might need to force the
@@ -175,9 +190,9 @@ xfs_file_fsync(
175 * updates. The sync transaction will also force the log. 190 * updates. The sync transaction will also force the log.
176 */ 191 */
177 xfs_iunlock(ip, XFS_ILOCK_SHARED); 192 xfs_iunlock(ip, XFS_ILOCK_SHARED);
178 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); 193 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
179 error = xfs_trans_reserve(tp, 0, 194 error = xfs_trans_reserve(tp, 0,
180 XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); 195 XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
181 if (error) { 196 if (error) {
182 xfs_trans_cancel(tp, 0); 197 xfs_trans_cancel(tp, 0);
183 return -error; 198 return -error;
@@ -209,28 +224,25 @@ xfs_file_fsync(
209 * force the log. 224 * force the log.
210 */ 225 */
211 if (xfs_ipincount(ip)) { 226 if (xfs_ipincount(ip)) {
212 error = _xfs_log_force_lsn(ip->i_mount, 227 error = _xfs_log_force_lsn(mp,
213 ip->i_itemp->ili_last_lsn, 228 ip->i_itemp->ili_last_lsn,
214 XFS_LOG_SYNC, &log_flushed); 229 XFS_LOG_SYNC, &log_flushed);
215 } 230 }
216 xfs_iunlock(ip, XFS_ILOCK_SHARED); 231 xfs_iunlock(ip, XFS_ILOCK_SHARED);
217 } 232 }
218 233
219 if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) { 234 /*
220 /* 235 * If we only have a single device, and the log force above was
221 * If the log write didn't issue an ordered tag we need 236 * a no-op we might have to flush the data device cache here.
222 * to flush the disk cache for the data device now. 237 * This can only happen for fdatasync/O_DSYNC if we were overwriting
223 */ 238 * an already allocated file and thus do not have any metadata to
224 if (!log_flushed) 239 * commit.
225 xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); 240 */
226 241 if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
227 /* 242 mp->m_logdev_targp == mp->m_ddev_targp &&
228 * If this inode is on the RT dev we need to flush that 243 !XFS_IS_REALTIME_INODE(ip) &&
229 * cache as well. 244 !log_flushed)
230 */ 245 xfs_blkdev_issue_flush(mp->m_ddev_targp);
231 if (XFS_IS_REALTIME_INODE(ip))
232 xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
233 }
234 246
235 return -error; 247 return -error;
236} 248}
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index dd21784525a8..d44d92cd12b1 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -182,7 +182,7 @@ xfs_vn_mknod(
182 if (IS_POSIXACL(dir)) { 182 if (IS_POSIXACL(dir)) {
183 default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); 183 default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
184 if (IS_ERR(default_acl)) 184 if (IS_ERR(default_acl))
185 return -PTR_ERR(default_acl); 185 return PTR_ERR(default_acl);
186 186
187 if (!default_acl) 187 if (!default_acl)
188 mode &= ~current_umask(); 188 mode &= ~current_umask();
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 1e3a7ce804dc..a1a881e68a9a 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -627,68 +627,6 @@ xfs_blkdev_put(
627 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 627 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
628} 628}
629 629
630/*
631 * Try to write out the superblock using barriers.
632 */
633STATIC int
634xfs_barrier_test(
635 xfs_mount_t *mp)
636{
637 xfs_buf_t *sbp = xfs_getsb(mp, 0);
638 int error;
639
640 XFS_BUF_UNDONE(sbp);
641 XFS_BUF_UNREAD(sbp);
642 XFS_BUF_UNDELAYWRITE(sbp);
643 XFS_BUF_WRITE(sbp);
644 XFS_BUF_UNASYNC(sbp);
645 XFS_BUF_ORDERED(sbp);
646
647 xfsbdstrat(mp, sbp);
648 error = xfs_buf_iowait(sbp);
649
650 /*
651 * Clear all the flags we set and possible error state in the
652 * buffer. We only did the write to try out whether barriers
653 * worked and shouldn't leave any traces in the superblock
654 * buffer.
655 */
656 XFS_BUF_DONE(sbp);
657 XFS_BUF_ERROR(sbp, 0);
658 XFS_BUF_UNORDERED(sbp);
659
660 xfs_buf_relse(sbp);
661 return error;
662}
663
664STATIC void
665xfs_mountfs_check_barriers(xfs_mount_t *mp)
666{
667 int error;
668
669 if (mp->m_logdev_targp != mp->m_ddev_targp) {
670 xfs_notice(mp,
671 "Disabling barriers, not supported with external log device");
672 mp->m_flags &= ~XFS_MOUNT_BARRIER;
673 return;
674 }
675
676 if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
677 xfs_notice(mp,
678 "Disabling barriers, underlying device is readonly");
679 mp->m_flags &= ~XFS_MOUNT_BARRIER;
680 return;
681 }
682
683 error = xfs_barrier_test(mp);
684 if (error) {
685 xfs_notice(mp,
686 "Disabling barriers, trial barrier write failed");
687 mp->m_flags &= ~XFS_MOUNT_BARRIER;
688 return;
689 }
690}
691
692void 630void
693xfs_blkdev_issue_flush( 631xfs_blkdev_issue_flush(
694 xfs_buftarg_t *buftarg) 632 xfs_buftarg_t *buftarg)
@@ -1240,14 +1178,6 @@ xfs_fs_remount(
1240 switch (token) { 1178 switch (token) {
1241 case Opt_barrier: 1179 case Opt_barrier:
1242 mp->m_flags |= XFS_MOUNT_BARRIER; 1180 mp->m_flags |= XFS_MOUNT_BARRIER;
1243
1244 /*
1245 * Test if barriers are actually working if we can,
1246 * else delay this check until the filesystem is
1247 * marked writeable.
1248 */
1249 if (!(mp->m_flags & XFS_MOUNT_RDONLY))
1250 xfs_mountfs_check_barriers(mp);
1251 break; 1181 break;
1252 case Opt_nobarrier: 1182 case Opt_nobarrier:
1253 mp->m_flags &= ~XFS_MOUNT_BARRIER; 1183 mp->m_flags &= ~XFS_MOUNT_BARRIER;
@@ -1282,8 +1212,6 @@ xfs_fs_remount(
1282 /* ro -> rw */ 1212 /* ro -> rw */
1283 if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { 1213 if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
1284 mp->m_flags &= ~XFS_MOUNT_RDONLY; 1214 mp->m_flags &= ~XFS_MOUNT_RDONLY;
1285 if (mp->m_flags & XFS_MOUNT_BARRIER)
1286 xfs_mountfs_check_barriers(mp);
1287 1215
1288 /* 1216 /*
1289 * If this is the first remount to writeable state we 1217 * If this is the first remount to writeable state we
@@ -1465,9 +1393,6 @@ xfs_fs_fill_super(
1465 if (error) 1393 if (error)
1466 goto out_free_sb; 1394 goto out_free_sb;
1467 1395
1468 if (mp->m_flags & XFS_MOUNT_BARRIER)
1469 xfs_mountfs_check_barriers(mp);
1470
1471 error = xfs_filestream_mount(mp); 1396 error = xfs_filestream_mount(mp);
1472 if (error) 1397 if (error)
1473 goto out_free_sb; 1398 goto out_free_sb;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 211930246f20..41d5b8f2bf92 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1372,8 +1372,17 @@ xlog_sync(xlog_t *log,
1372 XFS_BUF_ASYNC(bp); 1372 XFS_BUF_ASYNC(bp);
1373 bp->b_flags |= XBF_LOG_BUFFER; 1373 bp->b_flags |= XBF_LOG_BUFFER;
1374 1374
1375 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) 1375 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
1376 /*
1377 * If we have an external log device, flush the data device
1378 * before flushing the log to make sure all meta data
1379 * written back from the AIL actually made it to disk
1380 * before writing out the new log tail LSN in the log buffer.
1381 */
1382 if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
1383 xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
1376 XFS_BUF_ORDERED(bp); 1384 XFS_BUF_ORDERED(bp);
1385 }
1377 1386
1378 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1387 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1379 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); 1388 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);