diff options
author | Jens Axboe <jaxboe@fusionio.com> | 2011-07-01 10:17:13 -0400 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2011-07-01 10:17:13 -0400 |
commit | 04bf7869ca0fd12009aee301cac2264a36df4d98 (patch) | |
tree | 66cb81ebf8b76560a31433c2c493dc430c914af9 /fs | |
parent | d2f31a5fd60d168b00fc4f7617b68a1287b21e90 (diff) | |
parent | 7b28afe01ab6ffb5f152f47831b44933facd2328 (diff) |
Merge branch 'for-linus' into for-3.1/core
Conflicts:
block/blk-throttle.c
block/cfq-iosched.c
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'fs')
118 files changed, 2016 insertions, 1662 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 20c106f24927..1b0b19550015 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
@@ -584,11 +584,11 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, | |||
584 | 584 | ||
585 | success: | 585 | success: |
586 | d_add(dentry, inode); | 586 | d_add(dentry, inode); |
587 | _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }", | 587 | _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%u }", |
588 | fid.vnode, | 588 | fid.vnode, |
589 | fid.unique, | 589 | fid.unique, |
590 | dentry->d_inode->i_ino, | 590 | dentry->d_inode->i_ino, |
591 | (unsigned long long)dentry->d_inode->i_version); | 591 | dentry->d_inode->i_generation); |
592 | 592 | ||
593 | return NULL; | 593 | return NULL; |
594 | } | 594 | } |
@@ -671,10 +671,10 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
671 | * been deleted and replaced, and the original vnode ID has | 671 | * been deleted and replaced, and the original vnode ID has |
672 | * been reused */ | 672 | * been reused */ |
673 | if (fid.unique != vnode->fid.unique) { | 673 | if (fid.unique != vnode->fid.unique) { |
674 | _debug("%s: file deleted (uq %u -> %u I:%llu)", | 674 | _debug("%s: file deleted (uq %u -> %u I:%u)", |
675 | dentry->d_name.name, fid.unique, | 675 | dentry->d_name.name, fid.unique, |
676 | vnode->fid.unique, | 676 | vnode->fid.unique, |
677 | (unsigned long long)dentry->d_inode->i_version); | 677 | dentry->d_inode->i_generation); |
678 | spin_lock(&vnode->lock); | 678 | spin_lock(&vnode->lock); |
679 | set_bit(AFS_VNODE_DELETED, &vnode->flags); | 679 | set_bit(AFS_VNODE_DELETED, &vnode->flags); |
680 | spin_unlock(&vnode->lock); | 680 | spin_unlock(&vnode->lock); |
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 4bd0218473a9..346e3289abd7 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c | |||
@@ -89,7 +89,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, | |||
89 | i_size_write(&vnode->vfs_inode, size); | 89 | i_size_write(&vnode->vfs_inode, size); |
90 | vnode->vfs_inode.i_uid = status->owner; | 90 | vnode->vfs_inode.i_uid = status->owner; |
91 | vnode->vfs_inode.i_gid = status->group; | 91 | vnode->vfs_inode.i_gid = status->group; |
92 | vnode->vfs_inode.i_version = vnode->fid.unique; | 92 | vnode->vfs_inode.i_generation = vnode->fid.unique; |
93 | vnode->vfs_inode.i_nlink = status->nlink; | 93 | vnode->vfs_inode.i_nlink = status->nlink; |
94 | 94 | ||
95 | mode = vnode->vfs_inode.i_mode; | 95 | mode = vnode->vfs_inode.i_mode; |
@@ -102,6 +102,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, | |||
102 | vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server; | 102 | vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server; |
103 | vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime; | 103 | vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime; |
104 | vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; | 104 | vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; |
105 | vnode->vfs_inode.i_version = data_version; | ||
105 | } | 106 | } |
106 | 107 | ||
107 | expected_version = status->data_version; | 108 | expected_version = status->data_version; |
diff --git a/fs/afs/inode.c b/fs/afs/inode.c index db66c5201474..0fdab6e03d87 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c | |||
@@ -75,7 +75,8 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) | |||
75 | inode->i_ctime.tv_nsec = 0; | 75 | inode->i_ctime.tv_nsec = 0; |
76 | inode->i_atime = inode->i_mtime = inode->i_ctime; | 76 | inode->i_atime = inode->i_mtime = inode->i_ctime; |
77 | inode->i_blocks = 0; | 77 | inode->i_blocks = 0; |
78 | inode->i_version = vnode->fid.unique; | 78 | inode->i_generation = vnode->fid.unique; |
79 | inode->i_version = vnode->status.data_version; | ||
79 | inode->i_mapping->a_ops = &afs_fs_aops; | 80 | inode->i_mapping->a_ops = &afs_fs_aops; |
80 | 81 | ||
81 | /* check to see whether a symbolic link is really a mountpoint */ | 82 | /* check to see whether a symbolic link is really a mountpoint */ |
@@ -100,7 +101,7 @@ static int afs_iget5_test(struct inode *inode, void *opaque) | |||
100 | struct afs_iget_data *data = opaque; | 101 | struct afs_iget_data *data = opaque; |
101 | 102 | ||
102 | return inode->i_ino == data->fid.vnode && | 103 | return inode->i_ino == data->fid.vnode && |
103 | inode->i_version == data->fid.unique; | 104 | inode->i_generation == data->fid.unique; |
104 | } | 105 | } |
105 | 106 | ||
106 | /* | 107 | /* |
@@ -122,7 +123,7 @@ static int afs_iget5_set(struct inode *inode, void *opaque) | |||
122 | struct afs_vnode *vnode = AFS_FS_I(inode); | 123 | struct afs_vnode *vnode = AFS_FS_I(inode); |
123 | 124 | ||
124 | inode->i_ino = data->fid.vnode; | 125 | inode->i_ino = data->fid.vnode; |
125 | inode->i_version = data->fid.unique; | 126 | inode->i_generation = data->fid.unique; |
126 | vnode->fid = data->fid; | 127 | vnode->fid = data->fid; |
127 | vnode->volume = data->volume; | 128 | vnode->volume = data->volume; |
128 | 129 | ||
@@ -380,8 +381,7 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
380 | 381 | ||
381 | inode = dentry->d_inode; | 382 | inode = dentry->d_inode; |
382 | 383 | ||
383 | _enter("{ ino=%lu v=%llu }", inode->i_ino, | 384 | _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); |
384 | (unsigned long long)inode->i_version); | ||
385 | 385 | ||
386 | generic_fillattr(inode, stat); | 386 | generic_fillattr(inode, stat); |
387 | return 0; | 387 | return 0; |
diff --git a/fs/afs/super.c b/fs/afs/super.c index fb240e8766d6..356dcf0929e8 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
@@ -31,8 +31,8 @@ | |||
31 | static void afs_i_init_once(void *foo); | 31 | static void afs_i_init_once(void *foo); |
32 | static struct dentry *afs_mount(struct file_system_type *fs_type, | 32 | static struct dentry *afs_mount(struct file_system_type *fs_type, |
33 | int flags, const char *dev_name, void *data); | 33 | int flags, const char *dev_name, void *data); |
34 | static void afs_kill_super(struct super_block *sb); | ||
34 | static struct inode *afs_alloc_inode(struct super_block *sb); | 35 | static struct inode *afs_alloc_inode(struct super_block *sb); |
35 | static void afs_put_super(struct super_block *sb); | ||
36 | static void afs_destroy_inode(struct inode *inode); | 36 | static void afs_destroy_inode(struct inode *inode); |
37 | static int afs_statfs(struct dentry *dentry, struct kstatfs *buf); | 37 | static int afs_statfs(struct dentry *dentry, struct kstatfs *buf); |
38 | 38 | ||
@@ -40,7 +40,7 @@ struct file_system_type afs_fs_type = { | |||
40 | .owner = THIS_MODULE, | 40 | .owner = THIS_MODULE, |
41 | .name = "afs", | 41 | .name = "afs", |
42 | .mount = afs_mount, | 42 | .mount = afs_mount, |
43 | .kill_sb = kill_anon_super, | 43 | .kill_sb = afs_kill_super, |
44 | .fs_flags = 0, | 44 | .fs_flags = 0, |
45 | }; | 45 | }; |
46 | 46 | ||
@@ -50,7 +50,6 @@ static const struct super_operations afs_super_ops = { | |||
50 | .drop_inode = afs_drop_inode, | 50 | .drop_inode = afs_drop_inode, |
51 | .destroy_inode = afs_destroy_inode, | 51 | .destroy_inode = afs_destroy_inode, |
52 | .evict_inode = afs_evict_inode, | 52 | .evict_inode = afs_evict_inode, |
53 | .put_super = afs_put_super, | ||
54 | .show_options = generic_show_options, | 53 | .show_options = generic_show_options, |
55 | }; | 54 | }; |
56 | 55 | ||
@@ -282,19 +281,25 @@ static int afs_parse_device_name(struct afs_mount_params *params, | |||
282 | */ | 281 | */ |
283 | static int afs_test_super(struct super_block *sb, void *data) | 282 | static int afs_test_super(struct super_block *sb, void *data) |
284 | { | 283 | { |
285 | struct afs_mount_params *params = data; | 284 | struct afs_super_info *as1 = data; |
286 | struct afs_super_info *as = sb->s_fs_info; | 285 | struct afs_super_info *as = sb->s_fs_info; |
287 | 286 | ||
288 | return as->volume == params->volume; | 287 | return as->volume == as1->volume; |
288 | } | ||
289 | |||
290 | static int afs_set_super(struct super_block *sb, void *data) | ||
291 | { | ||
292 | sb->s_fs_info = data; | ||
293 | return set_anon_super(sb, NULL); | ||
289 | } | 294 | } |
290 | 295 | ||
291 | /* | 296 | /* |
292 | * fill in the superblock | 297 | * fill in the superblock |
293 | */ | 298 | */ |
294 | static int afs_fill_super(struct super_block *sb, void *data) | 299 | static int afs_fill_super(struct super_block *sb, |
300 | struct afs_mount_params *params) | ||
295 | { | 301 | { |
296 | struct afs_mount_params *params = data; | 302 | struct afs_super_info *as = sb->s_fs_info; |
297 | struct afs_super_info *as = NULL; | ||
298 | struct afs_fid fid; | 303 | struct afs_fid fid; |
299 | struct dentry *root = NULL; | 304 | struct dentry *root = NULL; |
300 | struct inode *inode = NULL; | 305 | struct inode *inode = NULL; |
@@ -302,23 +307,13 @@ static int afs_fill_super(struct super_block *sb, void *data) | |||
302 | 307 | ||
303 | _enter(""); | 308 | _enter(""); |
304 | 309 | ||
305 | /* allocate a superblock info record */ | ||
306 | as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); | ||
307 | if (!as) { | ||
308 | _leave(" = -ENOMEM"); | ||
309 | return -ENOMEM; | ||
310 | } | ||
311 | |||
312 | afs_get_volume(params->volume); | ||
313 | as->volume = params->volume; | ||
314 | |||
315 | /* fill in the superblock */ | 310 | /* fill in the superblock */ |
316 | sb->s_blocksize = PAGE_CACHE_SIZE; | 311 | sb->s_blocksize = PAGE_CACHE_SIZE; |
317 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | 312 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; |
318 | sb->s_magic = AFS_FS_MAGIC; | 313 | sb->s_magic = AFS_FS_MAGIC; |
319 | sb->s_op = &afs_super_ops; | 314 | sb->s_op = &afs_super_ops; |
320 | sb->s_fs_info = as; | ||
321 | sb->s_bdi = &as->volume->bdi; | 315 | sb->s_bdi = &as->volume->bdi; |
316 | strlcpy(sb->s_id, as->volume->vlocation->vldb.name, sizeof(sb->s_id)); | ||
322 | 317 | ||
323 | /* allocate the root inode and dentry */ | 318 | /* allocate the root inode and dentry */ |
324 | fid.vid = as->volume->vid; | 319 | fid.vid = as->volume->vid; |
@@ -326,7 +321,7 @@ static int afs_fill_super(struct super_block *sb, void *data) | |||
326 | fid.unique = 1; | 321 | fid.unique = 1; |
327 | inode = afs_iget(sb, params->key, &fid, NULL, NULL); | 322 | inode = afs_iget(sb, params->key, &fid, NULL, NULL); |
328 | if (IS_ERR(inode)) | 323 | if (IS_ERR(inode)) |
329 | goto error_inode; | 324 | return PTR_ERR(inode); |
330 | 325 | ||
331 | if (params->autocell) | 326 | if (params->autocell) |
332 | set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); | 327 | set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); |
@@ -342,16 +337,8 @@ static int afs_fill_super(struct super_block *sb, void *data) | |||
342 | _leave(" = 0"); | 337 | _leave(" = 0"); |
343 | return 0; | 338 | return 0; |
344 | 339 | ||
345 | error_inode: | ||
346 | ret = PTR_ERR(inode); | ||
347 | inode = NULL; | ||
348 | error: | 340 | error: |
349 | iput(inode); | 341 | iput(inode); |
350 | afs_put_volume(as->volume); | ||
351 | kfree(as); | ||
352 | |||
353 | sb->s_fs_info = NULL; | ||
354 | |||
355 | _leave(" = %d", ret); | 342 | _leave(" = %d", ret); |
356 | return ret; | 343 | return ret; |
357 | } | 344 | } |
@@ -367,6 +354,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, | |||
367 | struct afs_volume *vol; | 354 | struct afs_volume *vol; |
368 | struct key *key; | 355 | struct key *key; |
369 | char *new_opts = kstrdup(options, GFP_KERNEL); | 356 | char *new_opts = kstrdup(options, GFP_KERNEL); |
357 | struct afs_super_info *as; | ||
370 | int ret; | 358 | int ret; |
371 | 359 | ||
372 | _enter(",,%s,%p", dev_name, options); | 360 | _enter(",,%s,%p", dev_name, options); |
@@ -399,12 +387,22 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, | |||
399 | ret = PTR_ERR(vol); | 387 | ret = PTR_ERR(vol); |
400 | goto error; | 388 | goto error; |
401 | } | 389 | } |
402 | params.volume = vol; | 390 | |
391 | /* allocate a superblock info record */ | ||
392 | as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); | ||
393 | if (!as) { | ||
394 | ret = -ENOMEM; | ||
395 | afs_put_volume(vol); | ||
396 | goto error; | ||
397 | } | ||
398 | as->volume = vol; | ||
403 | 399 | ||
404 | /* allocate a deviceless superblock */ | 400 | /* allocate a deviceless superblock */ |
405 | sb = sget(fs_type, afs_test_super, set_anon_super, &params); | 401 | sb = sget(fs_type, afs_test_super, afs_set_super, as); |
406 | if (IS_ERR(sb)) { | 402 | if (IS_ERR(sb)) { |
407 | ret = PTR_ERR(sb); | 403 | ret = PTR_ERR(sb); |
404 | afs_put_volume(vol); | ||
405 | kfree(as); | ||
408 | goto error; | 406 | goto error; |
409 | } | 407 | } |
410 | 408 | ||
@@ -422,16 +420,16 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, | |||
422 | } else { | 420 | } else { |
423 | _debug("reuse"); | 421 | _debug("reuse"); |
424 | ASSERTCMP(sb->s_flags, &, MS_ACTIVE); | 422 | ASSERTCMP(sb->s_flags, &, MS_ACTIVE); |
423 | afs_put_volume(vol); | ||
424 | kfree(as); | ||
425 | } | 425 | } |
426 | 426 | ||
427 | afs_put_volume(params.volume); | ||
428 | afs_put_cell(params.cell); | 427 | afs_put_cell(params.cell); |
429 | kfree(new_opts); | 428 | kfree(new_opts); |
430 | _leave(" = 0 [%p]", sb); | 429 | _leave(" = 0 [%p]", sb); |
431 | return dget(sb->s_root); | 430 | return dget(sb->s_root); |
432 | 431 | ||
433 | error: | 432 | error: |
434 | afs_put_volume(params.volume); | ||
435 | afs_put_cell(params.cell); | 433 | afs_put_cell(params.cell); |
436 | key_put(params.key); | 434 | key_put(params.key); |
437 | kfree(new_opts); | 435 | kfree(new_opts); |
@@ -439,18 +437,12 @@ error: | |||
439 | return ERR_PTR(ret); | 437 | return ERR_PTR(ret); |
440 | } | 438 | } |
441 | 439 | ||
442 | /* | 440 | static void afs_kill_super(struct super_block *sb) |
443 | * finish the unmounting process on the superblock | ||
444 | */ | ||
445 | static void afs_put_super(struct super_block *sb) | ||
446 | { | 441 | { |
447 | struct afs_super_info *as = sb->s_fs_info; | 442 | struct afs_super_info *as = sb->s_fs_info; |
448 | 443 | kill_anon_super(sb); | |
449 | _enter(""); | ||
450 | |||
451 | afs_put_volume(as->volume); | 444 | afs_put_volume(as->volume); |
452 | 445 | kfree(as); | |
453 | _leave(""); | ||
454 | } | 446 | } |
455 | 447 | ||
456 | /* | 448 | /* |
diff --git a/fs/afs/write.c b/fs/afs/write.c index 789b3afb3423..b806285ff853 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c | |||
@@ -84,23 +84,21 @@ void afs_put_writeback(struct afs_writeback *wb) | |||
84 | * partly or wholly fill a page that's under preparation for writing | 84 | * partly or wholly fill a page that's under preparation for writing |
85 | */ | 85 | */ |
86 | static int afs_fill_page(struct afs_vnode *vnode, struct key *key, | 86 | static int afs_fill_page(struct afs_vnode *vnode, struct key *key, |
87 | loff_t pos, unsigned len, struct page *page) | 87 | loff_t pos, struct page *page) |
88 | { | 88 | { |
89 | loff_t i_size; | 89 | loff_t i_size; |
90 | unsigned eof; | ||
91 | int ret; | 90 | int ret; |
91 | int len; | ||
92 | 92 | ||
93 | _enter(",,%llu,%u", (unsigned long long)pos, len); | 93 | _enter(",,%llu", (unsigned long long)pos); |
94 | |||
95 | ASSERTCMP(len, <=, PAGE_CACHE_SIZE); | ||
96 | 94 | ||
97 | i_size = i_size_read(&vnode->vfs_inode); | 95 | i_size = i_size_read(&vnode->vfs_inode); |
98 | if (pos + len > i_size) | 96 | if (pos + PAGE_CACHE_SIZE > i_size) |
99 | eof = i_size; | 97 | len = i_size - pos; |
100 | else | 98 | else |
101 | eof = PAGE_CACHE_SIZE; | 99 | len = PAGE_CACHE_SIZE; |
102 | 100 | ||
103 | ret = afs_vnode_fetch_data(vnode, key, 0, eof, page); | 101 | ret = afs_vnode_fetch_data(vnode, key, pos, len, page); |
104 | if (ret < 0) { | 102 | if (ret < 0) { |
105 | if (ret == -ENOENT) { | 103 | if (ret == -ENOENT) { |
106 | _debug("got NOENT from server" | 104 | _debug("got NOENT from server" |
@@ -153,9 +151,8 @@ int afs_write_begin(struct file *file, struct address_space *mapping, | |||
153 | *pagep = page; | 151 | *pagep = page; |
154 | /* page won't leak in error case: it eventually gets cleaned off LRU */ | 152 | /* page won't leak in error case: it eventually gets cleaned off LRU */ |
155 | 153 | ||
156 | if (!PageUptodate(page)) { | 154 | if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) { |
157 | _debug("not up to date"); | 155 | ret = afs_fill_page(vnode, key, index << PAGE_CACHE_SHIFT, page); |
158 | ret = afs_fill_page(vnode, key, pos, len, page); | ||
159 | if (ret < 0) { | 156 | if (ret < 0) { |
160 | kfree(candidate); | 157 | kfree(candidate); |
161 | _leave(" = %d [prep]", ret); | 158 | _leave(" = %d [prep]", ret); |
diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 9ad2369d9e35..bfcb18feb1df 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c | |||
@@ -231,9 +231,6 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer, | |||
231 | 231 | ||
232 | static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags) | 232 | static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags) |
233 | { | 233 | { |
234 | if (flags & IPERM_FLAG_RCU) | ||
235 | return -ECHILD; | ||
236 | |||
237 | return -EIO; | 234 | return -EIO; |
238 | } | 235 | } |
239 | 236 | ||
diff --git a/fs/block_dev.c b/fs/block_dev.c index 1f2b19978333..610e8e0b04b8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -762,7 +762,19 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, | |||
762 | if (!disk) | 762 | if (!disk) |
763 | return ERR_PTR(-ENXIO); | 763 | return ERR_PTR(-ENXIO); |
764 | 764 | ||
765 | whole = bdget_disk(disk, 0); | 765 | /* |
766 | * Normally, @bdev should equal what's returned from bdget_disk() | ||
767 | * if partno is 0; however, some drivers (floppy) use multiple | ||
768 | * bdev's for the same physical device and @bdev may be one of the | ||
769 | * aliases. Keep @bdev if partno is 0. This means claimer | ||
770 | * tracking is broken for those devices but it has always been that | ||
771 | * way. | ||
772 | */ | ||
773 | if (partno) | ||
774 | whole = bdget_disk(disk, 0); | ||
775 | else | ||
776 | whole = bdgrab(bdev); | ||
777 | |||
766 | module_put(disk->fops->owner); | 778 | module_put(disk->fops->owner); |
767 | put_disk(disk); | 779 | put_disk(disk); |
768 | if (!whole) | 780 | if (!whole) |
@@ -1272,8 +1284,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) | |||
1272 | * individual writeable reference is too fragile given the | 1284 | * individual writeable reference is too fragile given the |
1273 | * way @mode is used in blkdev_get/put(). | 1285 | * way @mode is used in blkdev_get/put(). |
1274 | */ | 1286 | */ |
1275 | if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) && | 1287 | if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder && |
1276 | !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { | 1288 | (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { |
1277 | bdev->bd_write_holder = true; | 1289 | bdev->bd_write_holder = true; |
1278 | disk_block_events(disk); | 1290 | disk_block_events(disk); |
1279 | } | 1291 | } |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 93b1aa932014..52d7eca8c7bf 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -121,9 +121,6 @@ struct btrfs_inode { | |||
121 | */ | 121 | */ |
122 | u64 index_cnt; | 122 | u64 index_cnt; |
123 | 123 | ||
124 | /* the start of block group preferred for allocations. */ | ||
125 | u64 block_group; | ||
126 | |||
127 | /* the fsync log has some corner cases that mean we have to check | 124 | /* the fsync log has some corner cases that mean we have to check |
128 | * directories to see if any unlinks have been done before | 125 | * directories to see if any unlinks have been done before |
129 | * the directory was logged. See tree-log.c for all the | 126 | * the directory was logged. See tree-log.c for all the |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b0e18d986e0a..2e667868e0d2 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -43,8 +43,6 @@ struct btrfs_path *btrfs_alloc_path(void) | |||
43 | { | 43 | { |
44 | struct btrfs_path *path; | 44 | struct btrfs_path *path; |
45 | path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); | 45 | path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); |
46 | if (path) | ||
47 | path->reada = 1; | ||
48 | return path; | 46 | return path; |
49 | } | 47 | } |
50 | 48 | ||
@@ -1224,11 +1222,13 @@ static void reada_for_search(struct btrfs_root *root, | |||
1224 | u64 search; | 1222 | u64 search; |
1225 | u64 target; | 1223 | u64 target; |
1226 | u64 nread = 0; | 1224 | u64 nread = 0; |
1225 | u64 gen; | ||
1227 | int direction = path->reada; | 1226 | int direction = path->reada; |
1228 | struct extent_buffer *eb; | 1227 | struct extent_buffer *eb; |
1229 | u32 nr; | 1228 | u32 nr; |
1230 | u32 blocksize; | 1229 | u32 blocksize; |
1231 | u32 nscan = 0; | 1230 | u32 nscan = 0; |
1231 | bool map = true; | ||
1232 | 1232 | ||
1233 | if (level != 1) | 1233 | if (level != 1) |
1234 | return; | 1234 | return; |
@@ -1250,7 +1250,19 @@ static void reada_for_search(struct btrfs_root *root, | |||
1250 | 1250 | ||
1251 | nritems = btrfs_header_nritems(node); | 1251 | nritems = btrfs_header_nritems(node); |
1252 | nr = slot; | 1252 | nr = slot; |
1253 | if (node->map_token || path->skip_locking) | ||
1254 | map = false; | ||
1255 | |||
1253 | while (1) { | 1256 | while (1) { |
1257 | if (map && !node->map_token) { | ||
1258 | unsigned long offset = btrfs_node_key_ptr_offset(nr); | ||
1259 | map_private_extent_buffer(node, offset, | ||
1260 | sizeof(struct btrfs_key_ptr), | ||
1261 | &node->map_token, | ||
1262 | &node->kaddr, | ||
1263 | &node->map_start, | ||
1264 | &node->map_len, KM_USER1); | ||
1265 | } | ||
1254 | if (direction < 0) { | 1266 | if (direction < 0) { |
1255 | if (nr == 0) | 1267 | if (nr == 0) |
1256 | break; | 1268 | break; |
@@ -1268,14 +1280,23 @@ static void reada_for_search(struct btrfs_root *root, | |||
1268 | search = btrfs_node_blockptr(node, nr); | 1280 | search = btrfs_node_blockptr(node, nr); |
1269 | if ((search <= target && target - search <= 65536) || | 1281 | if ((search <= target && target - search <= 65536) || |
1270 | (search > target && search - target <= 65536)) { | 1282 | (search > target && search - target <= 65536)) { |
1271 | readahead_tree_block(root, search, blocksize, | 1283 | gen = btrfs_node_ptr_generation(node, nr); |
1272 | btrfs_node_ptr_generation(node, nr)); | 1284 | if (map && node->map_token) { |
1285 | unmap_extent_buffer(node, node->map_token, | ||
1286 | KM_USER1); | ||
1287 | node->map_token = NULL; | ||
1288 | } | ||
1289 | readahead_tree_block(root, search, blocksize, gen); | ||
1273 | nread += blocksize; | 1290 | nread += blocksize; |
1274 | } | 1291 | } |
1275 | nscan++; | 1292 | nscan++; |
1276 | if ((nread > 65536 || nscan > 32)) | 1293 | if ((nread > 65536 || nscan > 32)) |
1277 | break; | 1294 | break; |
1278 | } | 1295 | } |
1296 | if (map && node->map_token) { | ||
1297 | unmap_extent_buffer(node, node->map_token, KM_USER1); | ||
1298 | node->map_token = NULL; | ||
1299 | } | ||
1279 | } | 1300 | } |
1280 | 1301 | ||
1281 | /* | 1302 | /* |
@@ -1648,9 +1669,6 @@ again: | |||
1648 | } | 1669 | } |
1649 | cow_done: | 1670 | cow_done: |
1650 | BUG_ON(!cow && ins_len); | 1671 | BUG_ON(!cow && ins_len); |
1651 | if (level != btrfs_header_level(b)) | ||
1652 | WARN_ON(1); | ||
1653 | level = btrfs_header_level(b); | ||
1654 | 1672 | ||
1655 | p->nodes[level] = b; | 1673 | p->nodes[level] = b; |
1656 | if (!p->skip_locking) | 1674 | if (!p->skip_locking) |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6c093fa98f61..f30ac05dbda7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -19,7 +19,6 @@ | |||
19 | #ifndef __BTRFS_CTREE__ | 19 | #ifndef __BTRFS_CTREE__ |
20 | #define __BTRFS_CTREE__ | 20 | #define __BTRFS_CTREE__ |
21 | 21 | ||
22 | #include <linux/version.h> | ||
23 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
24 | #include <linux/highmem.h> | 23 | #include <linux/highmem.h> |
25 | #include <linux/fs.h> | 24 | #include <linux/fs.h> |
@@ -930,7 +929,6 @@ struct btrfs_fs_info { | |||
930 | * is required instead of the faster short fsync log commits | 929 | * is required instead of the faster short fsync log commits |
931 | */ | 930 | */ |
932 | u64 last_trans_log_full_commit; | 931 | u64 last_trans_log_full_commit; |
933 | u64 open_ioctl_trans; | ||
934 | unsigned long mount_opt:20; | 932 | unsigned long mount_opt:20; |
935 | unsigned long compress_type:4; | 933 | unsigned long compress_type:4; |
936 | u64 max_inline; | 934 | u64 max_inline; |
@@ -947,7 +945,6 @@ struct btrfs_fs_info { | |||
947 | struct super_block *sb; | 945 | struct super_block *sb; |
948 | struct inode *btree_inode; | 946 | struct inode *btree_inode; |
949 | struct backing_dev_info bdi; | 947 | struct backing_dev_info bdi; |
950 | struct mutex trans_mutex; | ||
951 | struct mutex tree_log_mutex; | 948 | struct mutex tree_log_mutex; |
952 | struct mutex transaction_kthread_mutex; | 949 | struct mutex transaction_kthread_mutex; |
953 | struct mutex cleaner_mutex; | 950 | struct mutex cleaner_mutex; |
@@ -968,6 +965,13 @@ struct btrfs_fs_info { | |||
968 | struct rw_semaphore subvol_sem; | 965 | struct rw_semaphore subvol_sem; |
969 | struct srcu_struct subvol_srcu; | 966 | struct srcu_struct subvol_srcu; |
970 | 967 | ||
968 | spinlock_t trans_lock; | ||
969 | /* | ||
970 | * the reloc mutex goes with the trans lock, it is taken | ||
971 | * during commit to protect us from the relocation code | ||
972 | */ | ||
973 | struct mutex reloc_mutex; | ||
974 | |||
971 | struct list_head trans_list; | 975 | struct list_head trans_list; |
972 | struct list_head hashers; | 976 | struct list_head hashers; |
973 | struct list_head dead_roots; | 977 | struct list_head dead_roots; |
@@ -980,6 +984,7 @@ struct btrfs_fs_info { | |||
980 | atomic_t async_submit_draining; | 984 | atomic_t async_submit_draining; |
981 | atomic_t nr_async_bios; | 985 | atomic_t nr_async_bios; |
982 | atomic_t async_delalloc_pages; | 986 | atomic_t async_delalloc_pages; |
987 | atomic_t open_ioctl_trans; | ||
983 | 988 | ||
984 | /* | 989 | /* |
985 | * this is used by the balancing code to wait for all the pending | 990 | * this is used by the balancing code to wait for all the pending |
@@ -1044,6 +1049,7 @@ struct btrfs_fs_info { | |||
1044 | int closing; | 1049 | int closing; |
1045 | int log_root_recovering; | 1050 | int log_root_recovering; |
1046 | int enospc_unlink; | 1051 | int enospc_unlink; |
1052 | int trans_no_join; | ||
1047 | 1053 | ||
1048 | u64 total_pinned; | 1054 | u64 total_pinned; |
1049 | 1055 | ||
@@ -1065,7 +1071,6 @@ struct btrfs_fs_info { | |||
1065 | struct reloc_control *reloc_ctl; | 1071 | struct reloc_control *reloc_ctl; |
1066 | 1072 | ||
1067 | spinlock_t delalloc_lock; | 1073 | spinlock_t delalloc_lock; |
1068 | spinlock_t new_trans_lock; | ||
1069 | u64 delalloc_bytes; | 1074 | u64 delalloc_bytes; |
1070 | 1075 | ||
1071 | /* data_alloc_cluster is only used in ssd mode */ | 1076 | /* data_alloc_cluster is only used in ssd mode */ |
@@ -1172,6 +1177,14 @@ struct btrfs_root { | |||
1172 | u32 type; | 1177 | u32 type; |
1173 | 1178 | ||
1174 | u64 highest_objectid; | 1179 | u64 highest_objectid; |
1180 | |||
1181 | /* btrfs_record_root_in_trans is a multi-step process, | ||
1182 | * and it can race with the balancing code. But the | ||
1183 | * race is very small, and only the first time the root | ||
1184 | * is added to each transaction. So in_trans_setup | ||
1185 | * is used to tell us when more checks are required | ||
1186 | */ | ||
1187 | unsigned long in_trans_setup; | ||
1175 | int ref_cows; | 1188 | int ref_cows; |
1176 | int track_dirty; | 1189 | int track_dirty; |
1177 | int in_radix; | 1190 | int in_radix; |
@@ -1181,7 +1194,6 @@ struct btrfs_root { | |||
1181 | struct btrfs_key defrag_max; | 1194 | struct btrfs_key defrag_max; |
1182 | int defrag_running; | 1195 | int defrag_running; |
1183 | char *name; | 1196 | char *name; |
1184 | int in_sysfs; | ||
1185 | 1197 | ||
1186 | /* the dirty list is only used by non-reference counted roots */ | 1198 | /* the dirty list is only used by non-reference counted roots */ |
1187 | struct list_head dirty_list; | 1199 | struct list_head dirty_list; |
@@ -1340,6 +1352,7 @@ struct btrfs_ioctl_defrag_range_args { | |||
1340 | #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) | 1352 | #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) |
1341 | #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) | 1353 | #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) |
1342 | #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) | 1354 | #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) |
1355 | #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) | ||
1343 | 1356 | ||
1344 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 1357 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
1345 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 1358 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
@@ -2238,6 +2251,9 @@ int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | |||
2238 | void btrfs_block_rsv_release(struct btrfs_root *root, | 2251 | void btrfs_block_rsv_release(struct btrfs_root *root, |
2239 | struct btrfs_block_rsv *block_rsv, | 2252 | struct btrfs_block_rsv *block_rsv, |
2240 | u64 num_bytes); | 2253 | u64 num_bytes); |
2254 | int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, | ||
2255 | struct btrfs_root *root, | ||
2256 | struct btrfs_block_rsv *rsv); | ||
2241 | int btrfs_set_block_group_ro(struct btrfs_root *root, | 2257 | int btrfs_set_block_group_ro(struct btrfs_root *root, |
2242 | struct btrfs_block_group_cache *cache); | 2258 | struct btrfs_block_group_cache *cache); |
2243 | int btrfs_set_block_group_rw(struct btrfs_root *root, | 2259 | int btrfs_set_block_group_rw(struct btrfs_root *root, |
@@ -2350,6 +2366,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
2350 | struct btrfs_root *root, | 2366 | struct btrfs_root *root, |
2351 | struct extent_buffer *node, | 2367 | struct extent_buffer *node, |
2352 | struct extent_buffer *parent); | 2368 | struct extent_buffer *parent); |
2369 | static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) | ||
2370 | { | ||
2371 | /* | ||
2372 | * Get synced with close_ctree() | ||
2373 | */ | ||
2374 | smp_mb(); | ||
2375 | return fs_info->closing; | ||
2376 | } | ||
2377 | |||
2353 | /* root-item.c */ | 2378 | /* root-item.c */ |
2354 | int btrfs_find_root_ref(struct btrfs_root *tree_root, | 2379 | int btrfs_find_root_ref(struct btrfs_root *tree_root, |
2355 | struct btrfs_path *path, | 2380 | struct btrfs_path *path, |
@@ -2512,8 +2537,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | |||
2512 | int btrfs_writepages(struct address_space *mapping, | 2537 | int btrfs_writepages(struct address_space *mapping, |
2513 | struct writeback_control *wbc); | 2538 | struct writeback_control *wbc); |
2514 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 2539 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
2515 | struct btrfs_root *new_root, | 2540 | struct btrfs_root *new_root, u64 new_dirid); |
2516 | u64 new_dirid, u64 alloc_hint); | ||
2517 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | 2541 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
2518 | size_t size, struct bio *bio, unsigned long bio_flags); | 2542 | size_t size, struct bio *bio, unsigned long bio_flags); |
2519 | 2543 | ||
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 01e29503a54b..f1cbd028f7b3 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -297,7 +297,6 @@ struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len) | |||
297 | item->data_len = data_len; | 297 | item->data_len = data_len; |
298 | item->ins_or_del = 0; | 298 | item->ins_or_del = 0; |
299 | item->bytes_reserved = 0; | 299 | item->bytes_reserved = 0; |
300 | item->block_rsv = NULL; | ||
301 | item->delayed_node = NULL; | 300 | item->delayed_node = NULL; |
302 | atomic_set(&item->refs, 1); | 301 | atomic_set(&item->refs, 1); |
303 | } | 302 | } |
@@ -593,10 +592,8 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, | |||
593 | 592 | ||
594 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); | 593 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
595 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); | 594 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); |
596 | if (!ret) { | 595 | if (!ret) |
597 | item->bytes_reserved = num_bytes; | 596 | item->bytes_reserved = num_bytes; |
598 | item->block_rsv = dst_rsv; | ||
599 | } | ||
600 | 597 | ||
601 | return ret; | 598 | return ret; |
602 | } | 599 | } |
@@ -604,10 +601,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, | |||
604 | static void btrfs_delayed_item_release_metadata(struct btrfs_root *root, | 601 | static void btrfs_delayed_item_release_metadata(struct btrfs_root *root, |
605 | struct btrfs_delayed_item *item) | 602 | struct btrfs_delayed_item *item) |
606 | { | 603 | { |
604 | struct btrfs_block_rsv *rsv; | ||
605 | |||
607 | if (!item->bytes_reserved) | 606 | if (!item->bytes_reserved) |
608 | return; | 607 | return; |
609 | 608 | ||
610 | btrfs_block_rsv_release(root, item->block_rsv, | 609 | rsv = &root->fs_info->global_block_rsv; |
610 | btrfs_block_rsv_release(root, rsv, | ||
611 | item->bytes_reserved); | 611 | item->bytes_reserved); |
612 | } | 612 | } |
613 | 613 | ||
@@ -678,6 +678,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans, | |||
678 | INIT_LIST_HEAD(&head); | 678 | INIT_LIST_HEAD(&head); |
679 | 679 | ||
680 | next = item; | 680 | next = item; |
681 | nitems = 0; | ||
681 | 682 | ||
682 | /* | 683 | /* |
683 | * count the number of the continuous items that we can insert in batch | 684 | * count the number of the continuous items that we can insert in batch |
@@ -1013,6 +1014,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, | |||
1013 | struct btrfs_delayed_root *delayed_root; | 1014 | struct btrfs_delayed_root *delayed_root; |
1014 | struct btrfs_delayed_node *curr_node, *prev_node; | 1015 | struct btrfs_delayed_node *curr_node, *prev_node; |
1015 | struct btrfs_path *path; | 1016 | struct btrfs_path *path; |
1017 | struct btrfs_block_rsv *block_rsv; | ||
1016 | int ret = 0; | 1018 | int ret = 0; |
1017 | 1019 | ||
1018 | path = btrfs_alloc_path(); | 1020 | path = btrfs_alloc_path(); |
@@ -1020,6 +1022,9 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, | |||
1020 | return -ENOMEM; | 1022 | return -ENOMEM; |
1021 | path->leave_spinning = 1; | 1023 | path->leave_spinning = 1; |
1022 | 1024 | ||
1025 | block_rsv = trans->block_rsv; | ||
1026 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
1027 | |||
1023 | delayed_root = btrfs_get_delayed_root(root); | 1028 | delayed_root = btrfs_get_delayed_root(root); |
1024 | 1029 | ||
1025 | curr_node = btrfs_first_delayed_node(delayed_root); | 1030 | curr_node = btrfs_first_delayed_node(delayed_root); |
@@ -1044,6 +1049,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, | |||
1044 | } | 1049 | } |
1045 | 1050 | ||
1046 | btrfs_free_path(path); | 1051 | btrfs_free_path(path); |
1052 | trans->block_rsv = block_rsv; | ||
1047 | return ret; | 1053 | return ret; |
1048 | } | 1054 | } |
1049 | 1055 | ||
@@ -1051,6 +1057,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, | |||
1051 | struct btrfs_delayed_node *node) | 1057 | struct btrfs_delayed_node *node) |
1052 | { | 1058 | { |
1053 | struct btrfs_path *path; | 1059 | struct btrfs_path *path; |
1060 | struct btrfs_block_rsv *block_rsv; | ||
1054 | int ret; | 1061 | int ret; |
1055 | 1062 | ||
1056 | path = btrfs_alloc_path(); | 1063 | path = btrfs_alloc_path(); |
@@ -1058,6 +1065,9 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, | |||
1058 | return -ENOMEM; | 1065 | return -ENOMEM; |
1059 | path->leave_spinning = 1; | 1066 | path->leave_spinning = 1; |
1060 | 1067 | ||
1068 | block_rsv = trans->block_rsv; | ||
1069 | trans->block_rsv = &node->root->fs_info->global_block_rsv; | ||
1070 | |||
1061 | ret = btrfs_insert_delayed_items(trans, path, node->root, node); | 1071 | ret = btrfs_insert_delayed_items(trans, path, node->root, node); |
1062 | if (!ret) | 1072 | if (!ret) |
1063 | ret = btrfs_delete_delayed_items(trans, path, node->root, node); | 1073 | ret = btrfs_delete_delayed_items(trans, path, node->root, node); |
@@ -1065,6 +1075,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, | |||
1065 | ret = btrfs_update_delayed_inode(trans, node->root, path, node); | 1075 | ret = btrfs_update_delayed_inode(trans, node->root, path, node); |
1066 | btrfs_free_path(path); | 1076 | btrfs_free_path(path); |
1067 | 1077 | ||
1078 | trans->block_rsv = block_rsv; | ||
1068 | return ret; | 1079 | return ret; |
1069 | } | 1080 | } |
1070 | 1081 | ||
@@ -1115,6 +1126,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) | |||
1115 | struct btrfs_path *path; | 1126 | struct btrfs_path *path; |
1116 | struct btrfs_delayed_node *delayed_node = NULL; | 1127 | struct btrfs_delayed_node *delayed_node = NULL; |
1117 | struct btrfs_root *root; | 1128 | struct btrfs_root *root; |
1129 | struct btrfs_block_rsv *block_rsv; | ||
1118 | unsigned long nr = 0; | 1130 | unsigned long nr = 0; |
1119 | int need_requeue = 0; | 1131 | int need_requeue = 0; |
1120 | int ret; | 1132 | int ret; |
@@ -1129,10 +1141,13 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) | |||
1129 | delayed_node = async_node->delayed_node; | 1141 | delayed_node = async_node->delayed_node; |
1130 | root = delayed_node->root; | 1142 | root = delayed_node->root; |
1131 | 1143 | ||
1132 | trans = btrfs_join_transaction(root, 0); | 1144 | trans = btrfs_join_transaction(root); |
1133 | if (IS_ERR(trans)) | 1145 | if (IS_ERR(trans)) |
1134 | goto free_path; | 1146 | goto free_path; |
1135 | 1147 | ||
1148 | block_rsv = trans->block_rsv; | ||
1149 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
1150 | |||
1136 | ret = btrfs_insert_delayed_items(trans, path, root, delayed_node); | 1151 | ret = btrfs_insert_delayed_items(trans, path, root, delayed_node); |
1137 | if (!ret) | 1152 | if (!ret) |
1138 | ret = btrfs_delete_delayed_items(trans, path, root, | 1153 | ret = btrfs_delete_delayed_items(trans, path, root, |
@@ -1175,6 +1190,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) | |||
1175 | 1190 | ||
1176 | nr = trans->blocks_used; | 1191 | nr = trans->blocks_used; |
1177 | 1192 | ||
1193 | trans->block_rsv = block_rsv; | ||
1178 | btrfs_end_transaction_dmeta(trans, root); | 1194 | btrfs_end_transaction_dmeta(trans, root); |
1179 | __btrfs_btree_balance_dirty(root, nr); | 1195 | __btrfs_btree_balance_dirty(root, nr); |
1180 | free_path: | 1196 | free_path: |
@@ -1221,6 +1237,13 @@ again: | |||
1221 | return 0; | 1237 | return 0; |
1222 | } | 1238 | } |
1223 | 1239 | ||
1240 | void btrfs_assert_delayed_root_empty(struct btrfs_root *root) | ||
1241 | { | ||
1242 | struct btrfs_delayed_root *delayed_root; | ||
1243 | delayed_root = btrfs_get_delayed_root(root); | ||
1244 | WARN_ON(btrfs_first_delayed_node(delayed_root)); | ||
1245 | } | ||
1246 | |||
1224 | void btrfs_balance_delayed_items(struct btrfs_root *root) | 1247 | void btrfs_balance_delayed_items(struct btrfs_root *root) |
1225 | { | 1248 | { |
1226 | struct btrfs_delayed_root *delayed_root; | 1249 | struct btrfs_delayed_root *delayed_root; |
@@ -1572,8 +1595,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, | |||
1572 | btrfs_set_stack_inode_transid(inode_item, trans->transid); | 1595 | btrfs_set_stack_inode_transid(inode_item, trans->transid); |
1573 | btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); | 1596 | btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); |
1574 | btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); | 1597 | btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); |
1575 | btrfs_set_stack_inode_block_group(inode_item, | 1598 | btrfs_set_stack_inode_block_group(inode_item, 0); |
1576 | BTRFS_I(inode)->block_group); | ||
1577 | 1599 | ||
1578 | btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item), | 1600 | btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item), |
1579 | inode->i_atime.tv_sec); | 1601 | inode->i_atime.tv_sec); |
@@ -1595,7 +1617,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, | |||
1595 | struct btrfs_root *root, struct inode *inode) | 1617 | struct btrfs_root *root, struct inode *inode) |
1596 | { | 1618 | { |
1597 | struct btrfs_delayed_node *delayed_node; | 1619 | struct btrfs_delayed_node *delayed_node; |
1598 | int ret; | 1620 | int ret = 0; |
1599 | 1621 | ||
1600 | delayed_node = btrfs_get_or_create_delayed_node(inode); | 1622 | delayed_node = btrfs_get_or_create_delayed_node(inode); |
1601 | if (IS_ERR(delayed_node)) | 1623 | if (IS_ERR(delayed_node)) |
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index eb7d240aa648..d1a6a2915c66 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h | |||
@@ -75,7 +75,6 @@ struct btrfs_delayed_item { | |||
75 | struct list_head tree_list; /* used for batch insert/delete items */ | 75 | struct list_head tree_list; /* used for batch insert/delete items */ |
76 | struct list_head readdir_list; /* used for readdir items */ | 76 | struct list_head readdir_list; /* used for readdir items */ |
77 | u64 bytes_reserved; | 77 | u64 bytes_reserved; |
78 | struct btrfs_block_rsv *block_rsv; | ||
79 | struct btrfs_delayed_node *delayed_node; | 78 | struct btrfs_delayed_node *delayed_node; |
80 | atomic_t refs; | 79 | atomic_t refs; |
81 | int ins_or_del; | 80 | int ins_or_del; |
@@ -138,4 +137,8 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent, | |||
138 | /* for init */ | 137 | /* for init */ |
139 | int __init btrfs_delayed_inode_init(void); | 138 | int __init btrfs_delayed_inode_init(void); |
140 | void btrfs_delayed_inode_exit(void); | 139 | void btrfs_delayed_inode_exit(void); |
140 | |||
141 | /* for debugging */ | ||
142 | void btrfs_assert_delayed_root_empty(struct btrfs_root *root); | ||
143 | |||
141 | #endif | 144 | #endif |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 98b6a71decba..1ac8db5dc0a3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1044,7 +1044,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1044 | root->last_trans = 0; | 1044 | root->last_trans = 0; |
1045 | root->highest_objectid = 0; | 1045 | root->highest_objectid = 0; |
1046 | root->name = NULL; | 1046 | root->name = NULL; |
1047 | root->in_sysfs = 0; | ||
1048 | root->inode_tree = RB_ROOT; | 1047 | root->inode_tree = RB_ROOT; |
1049 | INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); | 1048 | INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); |
1050 | root->block_rsv = NULL; | 1049 | root->block_rsv = NULL; |
@@ -1300,19 +1299,21 @@ again: | |||
1300 | return root; | 1299 | return root; |
1301 | 1300 | ||
1302 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); | 1301 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); |
1303 | if (!root->free_ino_ctl) | ||
1304 | goto fail; | ||
1305 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), | 1302 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), |
1306 | GFP_NOFS); | 1303 | GFP_NOFS); |
1307 | if (!root->free_ino_pinned) | 1304 | if (!root->free_ino_pinned || !root->free_ino_ctl) { |
1305 | ret = -ENOMEM; | ||
1308 | goto fail; | 1306 | goto fail; |
1307 | } | ||
1309 | 1308 | ||
1310 | btrfs_init_free_ino_ctl(root); | 1309 | btrfs_init_free_ino_ctl(root); |
1311 | mutex_init(&root->fs_commit_mutex); | 1310 | mutex_init(&root->fs_commit_mutex); |
1312 | spin_lock_init(&root->cache_lock); | 1311 | spin_lock_init(&root->cache_lock); |
1313 | init_waitqueue_head(&root->cache_wait); | 1312 | init_waitqueue_head(&root->cache_wait); |
1314 | 1313 | ||
1315 | set_anon_super(&root->anon_super, NULL); | 1314 | ret = set_anon_super(&root->anon_super, NULL); |
1315 | if (ret) | ||
1316 | goto fail; | ||
1316 | 1317 | ||
1317 | if (btrfs_root_refs(&root->root_item) == 0) { | 1318 | if (btrfs_root_refs(&root->root_item) == 0) { |
1318 | ret = -ENOENT; | 1319 | ret = -ENOENT; |
@@ -1505,24 +1506,24 @@ static int transaction_kthread(void *arg) | |||
1505 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1506 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1506 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1507 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
1507 | 1508 | ||
1508 | spin_lock(&root->fs_info->new_trans_lock); | 1509 | spin_lock(&root->fs_info->trans_lock); |
1509 | cur = root->fs_info->running_transaction; | 1510 | cur = root->fs_info->running_transaction; |
1510 | if (!cur) { | 1511 | if (!cur) { |
1511 | spin_unlock(&root->fs_info->new_trans_lock); | 1512 | spin_unlock(&root->fs_info->trans_lock); |
1512 | goto sleep; | 1513 | goto sleep; |
1513 | } | 1514 | } |
1514 | 1515 | ||
1515 | now = get_seconds(); | 1516 | now = get_seconds(); |
1516 | if (!cur->blocked && | 1517 | if (!cur->blocked && |
1517 | (now < cur->start_time || now - cur->start_time < 30)) { | 1518 | (now < cur->start_time || now - cur->start_time < 30)) { |
1518 | spin_unlock(&root->fs_info->new_trans_lock); | 1519 | spin_unlock(&root->fs_info->trans_lock); |
1519 | delay = HZ * 5; | 1520 | delay = HZ * 5; |
1520 | goto sleep; | 1521 | goto sleep; |
1521 | } | 1522 | } |
1522 | transid = cur->transid; | 1523 | transid = cur->transid; |
1523 | spin_unlock(&root->fs_info->new_trans_lock); | 1524 | spin_unlock(&root->fs_info->trans_lock); |
1524 | 1525 | ||
1525 | trans = btrfs_join_transaction(root, 1); | 1526 | trans = btrfs_join_transaction(root); |
1526 | BUG_ON(IS_ERR(trans)); | 1527 | BUG_ON(IS_ERR(trans)); |
1527 | if (transid == trans->transid) { | 1528 | if (transid == trans->transid) { |
1528 | ret = btrfs_commit_transaction(trans, root); | 1529 | ret = btrfs_commit_transaction(trans, root); |
@@ -1613,11 +1614,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1613 | INIT_LIST_HEAD(&fs_info->ordered_operations); | 1614 | INIT_LIST_HEAD(&fs_info->ordered_operations); |
1614 | INIT_LIST_HEAD(&fs_info->caching_block_groups); | 1615 | INIT_LIST_HEAD(&fs_info->caching_block_groups); |
1615 | spin_lock_init(&fs_info->delalloc_lock); | 1616 | spin_lock_init(&fs_info->delalloc_lock); |
1616 | spin_lock_init(&fs_info->new_trans_lock); | 1617 | spin_lock_init(&fs_info->trans_lock); |
1617 | spin_lock_init(&fs_info->ref_cache_lock); | 1618 | spin_lock_init(&fs_info->ref_cache_lock); |
1618 | spin_lock_init(&fs_info->fs_roots_radix_lock); | 1619 | spin_lock_init(&fs_info->fs_roots_radix_lock); |
1619 | spin_lock_init(&fs_info->delayed_iput_lock); | 1620 | spin_lock_init(&fs_info->delayed_iput_lock); |
1620 | spin_lock_init(&fs_info->defrag_inodes_lock); | 1621 | spin_lock_init(&fs_info->defrag_inodes_lock); |
1622 | mutex_init(&fs_info->reloc_mutex); | ||
1621 | 1623 | ||
1622 | init_completion(&fs_info->kobj_unregister); | 1624 | init_completion(&fs_info->kobj_unregister); |
1623 | fs_info->tree_root = tree_root; | 1625 | fs_info->tree_root = tree_root; |
@@ -1645,6 +1647,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1645 | fs_info->max_inline = 8192 * 1024; | 1647 | fs_info->max_inline = 8192 * 1024; |
1646 | fs_info->metadata_ratio = 0; | 1648 | fs_info->metadata_ratio = 0; |
1647 | fs_info->defrag_inodes = RB_ROOT; | 1649 | fs_info->defrag_inodes = RB_ROOT; |
1650 | fs_info->trans_no_join = 0; | ||
1648 | 1651 | ||
1649 | fs_info->thread_pool_size = min_t(unsigned long, | 1652 | fs_info->thread_pool_size = min_t(unsigned long, |
1650 | num_online_cpus() + 2, 8); | 1653 | num_online_cpus() + 2, 8); |
@@ -1667,8 +1670,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1667 | init_waitqueue_head(&fs_info->scrub_pause_wait); | 1670 | init_waitqueue_head(&fs_info->scrub_pause_wait); |
1668 | init_rwsem(&fs_info->scrub_super_lock); | 1671 | init_rwsem(&fs_info->scrub_super_lock); |
1669 | fs_info->scrub_workers_refcnt = 0; | 1672 | fs_info->scrub_workers_refcnt = 0; |
1670 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", | ||
1671 | fs_info->thread_pool_size, &fs_info->generic_worker); | ||
1672 | 1673 | ||
1673 | sb->s_blocksize = 4096; | 1674 | sb->s_blocksize = 4096; |
1674 | sb->s_blocksize_bits = blksize_bits(4096); | 1675 | sb->s_blocksize_bits = blksize_bits(4096); |
@@ -1709,7 +1710,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1709 | fs_info->do_barriers = 1; | 1710 | fs_info->do_barriers = 1; |
1710 | 1711 | ||
1711 | 1712 | ||
1712 | mutex_init(&fs_info->trans_mutex); | ||
1713 | mutex_init(&fs_info->ordered_operations_mutex); | 1713 | mutex_init(&fs_info->ordered_operations_mutex); |
1714 | mutex_init(&fs_info->tree_log_mutex); | 1714 | mutex_init(&fs_info->tree_log_mutex); |
1715 | mutex_init(&fs_info->chunk_mutex); | 1715 | mutex_init(&fs_info->chunk_mutex); |
@@ -2479,13 +2479,13 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
2479 | down_write(&root->fs_info->cleanup_work_sem); | 2479 | down_write(&root->fs_info->cleanup_work_sem); |
2480 | up_write(&root->fs_info->cleanup_work_sem); | 2480 | up_write(&root->fs_info->cleanup_work_sem); |
2481 | 2481 | ||
2482 | trans = btrfs_join_transaction(root, 1); | 2482 | trans = btrfs_join_transaction(root); |
2483 | if (IS_ERR(trans)) | 2483 | if (IS_ERR(trans)) |
2484 | return PTR_ERR(trans); | 2484 | return PTR_ERR(trans); |
2485 | ret = btrfs_commit_transaction(trans, root); | 2485 | ret = btrfs_commit_transaction(trans, root); |
2486 | BUG_ON(ret); | 2486 | BUG_ON(ret); |
2487 | /* run commit again to drop the original snapshot */ | 2487 | /* run commit again to drop the original snapshot */ |
2488 | trans = btrfs_join_transaction(root, 1); | 2488 | trans = btrfs_join_transaction(root); |
2489 | if (IS_ERR(trans)) | 2489 | if (IS_ERR(trans)) |
2490 | return PTR_ERR(trans); | 2490 | return PTR_ERR(trans); |
2491 | btrfs_commit_transaction(trans, root); | 2491 | btrfs_commit_transaction(trans, root); |
@@ -2911,9 +2911,8 @@ static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) | |||
2911 | 2911 | ||
2912 | INIT_LIST_HEAD(&splice); | 2912 | INIT_LIST_HEAD(&splice); |
2913 | 2913 | ||
2914 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); | ||
2915 | |||
2916 | spin_lock(&root->fs_info->delalloc_lock); | 2914 | spin_lock(&root->fs_info->delalloc_lock); |
2915 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); | ||
2917 | 2916 | ||
2918 | while (!list_empty(&splice)) { | 2917 | while (!list_empty(&splice)) { |
2919 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | 2918 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, |
@@ -3024,10 +3023,13 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3024 | 3023 | ||
3025 | WARN_ON(1); | 3024 | WARN_ON(1); |
3026 | 3025 | ||
3027 | mutex_lock(&root->fs_info->trans_mutex); | ||
3028 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 3026 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
3029 | 3027 | ||
3028 | spin_lock(&root->fs_info->trans_lock); | ||
3030 | list_splice_init(&root->fs_info->trans_list, &list); | 3029 | list_splice_init(&root->fs_info->trans_list, &list); |
3030 | root->fs_info->trans_no_join = 1; | ||
3031 | spin_unlock(&root->fs_info->trans_lock); | ||
3032 | |||
3031 | while (!list_empty(&list)) { | 3033 | while (!list_empty(&list)) { |
3032 | t = list_entry(list.next, struct btrfs_transaction, list); | 3034 | t = list_entry(list.next, struct btrfs_transaction, list); |
3033 | if (!t) | 3035 | if (!t) |
@@ -3052,23 +3054,18 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3052 | t->blocked = 0; | 3054 | t->blocked = 0; |
3053 | if (waitqueue_active(&root->fs_info->transaction_wait)) | 3055 | if (waitqueue_active(&root->fs_info->transaction_wait)) |
3054 | wake_up(&root->fs_info->transaction_wait); | 3056 | wake_up(&root->fs_info->transaction_wait); |
3055 | mutex_unlock(&root->fs_info->trans_mutex); | ||
3056 | 3057 | ||
3057 | mutex_lock(&root->fs_info->trans_mutex); | ||
3058 | t->commit_done = 1; | 3058 | t->commit_done = 1; |
3059 | if (waitqueue_active(&t->commit_wait)) | 3059 | if (waitqueue_active(&t->commit_wait)) |
3060 | wake_up(&t->commit_wait); | 3060 | wake_up(&t->commit_wait); |
3061 | mutex_unlock(&root->fs_info->trans_mutex); | ||
3062 | |||
3063 | mutex_lock(&root->fs_info->trans_mutex); | ||
3064 | 3061 | ||
3065 | btrfs_destroy_pending_snapshots(t); | 3062 | btrfs_destroy_pending_snapshots(t); |
3066 | 3063 | ||
3067 | btrfs_destroy_delalloc_inodes(root); | 3064 | btrfs_destroy_delalloc_inodes(root); |
3068 | 3065 | ||
3069 | spin_lock(&root->fs_info->new_trans_lock); | 3066 | spin_lock(&root->fs_info->trans_lock); |
3070 | root->fs_info->running_transaction = NULL; | 3067 | root->fs_info->running_transaction = NULL; |
3071 | spin_unlock(&root->fs_info->new_trans_lock); | 3068 | spin_unlock(&root->fs_info->trans_lock); |
3072 | 3069 | ||
3073 | btrfs_destroy_marked_extents(root, &t->dirty_pages, | 3070 | btrfs_destroy_marked_extents(root, &t->dirty_pages, |
3074 | EXTENT_DIRTY); | 3071 | EXTENT_DIRTY); |
@@ -3082,8 +3079,10 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3082 | kmem_cache_free(btrfs_transaction_cachep, t); | 3079 | kmem_cache_free(btrfs_transaction_cachep, t); |
3083 | } | 3080 | } |
3084 | 3081 | ||
3082 | spin_lock(&root->fs_info->trans_lock); | ||
3083 | root->fs_info->trans_no_join = 0; | ||
3084 | spin_unlock(&root->fs_info->trans_lock); | ||
3085 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 3085 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); |
3086 | mutex_unlock(&root->fs_info->trans_mutex); | ||
3087 | 3086 | ||
3088 | return 0; | 3087 | return 0; |
3089 | } | 3088 | } |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 169bd62ce776..1f61bf5b4960 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -348,7 +348,7 @@ static int caching_kthread(void *data) | |||
348 | */ | 348 | */ |
349 | path->skip_locking = 1; | 349 | path->skip_locking = 1; |
350 | path->search_commit_root = 1; | 350 | path->search_commit_root = 1; |
351 | path->reada = 2; | 351 | path->reada = 1; |
352 | 352 | ||
353 | key.objectid = last; | 353 | key.objectid = last; |
354 | key.offset = 0; | 354 | key.offset = 0; |
@@ -366,8 +366,7 @@ again: | |||
366 | nritems = btrfs_header_nritems(leaf); | 366 | nritems = btrfs_header_nritems(leaf); |
367 | 367 | ||
368 | while (1) { | 368 | while (1) { |
369 | smp_mb(); | 369 | if (btrfs_fs_closing(fs_info) > 1) { |
370 | if (fs_info->closing > 1) { | ||
371 | last = (u64)-1; | 370 | last = (u64)-1; |
372 | break; | 371 | break; |
373 | } | 372 | } |
@@ -379,15 +378,18 @@ again: | |||
379 | if (ret) | 378 | if (ret) |
380 | break; | 379 | break; |
381 | 380 | ||
382 | caching_ctl->progress = last; | 381 | if (need_resched() || |
383 | btrfs_release_path(path); | 382 | btrfs_next_leaf(extent_root, path)) { |
384 | up_read(&fs_info->extent_commit_sem); | 383 | caching_ctl->progress = last; |
385 | mutex_unlock(&caching_ctl->mutex); | 384 | btrfs_release_path(path); |
386 | if (btrfs_transaction_in_commit(fs_info)) | 385 | up_read(&fs_info->extent_commit_sem); |
387 | schedule_timeout(1); | 386 | mutex_unlock(&caching_ctl->mutex); |
388 | else | ||
389 | cond_resched(); | 387 | cond_resched(); |
390 | goto again; | 388 | goto again; |
389 | } | ||
390 | leaf = path->nodes[0]; | ||
391 | nritems = btrfs_header_nritems(leaf); | ||
392 | continue; | ||
391 | } | 393 | } |
392 | 394 | ||
393 | if (key.objectid < block_group->key.objectid) { | 395 | if (key.objectid < block_group->key.objectid) { |
@@ -3065,7 +3067,7 @@ again: | |||
3065 | spin_unlock(&data_sinfo->lock); | 3067 | spin_unlock(&data_sinfo->lock); |
3066 | alloc: | 3068 | alloc: |
3067 | alloc_target = btrfs_get_alloc_profile(root, 1); | 3069 | alloc_target = btrfs_get_alloc_profile(root, 1); |
3068 | trans = btrfs_join_transaction(root, 1); | 3070 | trans = btrfs_join_transaction(root); |
3069 | if (IS_ERR(trans)) | 3071 | if (IS_ERR(trans)) |
3070 | return PTR_ERR(trans); | 3072 | return PTR_ERR(trans); |
3071 | 3073 | ||
@@ -3087,13 +3089,21 @@ alloc: | |||
3087 | } | 3089 | } |
3088 | goto again; | 3090 | goto again; |
3089 | } | 3091 | } |
3092 | |||
3093 | /* | ||
3094 | * If we have less pinned bytes than we want to allocate then | ||
3095 | * don't bother committing the transaction, it won't help us. | ||
3096 | */ | ||
3097 | if (data_sinfo->bytes_pinned < bytes) | ||
3098 | committed = 1; | ||
3090 | spin_unlock(&data_sinfo->lock); | 3099 | spin_unlock(&data_sinfo->lock); |
3091 | 3100 | ||
3092 | /* commit the current transaction and try again */ | 3101 | /* commit the current transaction and try again */ |
3093 | commit_trans: | 3102 | commit_trans: |
3094 | if (!committed && !root->fs_info->open_ioctl_trans) { | 3103 | if (!committed && |
3104 | !atomic_read(&root->fs_info->open_ioctl_trans)) { | ||
3095 | committed = 1; | 3105 | committed = 1; |
3096 | trans = btrfs_join_transaction(root, 1); | 3106 | trans = btrfs_join_transaction(root); |
3097 | if (IS_ERR(trans)) | 3107 | if (IS_ERR(trans)) |
3098 | return PTR_ERR(trans); | 3108 | return PTR_ERR(trans); |
3099 | ret = btrfs_commit_transaction(trans, root); | 3109 | ret = btrfs_commit_transaction(trans, root); |
@@ -3304,10 +3314,6 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3304 | if (reserved == 0) | 3314 | if (reserved == 0) |
3305 | return 0; | 3315 | return 0; |
3306 | 3316 | ||
3307 | /* nothing to shrink - nothing to reclaim */ | ||
3308 | if (root->fs_info->delalloc_bytes == 0) | ||
3309 | return 0; | ||
3310 | |||
3311 | max_reclaim = min(reserved, to_reclaim); | 3317 | max_reclaim = min(reserved, to_reclaim); |
3312 | 3318 | ||
3313 | while (loops < 1024) { | 3319 | while (loops < 1024) { |
@@ -3472,7 +3478,7 @@ again: | |||
3472 | goto out; | 3478 | goto out; |
3473 | 3479 | ||
3474 | ret = -ENOSPC; | 3480 | ret = -ENOSPC; |
3475 | trans = btrfs_join_transaction(root, 1); | 3481 | trans = btrfs_join_transaction(root); |
3476 | if (IS_ERR(trans)) | 3482 | if (IS_ERR(trans)) |
3477 | goto out; | 3483 | goto out; |
3478 | ret = btrfs_commit_transaction(trans, root); | 3484 | ret = btrfs_commit_transaction(trans, root); |
@@ -3699,7 +3705,7 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
3699 | if (trans) | 3705 | if (trans) |
3700 | return -EAGAIN; | 3706 | return -EAGAIN; |
3701 | 3707 | ||
3702 | trans = btrfs_join_transaction(root, 1); | 3708 | trans = btrfs_join_transaction(root); |
3703 | BUG_ON(IS_ERR(trans)); | 3709 | BUG_ON(IS_ERR(trans)); |
3704 | ret = btrfs_commit_transaction(trans, root); | 3710 | ret = btrfs_commit_transaction(trans, root); |
3705 | return 0; | 3711 | return 0; |
@@ -3837,6 +3843,37 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
3837 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); | 3843 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); |
3838 | } | 3844 | } |
3839 | 3845 | ||
3846 | int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3847 | struct btrfs_root *root, | ||
3848 | struct btrfs_block_rsv *rsv) | ||
3849 | { | ||
3850 | struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv; | ||
3851 | u64 num_bytes; | ||
3852 | int ret; | ||
3853 | |||
3854 | /* | ||
3855 | * Truncate should be freeing data, but give us 2 items just in case it | ||
3856 | * needs to use some space. We may want to be smarter about this in the | ||
3857 | * future. | ||
3858 | */ | ||
3859 | num_bytes = btrfs_calc_trans_metadata_size(root, 2); | ||
3860 | |||
3861 | /* We already have enough bytes, just return */ | ||
3862 | if (rsv->reserved >= num_bytes) | ||
3863 | return 0; | ||
3864 | |||
3865 | num_bytes -= rsv->reserved; | ||
3866 | |||
3867 | /* | ||
3868 | * You should have reserved enough space before hand to do this, so this | ||
3869 | * should not fail. | ||
3870 | */ | ||
3871 | ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes); | ||
3872 | BUG_ON(ret); | ||
3873 | |||
3874 | return 0; | ||
3875 | } | ||
3876 | |||
3840 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | 3877 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, |
3841 | struct btrfs_root *root, | 3878 | struct btrfs_root *root, |
3842 | int num_items) | 3879 | int num_items) |
@@ -3877,23 +3914,18 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3877 | struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; | 3914 | struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; |
3878 | 3915 | ||
3879 | /* | 3916 | /* |
3880 | * one for deleting orphan item, one for updating inode and | 3917 | * We need to hold space in order to delete our orphan item once we've |
3881 | * two for calling btrfs_truncate_inode_items. | 3918 | * added it, so this takes the reservation so we can release it later |
3882 | * | 3919 | * when we are truly done with the orphan item. |
3883 | * btrfs_truncate_inode_items is a delete operation, it frees | ||
3884 | * more space than it uses in most cases. So two units of | ||
3885 | * metadata space should be enough for calling it many times. | ||
3886 | * If all of the metadata space is used, we can commit | ||
3887 | * transaction and use space it freed. | ||
3888 | */ | 3920 | */ |
3889 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4); | 3921 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
3890 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 3922 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
3891 | } | 3923 | } |
3892 | 3924 | ||
3893 | void btrfs_orphan_release_metadata(struct inode *inode) | 3925 | void btrfs_orphan_release_metadata(struct inode *inode) |
3894 | { | 3926 | { |
3895 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3927 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3896 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4); | 3928 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
3897 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); | 3929 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); |
3898 | } | 3930 | } |
3899 | 3931 | ||
@@ -4987,6 +5019,15 @@ have_block_group: | |||
4987 | if (unlikely(block_group->ro)) | 5019 | if (unlikely(block_group->ro)) |
4988 | goto loop; | 5020 | goto loop; |
4989 | 5021 | ||
5022 | spin_lock(&block_group->free_space_ctl->tree_lock); | ||
5023 | if (cached && | ||
5024 | block_group->free_space_ctl->free_space < | ||
5025 | num_bytes + empty_size) { | ||
5026 | spin_unlock(&block_group->free_space_ctl->tree_lock); | ||
5027 | goto loop; | ||
5028 | } | ||
5029 | spin_unlock(&block_group->free_space_ctl->tree_lock); | ||
5030 | |||
4990 | /* | 5031 | /* |
4991 | * Ok we want to try and use the cluster allocator, so lets look | 5032 | * Ok we want to try and use the cluster allocator, so lets look |
4992 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will | 5033 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will |
@@ -5150,6 +5191,7 @@ checks: | |||
5150 | btrfs_add_free_space(block_group, offset, | 5191 | btrfs_add_free_space(block_group, offset, |
5151 | search_start - offset); | 5192 | search_start - offset); |
5152 | BUG_ON(offset > search_start); | 5193 | BUG_ON(offset > search_start); |
5194 | btrfs_put_block_group(block_group); | ||
5153 | break; | 5195 | break; |
5154 | loop: | 5196 | loop: |
5155 | failed_cluster_refill = false; | 5197 | failed_cluster_refill = false; |
@@ -5172,9 +5214,7 @@ loop: | |||
5172 | * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try | 5214 | * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try |
5173 | * again | 5215 | * again |
5174 | */ | 5216 | */ |
5175 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && | 5217 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) { |
5176 | (found_uncached_bg || empty_size || empty_cluster || | ||
5177 | allowed_chunk_alloc)) { | ||
5178 | index = 0; | 5218 | index = 0; |
5179 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { | 5219 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
5180 | found_uncached_bg = false; | 5220 | found_uncached_bg = false; |
@@ -5214,42 +5254,39 @@ loop: | |||
5214 | goto search; | 5254 | goto search; |
5215 | } | 5255 | } |
5216 | 5256 | ||
5217 | if (loop < LOOP_CACHING_WAIT) { | 5257 | loop++; |
5218 | loop++; | ||
5219 | goto search; | ||
5220 | } | ||
5221 | 5258 | ||
5222 | if (loop == LOOP_ALLOC_CHUNK) { | 5259 | if (loop == LOOP_ALLOC_CHUNK) { |
5223 | empty_size = 0; | 5260 | if (allowed_chunk_alloc) { |
5224 | empty_cluster = 0; | 5261 | ret = do_chunk_alloc(trans, root, num_bytes + |
5225 | } | 5262 | 2 * 1024 * 1024, data, |
5263 | CHUNK_ALLOC_LIMITED); | ||
5264 | allowed_chunk_alloc = 0; | ||
5265 | if (ret == 1) | ||
5266 | done_chunk_alloc = 1; | ||
5267 | } else if (!done_chunk_alloc && | ||
5268 | space_info->force_alloc == | ||
5269 | CHUNK_ALLOC_NO_FORCE) { | ||
5270 | space_info->force_alloc = CHUNK_ALLOC_LIMITED; | ||
5271 | } | ||
5226 | 5272 | ||
5227 | if (allowed_chunk_alloc) { | 5273 | /* |
5228 | ret = do_chunk_alloc(trans, root, num_bytes + | 5274 | * We didn't allocate a chunk, go ahead and drop the |
5229 | 2 * 1024 * 1024, data, | 5275 | * empty size and loop again. |
5230 | CHUNK_ALLOC_LIMITED); | 5276 | */ |
5231 | allowed_chunk_alloc = 0; | 5277 | if (!done_chunk_alloc) |
5232 | done_chunk_alloc = 1; | 5278 | loop = LOOP_NO_EMPTY_SIZE; |
5233 | } else if (!done_chunk_alloc && | ||
5234 | space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) { | ||
5235 | space_info->force_alloc = CHUNK_ALLOC_LIMITED; | ||
5236 | } | 5279 | } |
5237 | 5280 | ||
5238 | if (loop < LOOP_NO_EMPTY_SIZE) { | 5281 | if (loop == LOOP_NO_EMPTY_SIZE) { |
5239 | loop++; | 5282 | empty_size = 0; |
5240 | goto search; | 5283 | empty_cluster = 0; |
5241 | } | 5284 | } |
5242 | ret = -ENOSPC; | 5285 | |
5286 | goto search; | ||
5243 | } else if (!ins->objectid) { | 5287 | } else if (!ins->objectid) { |
5244 | ret = -ENOSPC; | 5288 | ret = -ENOSPC; |
5245 | } | 5289 | } else if (ins->objectid) { |
5246 | |||
5247 | /* we found what we needed */ | ||
5248 | if (ins->objectid) { | ||
5249 | if (!(data & BTRFS_BLOCK_GROUP_DATA)) | ||
5250 | trans->block_group = block_group->key.objectid; | ||
5251 | |||
5252 | btrfs_put_block_group(block_group); | ||
5253 | ret = 0; | 5290 | ret = 0; |
5254 | } | 5291 | } |
5255 | 5292 | ||
@@ -6526,7 +6563,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
6526 | 6563 | ||
6527 | BUG_ON(cache->ro); | 6564 | BUG_ON(cache->ro); |
6528 | 6565 | ||
6529 | trans = btrfs_join_transaction(root, 1); | 6566 | trans = btrfs_join_transaction(root); |
6530 | BUG_ON(IS_ERR(trans)); | 6567 | BUG_ON(IS_ERR(trans)); |
6531 | 6568 | ||
6532 | alloc_flags = update_block_group_flags(root, cache->flags); | 6569 | alloc_flags = update_block_group_flags(root, cache->flags); |
@@ -6882,6 +6919,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
6882 | path = btrfs_alloc_path(); | 6919 | path = btrfs_alloc_path(); |
6883 | if (!path) | 6920 | if (!path) |
6884 | return -ENOMEM; | 6921 | return -ENOMEM; |
6922 | path->reada = 1; | ||
6885 | 6923 | ||
6886 | cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); | 6924 | cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); |
6887 | if (cache_gen != 0 && | 6925 | if (cache_gen != 0 && |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c5d9fbb92bc3..7055d11c1efd 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -1476,7 +1476,7 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
1476 | if (total_bytes >= max_bytes) | 1476 | if (total_bytes >= max_bytes) |
1477 | break; | 1477 | break; |
1478 | if (!found) { | 1478 | if (!found) { |
1479 | *start = state->start; | 1479 | *start = max(cur_start, state->start); |
1480 | found = 1; | 1480 | found = 1; |
1481 | } | 1481 | } |
1482 | last = state->end; | 1482 | last = state->end; |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 4e8445a4757c..a11a92ee2d30 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -126,9 +126,9 @@ struct extent_buffer { | |||
126 | unsigned long map_len; | 126 | unsigned long map_len; |
127 | struct page *first_page; | 127 | struct page *first_page; |
128 | unsigned long bflags; | 128 | unsigned long bflags; |
129 | atomic_t refs; | ||
130 | struct list_head leak_list; | 129 | struct list_head leak_list; |
131 | struct rcu_head rcu_head; | 130 | struct rcu_head rcu_head; |
131 | atomic_t refs; | ||
132 | 132 | ||
133 | /* the spinlock is used to protect most operations */ | 133 | /* the spinlock is used to protect most operations */ |
134 | spinlock_t lock; | 134 | spinlock_t lock; |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c6a22d783c35..fa4ef18b66b1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -129,7 +129,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
129 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) | 129 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) |
130 | return 0; | 130 | return 0; |
131 | 131 | ||
132 | if (root->fs_info->closing) | 132 | if (btrfs_fs_closing(root->fs_info)) |
133 | return 0; | 133 | return 0; |
134 | 134 | ||
135 | if (BTRFS_I(inode)->in_defrag) | 135 | if (BTRFS_I(inode)->in_defrag) |
@@ -144,7 +144,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
144 | if (!defrag) | 144 | if (!defrag) |
145 | return -ENOMEM; | 145 | return -ENOMEM; |
146 | 146 | ||
147 | defrag->ino = inode->i_ino; | 147 | defrag->ino = btrfs_ino(inode); |
148 | defrag->transid = transid; | 148 | defrag->transid = transid; |
149 | defrag->root = root->root_key.objectid; | 149 | defrag->root = root->root_key.objectid; |
150 | 150 | ||
@@ -229,7 +229,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
229 | first_ino = defrag->ino + 1; | 229 | first_ino = defrag->ino + 1; |
230 | rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); | 230 | rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); |
231 | 231 | ||
232 | if (fs_info->closing) | 232 | if (btrfs_fs_closing(fs_info)) |
233 | goto next_free; | 233 | goto next_free; |
234 | 234 | ||
235 | spin_unlock(&fs_info->defrag_inodes_lock); | 235 | spin_unlock(&fs_info->defrag_inodes_lock); |
@@ -1480,14 +1480,12 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1480 | * the current transaction, we can bail out now without any | 1480 | * the current transaction, we can bail out now without any |
1481 | * syncing | 1481 | * syncing |
1482 | */ | 1482 | */ |
1483 | mutex_lock(&root->fs_info->trans_mutex); | 1483 | smp_mb(); |
1484 | if (BTRFS_I(inode)->last_trans <= | 1484 | if (BTRFS_I(inode)->last_trans <= |
1485 | root->fs_info->last_trans_committed) { | 1485 | root->fs_info->last_trans_committed) { |
1486 | BTRFS_I(inode)->last_trans = 0; | 1486 | BTRFS_I(inode)->last_trans = 0; |
1487 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1488 | goto out; | 1487 | goto out; |
1489 | } | 1488 | } |
1490 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1491 | 1489 | ||
1492 | /* | 1490 | /* |
1493 | * ok we haven't committed the transaction yet, lets do a commit | 1491 | * ok we haven't committed the transaction yet, lets do a commit |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 70d45795d758..9f985a429877 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -98,7 +98,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, | |||
98 | return inode; | 98 | return inode; |
99 | 99 | ||
100 | spin_lock(&block_group->lock); | 100 | spin_lock(&block_group->lock); |
101 | if (!root->fs_info->closing) { | 101 | if (!btrfs_fs_closing(root->fs_info)) { |
102 | block_group->inode = igrab(inode); | 102 | block_group->inode = igrab(inode); |
103 | block_group->iref = 1; | 103 | block_group->iref = 1; |
104 | } | 104 | } |
@@ -250,7 +250,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
250 | pgoff_t index = 0; | 250 | pgoff_t index = 0; |
251 | unsigned long first_page_offset; | 251 | unsigned long first_page_offset; |
252 | int num_checksums; | 252 | int num_checksums; |
253 | int ret = 0, ret2; | 253 | int ret = 0; |
254 | 254 | ||
255 | INIT_LIST_HEAD(&bitmaps); | 255 | INIT_LIST_HEAD(&bitmaps); |
256 | 256 | ||
@@ -402,7 +402,14 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
402 | spin_lock(&ctl->tree_lock); | 402 | spin_lock(&ctl->tree_lock); |
403 | ret = link_free_space(ctl, e); | 403 | ret = link_free_space(ctl, e); |
404 | spin_unlock(&ctl->tree_lock); | 404 | spin_unlock(&ctl->tree_lock); |
405 | BUG_ON(ret); | 405 | if (ret) { |
406 | printk(KERN_ERR "Duplicate entries in " | ||
407 | "free space cache, dumping\n"); | ||
408 | kunmap(page); | ||
409 | unlock_page(page); | ||
410 | page_cache_release(page); | ||
411 | goto free_cache; | ||
412 | } | ||
406 | } else { | 413 | } else { |
407 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); | 414 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
408 | if (!e->bitmap) { | 415 | if (!e->bitmap) { |
@@ -414,10 +421,18 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
414 | goto free_cache; | 421 | goto free_cache; |
415 | } | 422 | } |
416 | spin_lock(&ctl->tree_lock); | 423 | spin_lock(&ctl->tree_lock); |
417 | ret2 = link_free_space(ctl, e); | 424 | ret = link_free_space(ctl, e); |
418 | ctl->total_bitmaps++; | 425 | ctl->total_bitmaps++; |
419 | ctl->op->recalc_thresholds(ctl); | 426 | ctl->op->recalc_thresholds(ctl); |
420 | spin_unlock(&ctl->tree_lock); | 427 | spin_unlock(&ctl->tree_lock); |
428 | if (ret) { | ||
429 | printk(KERN_ERR "Duplicate entries in " | ||
430 | "free space cache, dumping\n"); | ||
431 | kunmap(page); | ||
432 | unlock_page(page); | ||
433 | page_cache_release(page); | ||
434 | goto free_cache; | ||
435 | } | ||
421 | list_add_tail(&e->list, &bitmaps); | 436 | list_add_tail(&e->list, &bitmaps); |
422 | } | 437 | } |
423 | 438 | ||
@@ -478,8 +493,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
478 | * If we're unmounting then just return, since this does a search on the | 493 | * If we're unmounting then just return, since this does a search on the |
479 | * normal root and not the commit root and we could deadlock. | 494 | * normal root and not the commit root and we could deadlock. |
480 | */ | 495 | */ |
481 | smp_mb(); | 496 | if (btrfs_fs_closing(fs_info)) |
482 | if (fs_info->closing) | ||
483 | return 0; | 497 | return 0; |
484 | 498 | ||
485 | /* | 499 | /* |
@@ -575,10 +589,25 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
575 | 589 | ||
576 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | 590 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> |
577 | PAGE_CACHE_SHIFT; | 591 | PAGE_CACHE_SHIFT; |
592 | |||
593 | /* Since the first page has all of our checksums and our generation we | ||
594 | * need to calculate the offset into the page that we can start writing | ||
595 | * our entries. | ||
596 | */ | ||
597 | first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); | ||
598 | |||
578 | filemap_write_and_wait(inode->i_mapping); | 599 | filemap_write_and_wait(inode->i_mapping); |
579 | btrfs_wait_ordered_range(inode, inode->i_size & | 600 | btrfs_wait_ordered_range(inode, inode->i_size & |
580 | ~(root->sectorsize - 1), (u64)-1); | 601 | ~(root->sectorsize - 1), (u64)-1); |
581 | 602 | ||
603 | /* make sure we don't overflow that first page */ | ||
604 | if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) { | ||
605 | /* this is really the same as running out of space, where we also return 0 */ | ||
606 | printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n"); | ||
607 | ret = 0; | ||
608 | goto out_update; | ||
609 | } | ||
610 | |||
582 | /* We need a checksum per page. */ | 611 | /* We need a checksum per page. */ |
583 | crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); | 612 | crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); |
584 | if (!crc) | 613 | if (!crc) |
@@ -590,12 +619,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
590 | return -1; | 619 | return -1; |
591 | } | 620 | } |
592 | 621 | ||
593 | /* Since the first page has all of our checksums and our generation we | ||
594 | * need to calculate the offset into the page that we can start writing | ||
595 | * our entries. | ||
596 | */ | ||
597 | first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); | ||
598 | |||
599 | /* Get the cluster for this block_group if it exists */ | 622 | /* Get the cluster for this block_group if it exists */ |
600 | if (block_group && !list_empty(&block_group->cluster_list)) | 623 | if (block_group && !list_empty(&block_group->cluster_list)) |
601 | cluster = list_entry(block_group->cluster_list.next, | 624 | cluster = list_entry(block_group->cluster_list.next, |
@@ -857,12 +880,14 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
857 | ret = 1; | 880 | ret = 1; |
858 | 881 | ||
859 | out_free: | 882 | out_free: |
883 | kfree(checksums); | ||
884 | kfree(pages); | ||
885 | |||
886 | out_update: | ||
860 | if (ret != 1) { | 887 | if (ret != 1) { |
861 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); | 888 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); |
862 | BTRFS_I(inode)->generation = 0; | 889 | BTRFS_I(inode)->generation = 0; |
863 | } | 890 | } |
864 | kfree(checksums); | ||
865 | kfree(pages); | ||
866 | btrfs_update_inode(trans, root, inode); | 891 | btrfs_update_inode(trans, root, inode); |
867 | return ret; | 892 | return ret; |
868 | } | 893 | } |
@@ -963,10 +988,16 @@ static int tree_insert_offset(struct rb_root *root, u64 offset, | |||
963 | * logically. | 988 | * logically. |
964 | */ | 989 | */ |
965 | if (bitmap) { | 990 | if (bitmap) { |
966 | WARN_ON(info->bitmap); | 991 | if (info->bitmap) { |
992 | WARN_ON_ONCE(1); | ||
993 | return -EEXIST; | ||
994 | } | ||
967 | p = &(*p)->rb_right; | 995 | p = &(*p)->rb_right; |
968 | } else { | 996 | } else { |
969 | WARN_ON(!info->bitmap); | 997 | if (!info->bitmap) { |
998 | WARN_ON_ONCE(1); | ||
999 | return -EEXIST; | ||
1000 | } | ||
970 | p = &(*p)->rb_left; | 1001 | p = &(*p)->rb_left; |
971 | } | 1002 | } |
972 | } | 1003 | } |
@@ -1386,6 +1417,23 @@ again: | |||
1386 | return 0; | 1417 | return 0; |
1387 | } | 1418 | } |
1388 | 1419 | ||
1420 | static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl, | ||
1421 | struct btrfs_free_space *info, u64 offset, | ||
1422 | u64 bytes) | ||
1423 | { | ||
1424 | u64 bytes_to_set = 0; | ||
1425 | u64 end; | ||
1426 | |||
1427 | end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); | ||
1428 | |||
1429 | bytes_to_set = min(end - offset, bytes); | ||
1430 | |||
1431 | bitmap_set_bits(ctl, info, offset, bytes_to_set); | ||
1432 | |||
1433 | return bytes_to_set; | ||
1434 | |||
1435 | } | ||
1436 | |||
1389 | static bool use_bitmap(struct btrfs_free_space_ctl *ctl, | 1437 | static bool use_bitmap(struct btrfs_free_space_ctl *ctl, |
1390 | struct btrfs_free_space *info) | 1438 | struct btrfs_free_space *info) |
1391 | { | 1439 | { |
@@ -1422,12 +1470,18 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl, | |||
1422 | return true; | 1470 | return true; |
1423 | } | 1471 | } |
1424 | 1472 | ||
1473 | static struct btrfs_free_space_op free_space_op = { | ||
1474 | .recalc_thresholds = recalculate_thresholds, | ||
1475 | .use_bitmap = use_bitmap, | ||
1476 | }; | ||
1477 | |||
1425 | static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, | 1478 | static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, |
1426 | struct btrfs_free_space *info) | 1479 | struct btrfs_free_space *info) |
1427 | { | 1480 | { |
1428 | struct btrfs_free_space *bitmap_info; | 1481 | struct btrfs_free_space *bitmap_info; |
1482 | struct btrfs_block_group_cache *block_group = NULL; | ||
1429 | int added = 0; | 1483 | int added = 0; |
1430 | u64 bytes, offset, end; | 1484 | u64 bytes, offset, bytes_added; |
1431 | int ret; | 1485 | int ret; |
1432 | 1486 | ||
1433 | bytes = info->bytes; | 1487 | bytes = info->bytes; |
@@ -1436,7 +1490,49 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, | |||
1436 | if (!ctl->op->use_bitmap(ctl, info)) | 1490 | if (!ctl->op->use_bitmap(ctl, info)) |
1437 | return 0; | 1491 | return 0; |
1438 | 1492 | ||
1493 | if (ctl->op == &free_space_op) | ||
1494 | block_group = ctl->private; | ||
1439 | again: | 1495 | again: |
1496 | /* | ||
1497 | * Since we link bitmaps right into the cluster we need to see if we | ||
1498 | * have a cluster here, and if so and it has our bitmap we need to add | ||
1499 | * the free space to that bitmap. | ||
1500 | */ | ||
1501 | if (block_group && !list_empty(&block_group->cluster_list)) { | ||
1502 | struct btrfs_free_cluster *cluster; | ||
1503 | struct rb_node *node; | ||
1504 | struct btrfs_free_space *entry; | ||
1505 | |||
1506 | cluster = list_entry(block_group->cluster_list.next, | ||
1507 | struct btrfs_free_cluster, | ||
1508 | block_group_list); | ||
1509 | spin_lock(&cluster->lock); | ||
1510 | node = rb_first(&cluster->root); | ||
1511 | if (!node) { | ||
1512 | spin_unlock(&cluster->lock); | ||
1513 | goto no_cluster_bitmap; | ||
1514 | } | ||
1515 | |||
1516 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
1517 | if (!entry->bitmap) { | ||
1518 | spin_unlock(&cluster->lock); | ||
1519 | goto no_cluster_bitmap; | ||
1520 | } | ||
1521 | |||
1522 | if (entry->offset == offset_to_bitmap(ctl, offset)) { | ||
1523 | bytes_added = add_bytes_to_bitmap(ctl, entry, | ||
1524 | offset, bytes); | ||
1525 | bytes -= bytes_added; | ||
1526 | offset += bytes_added; | ||
1527 | } | ||
1528 | spin_unlock(&cluster->lock); | ||
1529 | if (!bytes) { | ||
1530 | ret = 1; | ||
1531 | goto out; | ||
1532 | } | ||
1533 | } | ||
1534 | |||
1535 | no_cluster_bitmap: | ||
1440 | bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), | 1536 | bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), |
1441 | 1, 0); | 1537 | 1, 0); |
1442 | if (!bitmap_info) { | 1538 | if (!bitmap_info) { |
@@ -1444,19 +1540,10 @@ again: | |||
1444 | goto new_bitmap; | 1540 | goto new_bitmap; |
1445 | } | 1541 | } |
1446 | 1542 | ||
1447 | end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); | 1543 | bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); |
1448 | 1544 | bytes -= bytes_added; | |
1449 | if (offset >= bitmap_info->offset && offset + bytes > end) { | 1545 | offset += bytes_added; |
1450 | bitmap_set_bits(ctl, bitmap_info, offset, end - offset); | 1546 | added = 0; |
1451 | bytes -= end - offset; | ||
1452 | offset = end; | ||
1453 | added = 0; | ||
1454 | } else if (offset >= bitmap_info->offset && offset + bytes <= end) { | ||
1455 | bitmap_set_bits(ctl, bitmap_info, offset, bytes); | ||
1456 | bytes = 0; | ||
1457 | } else { | ||
1458 | BUG(); | ||
1459 | } | ||
1460 | 1547 | ||
1461 | if (!bytes) { | 1548 | if (!bytes) { |
1462 | ret = 1; | 1549 | ret = 1; |
@@ -1735,11 +1822,6 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | |||
1735 | "\n", count); | 1822 | "\n", count); |
1736 | } | 1823 | } |
1737 | 1824 | ||
1738 | static struct btrfs_free_space_op free_space_op = { | ||
1739 | .recalc_thresholds = recalculate_thresholds, | ||
1740 | .use_bitmap = use_bitmap, | ||
1741 | }; | ||
1742 | |||
1743 | void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) | 1825 | void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) |
1744 | { | 1826 | { |
1745 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | 1827 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
@@ -2111,9 +2193,11 @@ again: | |||
2111 | /* | 2193 | /* |
2112 | * This searches the block group for just extents to fill the cluster with. | 2194 | * This searches the block group for just extents to fill the cluster with. |
2113 | */ | 2195 | */ |
2114 | static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, | 2196 | static noinline int |
2115 | struct btrfs_free_cluster *cluster, | 2197 | setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, |
2116 | u64 offset, u64 bytes, u64 min_bytes) | 2198 | struct btrfs_free_cluster *cluster, |
2199 | struct list_head *bitmaps, u64 offset, u64 bytes, | ||
2200 | u64 min_bytes) | ||
2117 | { | 2201 | { |
2118 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | 2202 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
2119 | struct btrfs_free_space *first = NULL; | 2203 | struct btrfs_free_space *first = NULL; |
@@ -2135,6 +2219,8 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
2135 | * extent entry. | 2219 | * extent entry. |
2136 | */ | 2220 | */ |
2137 | while (entry->bitmap) { | 2221 | while (entry->bitmap) { |
2222 | if (list_empty(&entry->list)) | ||
2223 | list_add_tail(&entry->list, bitmaps); | ||
2138 | node = rb_next(&entry->offset_index); | 2224 | node = rb_next(&entry->offset_index); |
2139 | if (!node) | 2225 | if (!node) |
2140 | return -ENOSPC; | 2226 | return -ENOSPC; |
@@ -2154,8 +2240,12 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
2154 | return -ENOSPC; | 2240 | return -ENOSPC; |
2155 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 2241 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
2156 | 2242 | ||
2157 | if (entry->bitmap) | 2243 | if (entry->bitmap) { |
2244 | if (list_empty(&entry->list)) | ||
2245 | list_add_tail(&entry->list, bitmaps); | ||
2158 | continue; | 2246 | continue; |
2247 | } | ||
2248 | |||
2159 | /* | 2249 | /* |
2160 | * we haven't filled the empty size and the window is | 2250 | * we haven't filled the empty size and the window is |
2161 | * very large. reset and try again | 2251 | * very large. reset and try again |
@@ -2207,9 +2297,11 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
2207 | * This specifically looks for bitmaps that may work in the cluster, we assume | 2297 | * This specifically looks for bitmaps that may work in the cluster, we assume |
2208 | * that we have already failed to find extents that will work. | 2298 | * that we have already failed to find extents that will work. |
2209 | */ | 2299 | */ |
2210 | static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, | 2300 | static noinline int |
2211 | struct btrfs_free_cluster *cluster, | 2301 | setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, |
2212 | u64 offset, u64 bytes, u64 min_bytes) | 2302 | struct btrfs_free_cluster *cluster, |
2303 | struct list_head *bitmaps, u64 offset, u64 bytes, | ||
2304 | u64 min_bytes) | ||
2213 | { | 2305 | { |
2214 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | 2306 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
2215 | struct btrfs_free_space *entry; | 2307 | struct btrfs_free_space *entry; |
@@ -2219,10 +2311,39 @@ static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, | |||
2219 | if (ctl->total_bitmaps == 0) | 2311 | if (ctl->total_bitmaps == 0) |
2220 | return -ENOSPC; | 2312 | return -ENOSPC; |
2221 | 2313 | ||
2314 | /* | ||
2315 | * First check our cached list of bitmaps and see if there is an entry | ||
2316 | * here that will work. | ||
2317 | */ | ||
2318 | list_for_each_entry(entry, bitmaps, list) { | ||
2319 | if (entry->bytes < min_bytes) | ||
2320 | continue; | ||
2321 | ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, | ||
2322 | bytes, min_bytes); | ||
2323 | if (!ret) | ||
2324 | return 0; | ||
2325 | } | ||
2326 | |||
2327 | /* | ||
2328 | * If we do have entries on our list and we are here then we didn't find | ||
2329 | * anything, so go ahead and get the next entry after the last entry in | ||
2330 | * this list and start the search from there. | ||
2331 | */ | ||
2332 | if (!list_empty(bitmaps)) { | ||
2333 | entry = list_entry(bitmaps->prev, struct btrfs_free_space, | ||
2334 | list); | ||
2335 | node = rb_next(&entry->offset_index); | ||
2336 | if (!node) | ||
2337 | return -ENOSPC; | ||
2338 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
2339 | goto search; | ||
2340 | } | ||
2341 | |||
2222 | entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1); | 2342 | entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1); |
2223 | if (!entry) | 2343 | if (!entry) |
2224 | return -ENOSPC; | 2344 | return -ENOSPC; |
2225 | 2345 | ||
2346 | search: | ||
2226 | node = &entry->offset_index; | 2347 | node = &entry->offset_index; |
2227 | do { | 2348 | do { |
2228 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 2349 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
@@ -2253,6 +2374,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | |||
2253 | u64 offset, u64 bytes, u64 empty_size) | 2374 | u64 offset, u64 bytes, u64 empty_size) |
2254 | { | 2375 | { |
2255 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | 2376 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
2377 | struct list_head bitmaps; | ||
2378 | struct btrfs_free_space *entry, *tmp; | ||
2256 | u64 min_bytes; | 2379 | u64 min_bytes; |
2257 | int ret; | 2380 | int ret; |
2258 | 2381 | ||
@@ -2291,11 +2414,16 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | |||
2291 | goto out; | 2414 | goto out; |
2292 | } | 2415 | } |
2293 | 2416 | ||
2294 | ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes, | 2417 | INIT_LIST_HEAD(&bitmaps); |
2295 | min_bytes); | 2418 | ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, |
2419 | bytes, min_bytes); | ||
2296 | if (ret) | 2420 | if (ret) |
2297 | ret = setup_cluster_bitmap(block_group, cluster, offset, | 2421 | ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, |
2298 | bytes, min_bytes); | 2422 | offset, bytes, min_bytes); |
2423 | |||
2424 | /* Clear our temporary list */ | ||
2425 | list_for_each_entry_safe(entry, tmp, &bitmaps, list) | ||
2426 | list_del_init(&entry->list); | ||
2299 | 2427 | ||
2300 | if (!ret) { | 2428 | if (!ret) { |
2301 | atomic_inc(&block_group->count); | 2429 | atomic_inc(&block_group->count); |
@@ -2481,7 +2609,7 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root, | |||
2481 | return inode; | 2609 | return inode; |
2482 | 2610 | ||
2483 | spin_lock(&root->cache_lock); | 2611 | spin_lock(&root->cache_lock); |
2484 | if (!root->fs_info->closing) | 2612 | if (!btrfs_fs_closing(root->fs_info)) |
2485 | root->cache_inode = igrab(inode); | 2613 | root->cache_inode = igrab(inode); |
2486 | spin_unlock(&root->cache_lock); | 2614 | spin_unlock(&root->cache_lock); |
2487 | 2615 | ||
@@ -2504,12 +2632,14 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | |||
2504 | int ret = 0; | 2632 | int ret = 0; |
2505 | u64 root_gen = btrfs_root_generation(&root->root_item); | 2633 | u64 root_gen = btrfs_root_generation(&root->root_item); |
2506 | 2634 | ||
2635 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
2636 | return 0; | ||
2637 | |||
2507 | /* | 2638 | /* |
2508 | * If we're unmounting then just return, since this does a search on the | 2639 | * If we're unmounting then just return, since this does a search on the |
2509 | * normal root and not the commit root and we could deadlock. | 2640 | * normal root and not the commit root and we could deadlock. |
2510 | */ | 2641 | */ |
2511 | smp_mb(); | 2642 | if (btrfs_fs_closing(fs_info)) |
2512 | if (fs_info->closing) | ||
2513 | return 0; | 2643 | return 0; |
2514 | 2644 | ||
2515 | path = btrfs_alloc_path(); | 2645 | path = btrfs_alloc_path(); |
@@ -2543,6 +2673,9 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, | |||
2543 | struct inode *inode; | 2673 | struct inode *inode; |
2544 | int ret; | 2674 | int ret; |
2545 | 2675 | ||
2676 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
2677 | return 0; | ||
2678 | |||
2546 | inode = lookup_free_ino_inode(root, path); | 2679 | inode = lookup_free_ino_inode(root, path); |
2547 | if (IS_ERR(inode)) | 2680 | if (IS_ERR(inode)) |
2548 | return 0; | 2681 | return 0; |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 3262cd17a12f..b4087e0fa871 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -38,6 +38,9 @@ static int caching_kthread(void *data) | |||
38 | int slot; | 38 | int slot; |
39 | int ret; | 39 | int ret; |
40 | 40 | ||
41 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
42 | return 0; | ||
43 | |||
41 | path = btrfs_alloc_path(); | 44 | path = btrfs_alloc_path(); |
42 | if (!path) | 45 | if (!path) |
43 | return -ENOMEM; | 46 | return -ENOMEM; |
@@ -59,8 +62,7 @@ again: | |||
59 | goto out; | 62 | goto out; |
60 | 63 | ||
61 | while (1) { | 64 | while (1) { |
62 | smp_mb(); | 65 | if (btrfs_fs_closing(fs_info)) |
63 | if (fs_info->closing) | ||
64 | goto out; | 66 | goto out; |
65 | 67 | ||
66 | leaf = path->nodes[0]; | 68 | leaf = path->nodes[0]; |
@@ -141,6 +143,9 @@ static void start_caching(struct btrfs_root *root) | |||
141 | int ret; | 143 | int ret; |
142 | u64 objectid; | 144 | u64 objectid; |
143 | 145 | ||
146 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
147 | return; | ||
148 | |||
144 | spin_lock(&root->cache_lock); | 149 | spin_lock(&root->cache_lock); |
145 | if (root->cached != BTRFS_CACHE_NO) { | 150 | if (root->cached != BTRFS_CACHE_NO) { |
146 | spin_unlock(&root->cache_lock); | 151 | spin_unlock(&root->cache_lock); |
@@ -178,6 +183,9 @@ static void start_caching(struct btrfs_root *root) | |||
178 | 183 | ||
179 | int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) | 184 | int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) |
180 | { | 185 | { |
186 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
187 | return btrfs_find_free_objectid(root, objectid); | ||
188 | |||
181 | again: | 189 | again: |
182 | *objectid = btrfs_find_ino_for_alloc(root); | 190 | *objectid = btrfs_find_ino_for_alloc(root); |
183 | 191 | ||
@@ -201,6 +209,10 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid) | |||
201 | { | 209 | { |
202 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | 210 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; |
203 | struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; | 211 | struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; |
212 | |||
213 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
214 | return; | ||
215 | |||
204 | again: | 216 | again: |
205 | if (root->cached == BTRFS_CACHE_FINISHED) { | 217 | if (root->cached == BTRFS_CACHE_FINISHED) { |
206 | __btrfs_add_free_space(ctl, objectid, 1); | 218 | __btrfs_add_free_space(ctl, objectid, 1); |
@@ -250,6 +262,9 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) | |||
250 | struct rb_node *n; | 262 | struct rb_node *n; |
251 | u64 count; | 263 | u64 count; |
252 | 264 | ||
265 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
266 | return; | ||
267 | |||
253 | while (1) { | 268 | while (1) { |
254 | n = rb_first(rbroot); | 269 | n = rb_first(rbroot); |
255 | if (!n) | 270 | if (!n) |
@@ -388,9 +403,24 @@ int btrfs_save_ino_cache(struct btrfs_root *root, | |||
388 | int prealloc; | 403 | int prealloc; |
389 | bool retry = false; | 404 | bool retry = false; |
390 | 405 | ||
406 | /* only fs tree and subvol/snap need ino cache */ | ||
407 | if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID && | ||
408 | (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID || | ||
409 | root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID)) | ||
410 | return 0; | ||
411 | |||
412 | /* Don't save inode cache if we are deleting this root */ | ||
413 | if (btrfs_root_refs(&root->root_item) == 0 && | ||
414 | root != root->fs_info->tree_root) | ||
415 | return 0; | ||
416 | |||
417 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
418 | return 0; | ||
419 | |||
391 | path = btrfs_alloc_path(); | 420 | path = btrfs_alloc_path(); |
392 | if (!path) | 421 | if (!path) |
393 | return -ENOMEM; | 422 | return -ENOMEM; |
423 | |||
394 | again: | 424 | again: |
395 | inode = lookup_free_ino_inode(root, path); | 425 | inode = lookup_free_ino_inode(root, path); |
396 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { | 426 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 39a9d5750efd..0a9b10c5b0a7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -138,7 +138,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
138 | return -ENOMEM; | 138 | return -ENOMEM; |
139 | 139 | ||
140 | path->leave_spinning = 1; | 140 | path->leave_spinning = 1; |
141 | btrfs_set_trans_block_group(trans, inode); | ||
142 | 141 | ||
143 | key.objectid = btrfs_ino(inode); | 142 | key.objectid = btrfs_ino(inode); |
144 | key.offset = start; | 143 | key.offset = start; |
@@ -426,9 +425,8 @@ again: | |||
426 | } | 425 | } |
427 | } | 426 | } |
428 | if (start == 0) { | 427 | if (start == 0) { |
429 | trans = btrfs_join_transaction(root, 1); | 428 | trans = btrfs_join_transaction(root); |
430 | BUG_ON(IS_ERR(trans)); | 429 | BUG_ON(IS_ERR(trans)); |
431 | btrfs_set_trans_block_group(trans, inode); | ||
432 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 430 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
433 | 431 | ||
434 | /* lets try to make an inline extent */ | 432 | /* lets try to make an inline extent */ |
@@ -623,8 +621,9 @@ retry: | |||
623 | async_extent->start + async_extent->ram_size - 1, | 621 | async_extent->start + async_extent->ram_size - 1, |
624 | GFP_NOFS); | 622 | GFP_NOFS); |
625 | 623 | ||
626 | trans = btrfs_join_transaction(root, 1); | 624 | trans = btrfs_join_transaction(root); |
627 | BUG_ON(IS_ERR(trans)); | 625 | BUG_ON(IS_ERR(trans)); |
626 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
628 | ret = btrfs_reserve_extent(trans, root, | 627 | ret = btrfs_reserve_extent(trans, root, |
629 | async_extent->compressed_size, | 628 | async_extent->compressed_size, |
630 | async_extent->compressed_size, | 629 | async_extent->compressed_size, |
@@ -793,9 +792,8 @@ static noinline int cow_file_range(struct inode *inode, | |||
793 | int ret = 0; | 792 | int ret = 0; |
794 | 793 | ||
795 | BUG_ON(is_free_space_inode(root, inode)); | 794 | BUG_ON(is_free_space_inode(root, inode)); |
796 | trans = btrfs_join_transaction(root, 1); | 795 | trans = btrfs_join_transaction(root); |
797 | BUG_ON(IS_ERR(trans)); | 796 | BUG_ON(IS_ERR(trans)); |
798 | btrfs_set_trans_block_group(trans, inode); | ||
799 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 797 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
800 | 798 | ||
801 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); | 799 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); |
@@ -1077,10 +1075,12 @@ static noinline int run_delalloc_nocow(struct inode *inode, | |||
1077 | nolock = is_free_space_inode(root, inode); | 1075 | nolock = is_free_space_inode(root, inode); |
1078 | 1076 | ||
1079 | if (nolock) | 1077 | if (nolock) |
1080 | trans = btrfs_join_transaction_nolock(root, 1); | 1078 | trans = btrfs_join_transaction_nolock(root); |
1081 | else | 1079 | else |
1082 | trans = btrfs_join_transaction(root, 1); | 1080 | trans = btrfs_join_transaction(root); |
1081 | |||
1083 | BUG_ON(IS_ERR(trans)); | 1082 | BUG_ON(IS_ERR(trans)); |
1083 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
1084 | 1084 | ||
1085 | cow_start = (u64)-1; | 1085 | cow_start = (u64)-1; |
1086 | cur_offset = start; | 1086 | cur_offset = start; |
@@ -1519,8 +1519,6 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, | |||
1519 | { | 1519 | { |
1520 | struct btrfs_ordered_sum *sum; | 1520 | struct btrfs_ordered_sum *sum; |
1521 | 1521 | ||
1522 | btrfs_set_trans_block_group(trans, inode); | ||
1523 | |||
1524 | list_for_each_entry(sum, list, list) { | 1522 | list_for_each_entry(sum, list, list) { |
1525 | btrfs_csum_file_blocks(trans, | 1523 | btrfs_csum_file_blocks(trans, |
1526 | BTRFS_I(inode)->root->fs_info->csum_root, sum); | 1524 | BTRFS_I(inode)->root->fs_info->csum_root, sum); |
@@ -1735,11 +1733,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1735 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1733 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1736 | if (!ret) { | 1734 | if (!ret) { |
1737 | if (nolock) | 1735 | if (nolock) |
1738 | trans = btrfs_join_transaction_nolock(root, 1); | 1736 | trans = btrfs_join_transaction_nolock(root); |
1739 | else | 1737 | else |
1740 | trans = btrfs_join_transaction(root, 1); | 1738 | trans = btrfs_join_transaction(root); |
1741 | BUG_ON(IS_ERR(trans)); | 1739 | BUG_ON(IS_ERR(trans)); |
1742 | btrfs_set_trans_block_group(trans, inode); | ||
1743 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 1740 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
1744 | ret = btrfs_update_inode(trans, root, inode); | 1741 | ret = btrfs_update_inode(trans, root, inode); |
1745 | BUG_ON(ret); | 1742 | BUG_ON(ret); |
@@ -1752,11 +1749,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1752 | 0, &cached_state, GFP_NOFS); | 1749 | 0, &cached_state, GFP_NOFS); |
1753 | 1750 | ||
1754 | if (nolock) | 1751 | if (nolock) |
1755 | trans = btrfs_join_transaction_nolock(root, 1); | 1752 | trans = btrfs_join_transaction_nolock(root); |
1756 | else | 1753 | else |
1757 | trans = btrfs_join_transaction(root, 1); | 1754 | trans = btrfs_join_transaction(root); |
1758 | BUG_ON(IS_ERR(trans)); | 1755 | BUG_ON(IS_ERR(trans)); |
1759 | btrfs_set_trans_block_group(trans, inode); | ||
1760 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 1756 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
1761 | 1757 | ||
1762 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 1758 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
@@ -1990,7 +1986,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
1990 | } | 1986 | } |
1991 | 1987 | ||
1992 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) | 1988 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) |
1993 | return 0; | 1989 | goto good; |
1994 | 1990 | ||
1995 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && | 1991 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && |
1996 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { | 1992 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { |
@@ -2431,7 +2427,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2431 | (u64)-1); | 2427 | (u64)-1); |
2432 | 2428 | ||
2433 | if (root->orphan_block_rsv || root->orphan_item_inserted) { | 2429 | if (root->orphan_block_rsv || root->orphan_item_inserted) { |
2434 | trans = btrfs_join_transaction(root, 1); | 2430 | trans = btrfs_join_transaction(root); |
2435 | if (!IS_ERR(trans)) | 2431 | if (!IS_ERR(trans)) |
2436 | btrfs_end_transaction(trans, root); | 2432 | btrfs_end_transaction(trans, root); |
2437 | } | 2433 | } |
@@ -2511,12 +2507,12 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2511 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2507 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2512 | struct btrfs_key location; | 2508 | struct btrfs_key location; |
2513 | int maybe_acls; | 2509 | int maybe_acls; |
2514 | u64 alloc_group_block; | ||
2515 | u32 rdev; | 2510 | u32 rdev; |
2516 | int ret; | 2511 | int ret; |
2517 | 2512 | ||
2518 | path = btrfs_alloc_path(); | 2513 | path = btrfs_alloc_path(); |
2519 | BUG_ON(!path); | 2514 | BUG_ON(!path); |
2515 | path->leave_spinning = 1; | ||
2520 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); | 2516 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); |
2521 | 2517 | ||
2522 | ret = btrfs_lookup_inode(NULL, root, path, &location, 0); | 2518 | ret = btrfs_lookup_inode(NULL, root, path, &location, 0); |
@@ -2526,6 +2522,12 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2526 | leaf = path->nodes[0]; | 2522 | leaf = path->nodes[0]; |
2527 | inode_item = btrfs_item_ptr(leaf, path->slots[0], | 2523 | inode_item = btrfs_item_ptr(leaf, path->slots[0], |
2528 | struct btrfs_inode_item); | 2524 | struct btrfs_inode_item); |
2525 | if (!leaf->map_token) | ||
2526 | map_private_extent_buffer(leaf, (unsigned long)inode_item, | ||
2527 | sizeof(struct btrfs_inode_item), | ||
2528 | &leaf->map_token, &leaf->kaddr, | ||
2529 | &leaf->map_start, &leaf->map_len, | ||
2530 | KM_USER1); | ||
2529 | 2531 | ||
2530 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); | 2532 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); |
2531 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); | 2533 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); |
@@ -2555,8 +2557,6 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2555 | BTRFS_I(inode)->index_cnt = (u64)-1; | 2557 | BTRFS_I(inode)->index_cnt = (u64)-1; |
2556 | BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); | 2558 | BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); |
2557 | 2559 | ||
2558 | alloc_group_block = btrfs_inode_block_group(leaf, inode_item); | ||
2559 | |||
2560 | /* | 2560 | /* |
2561 | * try to precache a NULL acl entry for files that don't have | 2561 | * try to precache a NULL acl entry for files that don't have |
2562 | * any xattrs or acls | 2562 | * any xattrs or acls |
@@ -2566,8 +2566,11 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2566 | if (!maybe_acls) | 2566 | if (!maybe_acls) |
2567 | cache_no_acl(inode); | 2567 | cache_no_acl(inode); |
2568 | 2568 | ||
2569 | BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, | 2569 | if (leaf->map_token) { |
2570 | alloc_group_block, 0); | 2570 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); |
2571 | leaf->map_token = NULL; | ||
2572 | } | ||
2573 | |||
2571 | btrfs_free_path(path); | 2574 | btrfs_free_path(path); |
2572 | inode_item = NULL; | 2575 | inode_item = NULL; |
2573 | 2576 | ||
@@ -2647,7 +2650,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2647 | btrfs_set_inode_transid(leaf, item, trans->transid); | 2650 | btrfs_set_inode_transid(leaf, item, trans->transid); |
2648 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | 2651 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); |
2649 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | 2652 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); |
2650 | btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); | 2653 | btrfs_set_inode_block_group(leaf, item, 0); |
2651 | 2654 | ||
2652 | if (leaf->map_token) { | 2655 | if (leaf->map_token) { |
2653 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | 2656 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); |
@@ -3004,8 +3007,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
3004 | if (IS_ERR(trans)) | 3007 | if (IS_ERR(trans)) |
3005 | return PTR_ERR(trans); | 3008 | return PTR_ERR(trans); |
3006 | 3009 | ||
3007 | btrfs_set_trans_block_group(trans, dir); | ||
3008 | |||
3009 | btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); | 3010 | btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); |
3010 | 3011 | ||
3011 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 3012 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
@@ -3075,6 +3076,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | |||
3075 | ret = btrfs_update_inode(trans, root, dir); | 3076 | ret = btrfs_update_inode(trans, root, dir); |
3076 | BUG_ON(ret); | 3077 | BUG_ON(ret); |
3077 | 3078 | ||
3079 | btrfs_free_path(path); | ||
3078 | return 0; | 3080 | return 0; |
3079 | } | 3081 | } |
3080 | 3082 | ||
@@ -3094,8 +3096,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
3094 | if (IS_ERR(trans)) | 3096 | if (IS_ERR(trans)) |
3095 | return PTR_ERR(trans); | 3097 | return PTR_ERR(trans); |
3096 | 3098 | ||
3097 | btrfs_set_trans_block_group(trans, dir); | ||
3098 | |||
3099 | if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { | 3099 | if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { |
3100 | err = btrfs_unlink_subvol(trans, root, dir, | 3100 | err = btrfs_unlink_subvol(trans, root, dir, |
3101 | BTRFS_I(inode)->location.objectid, | 3101 | BTRFS_I(inode)->location.objectid, |
@@ -3514,7 +3514,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
3514 | err = PTR_ERR(trans); | 3514 | err = PTR_ERR(trans); |
3515 | break; | 3515 | break; |
3516 | } | 3516 | } |
3517 | btrfs_set_trans_block_group(trans, inode); | ||
3518 | 3517 | ||
3519 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3518 | err = btrfs_drop_extents(trans, inode, cur_offset, |
3520 | cur_offset + hole_size, | 3519 | cur_offset + hole_size, |
@@ -3648,9 +3647,8 @@ void btrfs_evict_inode(struct inode *inode) | |||
3648 | btrfs_i_size_write(inode, 0); | 3647 | btrfs_i_size_write(inode, 0); |
3649 | 3648 | ||
3650 | while (1) { | 3649 | while (1) { |
3651 | trans = btrfs_start_transaction(root, 0); | 3650 | trans = btrfs_join_transaction(root); |
3652 | BUG_ON(IS_ERR(trans)); | 3651 | BUG_ON(IS_ERR(trans)); |
3653 | btrfs_set_trans_block_group(trans, inode); | ||
3654 | trans->block_rsv = root->orphan_block_rsv; | 3652 | trans->block_rsv = root->orphan_block_rsv; |
3655 | 3653 | ||
3656 | ret = btrfs_block_rsv_check(trans, root, | 3654 | ret = btrfs_block_rsv_check(trans, root, |
@@ -4133,7 +4131,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4133 | path = btrfs_alloc_path(); | 4131 | path = btrfs_alloc_path(); |
4134 | if (!path) | 4132 | if (!path) |
4135 | return -ENOMEM; | 4133 | return -ENOMEM; |
4136 | path->reada = 2; | 4134 | |
4135 | path->reada = 1; | ||
4137 | 4136 | ||
4138 | if (key_type == BTRFS_DIR_INDEX_KEY) { | 4137 | if (key_type == BTRFS_DIR_INDEX_KEY) { |
4139 | INIT_LIST_HEAD(&ins_list); | 4138 | INIT_LIST_HEAD(&ins_list); |
@@ -4268,18 +4267,16 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4268 | if (BTRFS_I(inode)->dummy_inode) | 4267 | if (BTRFS_I(inode)->dummy_inode) |
4269 | return 0; | 4268 | return 0; |
4270 | 4269 | ||
4271 | smp_mb(); | 4270 | if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode)) |
4272 | if (root->fs_info->closing && is_free_space_inode(root, inode)) | ||
4273 | nolock = true; | 4271 | nolock = true; |
4274 | 4272 | ||
4275 | if (wbc->sync_mode == WB_SYNC_ALL) { | 4273 | if (wbc->sync_mode == WB_SYNC_ALL) { |
4276 | if (nolock) | 4274 | if (nolock) |
4277 | trans = btrfs_join_transaction_nolock(root, 1); | 4275 | trans = btrfs_join_transaction_nolock(root); |
4278 | else | 4276 | else |
4279 | trans = btrfs_join_transaction(root, 1); | 4277 | trans = btrfs_join_transaction(root); |
4280 | if (IS_ERR(trans)) | 4278 | if (IS_ERR(trans)) |
4281 | return PTR_ERR(trans); | 4279 | return PTR_ERR(trans); |
4282 | btrfs_set_trans_block_group(trans, inode); | ||
4283 | if (nolock) | 4280 | if (nolock) |
4284 | ret = btrfs_end_transaction_nolock(trans, root); | 4281 | ret = btrfs_end_transaction_nolock(trans, root); |
4285 | else | 4282 | else |
@@ -4303,9 +4300,8 @@ void btrfs_dirty_inode(struct inode *inode, int flags) | |||
4303 | if (BTRFS_I(inode)->dummy_inode) | 4300 | if (BTRFS_I(inode)->dummy_inode) |
4304 | return; | 4301 | return; |
4305 | 4302 | ||
4306 | trans = btrfs_join_transaction(root, 1); | 4303 | trans = btrfs_join_transaction(root); |
4307 | BUG_ON(IS_ERR(trans)); | 4304 | BUG_ON(IS_ERR(trans)); |
4308 | btrfs_set_trans_block_group(trans, inode); | ||
4309 | 4305 | ||
4310 | ret = btrfs_update_inode(trans, root, inode); | 4306 | ret = btrfs_update_inode(trans, root, inode); |
4311 | if (ret && ret == -ENOSPC) { | 4307 | if (ret && ret == -ENOSPC) { |
@@ -4319,7 +4315,6 @@ void btrfs_dirty_inode(struct inode *inode, int flags) | |||
4319 | PTR_ERR(trans)); | 4315 | PTR_ERR(trans)); |
4320 | return; | 4316 | return; |
4321 | } | 4317 | } |
4322 | btrfs_set_trans_block_group(trans, inode); | ||
4323 | 4318 | ||
4324 | ret = btrfs_update_inode(trans, root, inode); | 4319 | ret = btrfs_update_inode(trans, root, inode); |
4325 | if (ret) { | 4320 | if (ret) { |
@@ -4418,8 +4413,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4418 | struct btrfs_root *root, | 4413 | struct btrfs_root *root, |
4419 | struct inode *dir, | 4414 | struct inode *dir, |
4420 | const char *name, int name_len, | 4415 | const char *name, int name_len, |
4421 | u64 ref_objectid, u64 objectid, | 4416 | u64 ref_objectid, u64 objectid, int mode, |
4422 | u64 alloc_hint, int mode, u64 *index) | 4417 | u64 *index) |
4423 | { | 4418 | { |
4424 | struct inode *inode; | 4419 | struct inode *inode; |
4425 | struct btrfs_inode_item *inode_item; | 4420 | struct btrfs_inode_item *inode_item; |
@@ -4472,8 +4467,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4472 | owner = 0; | 4467 | owner = 0; |
4473 | else | 4468 | else |
4474 | owner = 1; | 4469 | owner = 1; |
4475 | BTRFS_I(inode)->block_group = | ||
4476 | btrfs_find_block_group(root, 0, alloc_hint, owner); | ||
4477 | 4470 | ||
4478 | key[0].objectid = objectid; | 4471 | key[0].objectid = objectid; |
4479 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); | 4472 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); |
@@ -4629,15 +4622,13 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4629 | if (IS_ERR(trans)) | 4622 | if (IS_ERR(trans)) |
4630 | return PTR_ERR(trans); | 4623 | return PTR_ERR(trans); |
4631 | 4624 | ||
4632 | btrfs_set_trans_block_group(trans, dir); | ||
4633 | |||
4634 | err = btrfs_find_free_ino(root, &objectid); | 4625 | err = btrfs_find_free_ino(root, &objectid); |
4635 | if (err) | 4626 | if (err) |
4636 | goto out_unlock; | 4627 | goto out_unlock; |
4637 | 4628 | ||
4638 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4629 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4639 | dentry->d_name.len, btrfs_ino(dir), objectid, | 4630 | dentry->d_name.len, btrfs_ino(dir), objectid, |
4640 | BTRFS_I(dir)->block_group, mode, &index); | 4631 | mode, &index); |
4641 | if (IS_ERR(inode)) { | 4632 | if (IS_ERR(inode)) { |
4642 | err = PTR_ERR(inode); | 4633 | err = PTR_ERR(inode); |
4643 | goto out_unlock; | 4634 | goto out_unlock; |
@@ -4649,7 +4640,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4649 | goto out_unlock; | 4640 | goto out_unlock; |
4650 | } | 4641 | } |
4651 | 4642 | ||
4652 | btrfs_set_trans_block_group(trans, inode); | ||
4653 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); | 4643 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
4654 | if (err) | 4644 | if (err) |
4655 | drop_inode = 1; | 4645 | drop_inode = 1; |
@@ -4658,8 +4648,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4658 | init_special_inode(inode, inode->i_mode, rdev); | 4648 | init_special_inode(inode, inode->i_mode, rdev); |
4659 | btrfs_update_inode(trans, root, inode); | 4649 | btrfs_update_inode(trans, root, inode); |
4660 | } | 4650 | } |
4661 | btrfs_update_inode_block_group(trans, inode); | ||
4662 | btrfs_update_inode_block_group(trans, dir); | ||
4663 | out_unlock: | 4651 | out_unlock: |
4664 | nr = trans->blocks_used; | 4652 | nr = trans->blocks_used; |
4665 | btrfs_end_transaction_throttle(trans, root); | 4653 | btrfs_end_transaction_throttle(trans, root); |
@@ -4692,15 +4680,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4692 | if (IS_ERR(trans)) | 4680 | if (IS_ERR(trans)) |
4693 | return PTR_ERR(trans); | 4681 | return PTR_ERR(trans); |
4694 | 4682 | ||
4695 | btrfs_set_trans_block_group(trans, dir); | ||
4696 | |||
4697 | err = btrfs_find_free_ino(root, &objectid); | 4683 | err = btrfs_find_free_ino(root, &objectid); |
4698 | if (err) | 4684 | if (err) |
4699 | goto out_unlock; | 4685 | goto out_unlock; |
4700 | 4686 | ||
4701 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4687 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4702 | dentry->d_name.len, btrfs_ino(dir), objectid, | 4688 | dentry->d_name.len, btrfs_ino(dir), objectid, |
4703 | BTRFS_I(dir)->block_group, mode, &index); | 4689 | mode, &index); |
4704 | if (IS_ERR(inode)) { | 4690 | if (IS_ERR(inode)) { |
4705 | err = PTR_ERR(inode); | 4691 | err = PTR_ERR(inode); |
4706 | goto out_unlock; | 4692 | goto out_unlock; |
@@ -4712,7 +4698,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4712 | goto out_unlock; | 4698 | goto out_unlock; |
4713 | } | 4699 | } |
4714 | 4700 | ||
4715 | btrfs_set_trans_block_group(trans, inode); | ||
4716 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); | 4701 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
4717 | if (err) | 4702 | if (err) |
4718 | drop_inode = 1; | 4703 | drop_inode = 1; |
@@ -4723,8 +4708,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4723 | inode->i_op = &btrfs_file_inode_operations; | 4708 | inode->i_op = &btrfs_file_inode_operations; |
4724 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 4709 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
4725 | } | 4710 | } |
4726 | btrfs_update_inode_block_group(trans, inode); | ||
4727 | btrfs_update_inode_block_group(trans, dir); | ||
4728 | out_unlock: | 4711 | out_unlock: |
4729 | nr = trans->blocks_used; | 4712 | nr = trans->blocks_used; |
4730 | btrfs_end_transaction_throttle(trans, root); | 4713 | btrfs_end_transaction_throttle(trans, root); |
@@ -4771,8 +4754,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4771 | 4754 | ||
4772 | btrfs_inc_nlink(inode); | 4755 | btrfs_inc_nlink(inode); |
4773 | inode->i_ctime = CURRENT_TIME; | 4756 | inode->i_ctime = CURRENT_TIME; |
4774 | |||
4775 | btrfs_set_trans_block_group(trans, dir); | ||
4776 | ihold(inode); | 4757 | ihold(inode); |
4777 | 4758 | ||
4778 | err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); | 4759 | err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); |
@@ -4781,7 +4762,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4781 | drop_inode = 1; | 4762 | drop_inode = 1; |
4782 | } else { | 4763 | } else { |
4783 | struct dentry *parent = dget_parent(dentry); | 4764 | struct dentry *parent = dget_parent(dentry); |
4784 | btrfs_update_inode_block_group(trans, dir); | ||
4785 | err = btrfs_update_inode(trans, root, inode); | 4765 | err = btrfs_update_inode(trans, root, inode); |
4786 | BUG_ON(err); | 4766 | BUG_ON(err); |
4787 | btrfs_log_new_name(trans, inode, NULL, parent); | 4767 | btrfs_log_new_name(trans, inode, NULL, parent); |
@@ -4818,7 +4798,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4818 | trans = btrfs_start_transaction(root, 5); | 4798 | trans = btrfs_start_transaction(root, 5); |
4819 | if (IS_ERR(trans)) | 4799 | if (IS_ERR(trans)) |
4820 | return PTR_ERR(trans); | 4800 | return PTR_ERR(trans); |
4821 | btrfs_set_trans_block_group(trans, dir); | ||
4822 | 4801 | ||
4823 | err = btrfs_find_free_ino(root, &objectid); | 4802 | err = btrfs_find_free_ino(root, &objectid); |
4824 | if (err) | 4803 | if (err) |
@@ -4826,8 +4805,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4826 | 4805 | ||
4827 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4806 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4828 | dentry->d_name.len, btrfs_ino(dir), objectid, | 4807 | dentry->d_name.len, btrfs_ino(dir), objectid, |
4829 | BTRFS_I(dir)->block_group, S_IFDIR | mode, | 4808 | S_IFDIR | mode, &index); |
4830 | &index); | ||
4831 | if (IS_ERR(inode)) { | 4809 | if (IS_ERR(inode)) { |
4832 | err = PTR_ERR(inode); | 4810 | err = PTR_ERR(inode); |
4833 | goto out_fail; | 4811 | goto out_fail; |
@@ -4841,7 +4819,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4841 | 4819 | ||
4842 | inode->i_op = &btrfs_dir_inode_operations; | 4820 | inode->i_op = &btrfs_dir_inode_operations; |
4843 | inode->i_fop = &btrfs_dir_file_operations; | 4821 | inode->i_fop = &btrfs_dir_file_operations; |
4844 | btrfs_set_trans_block_group(trans, inode); | ||
4845 | 4822 | ||
4846 | btrfs_i_size_write(inode, 0); | 4823 | btrfs_i_size_write(inode, 0); |
4847 | err = btrfs_update_inode(trans, root, inode); | 4824 | err = btrfs_update_inode(trans, root, inode); |
@@ -4855,8 +4832,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4855 | 4832 | ||
4856 | d_instantiate(dentry, inode); | 4833 | d_instantiate(dentry, inode); |
4857 | drop_on_err = 0; | 4834 | drop_on_err = 0; |
4858 | btrfs_update_inode_block_group(trans, inode); | ||
4859 | btrfs_update_inode_block_group(trans, dir); | ||
4860 | 4835 | ||
4861 | out_fail: | 4836 | out_fail: |
4862 | nr = trans->blocks_used; | 4837 | nr = trans->blocks_used; |
@@ -4989,7 +4964,15 @@ again: | |||
4989 | 4964 | ||
4990 | if (!path) { | 4965 | if (!path) { |
4991 | path = btrfs_alloc_path(); | 4966 | path = btrfs_alloc_path(); |
4992 | BUG_ON(!path); | 4967 | if (!path) { |
4968 | err = -ENOMEM; | ||
4969 | goto out; | ||
4970 | } | ||
4971 | /* | ||
4972 | * Chances are we'll be called again, so go ahead and do | ||
4973 | * readahead | ||
4974 | */ | ||
4975 | path->reada = 1; | ||
4993 | } | 4976 | } |
4994 | 4977 | ||
4995 | ret = btrfs_lookup_file_extent(trans, root, path, | 4978 | ret = btrfs_lookup_file_extent(trans, root, path, |
@@ -5130,8 +5113,10 @@ again: | |||
5130 | kunmap(page); | 5113 | kunmap(page); |
5131 | free_extent_map(em); | 5114 | free_extent_map(em); |
5132 | em = NULL; | 5115 | em = NULL; |
5116 | |||
5133 | btrfs_release_path(path); | 5117 | btrfs_release_path(path); |
5134 | trans = btrfs_join_transaction(root, 1); | 5118 | trans = btrfs_join_transaction(root); |
5119 | |||
5135 | if (IS_ERR(trans)) | 5120 | if (IS_ERR(trans)) |
5136 | return ERR_CAST(trans); | 5121 | return ERR_CAST(trans); |
5137 | goto again; | 5122 | goto again; |
@@ -5375,7 +5360,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
5375 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | 5360 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); |
5376 | } | 5361 | } |
5377 | 5362 | ||
5378 | trans = btrfs_join_transaction(root, 0); | 5363 | trans = btrfs_join_transaction(root); |
5379 | if (IS_ERR(trans)) | 5364 | if (IS_ERR(trans)) |
5380 | return ERR_CAST(trans); | 5365 | return ERR_CAST(trans); |
5381 | 5366 | ||
@@ -5611,7 +5596,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
5611 | * to make sure the current transaction stays open | 5596 | * to make sure the current transaction stays open |
5612 | * while we look for nocow cross refs | 5597 | * while we look for nocow cross refs |
5613 | */ | 5598 | */ |
5614 | trans = btrfs_join_transaction(root, 0); | 5599 | trans = btrfs_join_transaction(root); |
5615 | if (IS_ERR(trans)) | 5600 | if (IS_ERR(trans)) |
5616 | goto must_cow; | 5601 | goto must_cow; |
5617 | 5602 | ||
@@ -5750,7 +5735,7 @@ again: | |||
5750 | 5735 | ||
5751 | BUG_ON(!ordered); | 5736 | BUG_ON(!ordered); |
5752 | 5737 | ||
5753 | trans = btrfs_join_transaction(root, 1); | 5738 | trans = btrfs_join_transaction(root); |
5754 | if (IS_ERR(trans)) { | 5739 | if (IS_ERR(trans)) { |
5755 | err = -ENOMEM; | 5740 | err = -ENOMEM; |
5756 | goto out; | 5741 | goto out; |
@@ -6500,6 +6485,7 @@ out: | |||
6500 | static int btrfs_truncate(struct inode *inode) | 6485 | static int btrfs_truncate(struct inode *inode) |
6501 | { | 6486 | { |
6502 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6487 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6488 | struct btrfs_block_rsv *rsv; | ||
6503 | int ret; | 6489 | int ret; |
6504 | int err = 0; | 6490 | int err = 0; |
6505 | struct btrfs_trans_handle *trans; | 6491 | struct btrfs_trans_handle *trans; |
@@ -6513,28 +6499,80 @@ static int btrfs_truncate(struct inode *inode) | |||
6513 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 6499 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
6514 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 6500 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
6515 | 6501 | ||
6516 | trans = btrfs_start_transaction(root, 5); | 6502 | /* |
6517 | if (IS_ERR(trans)) | 6503 | * Yes ladies and gentlemen, this is indeed ugly. The fact is we have |
6518 | return PTR_ERR(trans); | 6504 | * 3 things going on here |
6505 | * | ||
6506 | * 1) We need to reserve space for our orphan item and the space to | ||
6507 | * delete our orphan item. Lord knows we don't want to have a dangling | ||
6508 | * orphan item because we didn't reserve space to remove it. | ||
6509 | * | ||
6510 | * 2) We need to reserve space to update our inode. | ||
6511 | * | ||
6512 | * 3) We need to have something to cache all the space that is going to | ||
6513 | * be free'd up by the truncate operation, but also have some slack | ||
6514 | * space reserved in case it uses space during the truncate (thank you | ||
6515 | * very much snapshotting). | ||
6516 | * | ||
6517 | * And we need these to all be separate. The fact is we can use a lot of | ||
6518 | * space doing the truncate, and we have no earthly idea how much space | ||
6519 | * we will use, so we need the truncate reservation to be separate so it | ||
6520 | * doesn't end up using space reserved for updating the inode or | ||
6521 | * removing the orphan item. We also need to be able to stop the | ||
6522 | * transaction and start a new one, which means we need to be able to | ||
6523 | * update the inode several times, and we have no way of knowing how | ||
6524 | * many times that will be, so we can't just reserve 1 item for the | ||
6525 | * entirety of the operation, so that has to be done separately as well. | ||
6526 | * Then there is the orphan item, which does indeed need to be held on | ||
6527 | * to for the whole operation, and we need nobody to touch this reserved | ||
6528 | * space except the orphan code. | ||
6529 | * | ||
6530 | * So that leaves us with | ||
6531 | * | ||
6532 | * 1) root->orphan_block_rsv - for the orphan deletion. | ||
6533 | * 2) rsv - for the truncate reservation, which we will steal from the | ||
6534 | * transaction reservation. | ||
6535 | * 3) fs_info->trans_block_rsv - this will have 1 item's worth left for | ||
6536 | * updating the inode. | ||
6537 | */ | ||
6538 | rsv = btrfs_alloc_block_rsv(root); | ||
6539 | if (!rsv) | ||
6540 | return -ENOMEM; | ||
6541 | btrfs_add_durable_block_rsv(root->fs_info, rsv); | ||
6519 | 6542 | ||
6520 | btrfs_set_trans_block_group(trans, inode); | 6543 | trans = btrfs_start_transaction(root, 4); |
6544 | if (IS_ERR(trans)) { | ||
6545 | err = PTR_ERR(trans); | ||
6546 | goto out; | ||
6547 | } | ||
6548 | |||
6549 | /* | ||
6550 | * Reserve space for the truncate process. Truncate should be adding | ||
6551 | * space, but if there are snapshots it may end up using space. | ||
6552 | */ | ||
6553 | ret = btrfs_truncate_reserve_metadata(trans, root, rsv); | ||
6554 | BUG_ON(ret); | ||
6521 | 6555 | ||
6522 | ret = btrfs_orphan_add(trans, inode); | 6556 | ret = btrfs_orphan_add(trans, inode); |
6523 | if (ret) { | 6557 | if (ret) { |
6524 | btrfs_end_transaction(trans, root); | 6558 | btrfs_end_transaction(trans, root); |
6525 | return ret; | 6559 | goto out; |
6526 | } | 6560 | } |
6527 | 6561 | ||
6528 | nr = trans->blocks_used; | 6562 | nr = trans->blocks_used; |
6529 | btrfs_end_transaction(trans, root); | 6563 | btrfs_end_transaction(trans, root); |
6530 | btrfs_btree_balance_dirty(root, nr); | 6564 | btrfs_btree_balance_dirty(root, nr); |
6531 | 6565 | ||
6532 | /* Now start a transaction for the truncate */ | 6566 | /* |
6533 | trans = btrfs_start_transaction(root, 0); | 6567 | * Ok so we've already migrated our bytes over for the truncate, so here |
6534 | if (IS_ERR(trans)) | 6568 | * just reserve the one slot we need for updating the inode. |
6535 | return PTR_ERR(trans); | 6569 | */ |
6536 | btrfs_set_trans_block_group(trans, inode); | 6570 | trans = btrfs_start_transaction(root, 1); |
6537 | trans->block_rsv = root->orphan_block_rsv; | 6571 | if (IS_ERR(trans)) { |
6572 | err = PTR_ERR(trans); | ||
6573 | goto out; | ||
6574 | } | ||
6575 | trans->block_rsv = rsv; | ||
6538 | 6576 | ||
6539 | /* | 6577 | /* |
6540 | * setattr is responsible for setting the ordered_data_close flag, | 6578 | * setattr is responsible for setting the ordered_data_close flag, |
@@ -6558,24 +6596,17 @@ static int btrfs_truncate(struct inode *inode) | |||
6558 | 6596 | ||
6559 | while (1) { | 6597 | while (1) { |
6560 | if (!trans) { | 6598 | if (!trans) { |
6561 | trans = btrfs_start_transaction(root, 0); | 6599 | trans = btrfs_start_transaction(root, 3); |
6562 | if (IS_ERR(trans)) | 6600 | if (IS_ERR(trans)) { |
6563 | return PTR_ERR(trans); | 6601 | err = PTR_ERR(trans); |
6564 | btrfs_set_trans_block_group(trans, inode); | 6602 | goto out; |
6565 | trans->block_rsv = root->orphan_block_rsv; | 6603 | } |
6566 | } | ||
6567 | 6604 | ||
6568 | ret = btrfs_block_rsv_check(trans, root, | 6605 | ret = btrfs_truncate_reserve_metadata(trans, root, |
6569 | root->orphan_block_rsv, 0, 5); | 6606 | rsv); |
6570 | if (ret == -EAGAIN) { | 6607 | BUG_ON(ret); |
6571 | ret = btrfs_commit_transaction(trans, root); | 6608 | |
6572 | if (ret) | 6609 | trans->block_rsv = rsv; |
6573 | return ret; | ||
6574 | trans = NULL; | ||
6575 | continue; | ||
6576 | } else if (ret) { | ||
6577 | err = ret; | ||
6578 | break; | ||
6579 | } | 6610 | } |
6580 | 6611 | ||
6581 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6612 | ret = btrfs_truncate_inode_items(trans, root, inode, |
@@ -6586,6 +6617,7 @@ static int btrfs_truncate(struct inode *inode) | |||
6586 | break; | 6617 | break; |
6587 | } | 6618 | } |
6588 | 6619 | ||
6620 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
6589 | ret = btrfs_update_inode(trans, root, inode); | 6621 | ret = btrfs_update_inode(trans, root, inode); |
6590 | if (ret) { | 6622 | if (ret) { |
6591 | err = ret; | 6623 | err = ret; |
@@ -6599,6 +6631,7 @@ static int btrfs_truncate(struct inode *inode) | |||
6599 | } | 6631 | } |
6600 | 6632 | ||
6601 | if (ret == 0 && inode->i_nlink > 0) { | 6633 | if (ret == 0 && inode->i_nlink > 0) { |
6634 | trans->block_rsv = root->orphan_block_rsv; | ||
6602 | ret = btrfs_orphan_del(trans, inode); | 6635 | ret = btrfs_orphan_del(trans, inode); |
6603 | if (ret) | 6636 | if (ret) |
6604 | err = ret; | 6637 | err = ret; |
@@ -6610,15 +6643,20 @@ static int btrfs_truncate(struct inode *inode) | |||
6610 | ret = btrfs_orphan_del(NULL, inode); | 6643 | ret = btrfs_orphan_del(NULL, inode); |
6611 | } | 6644 | } |
6612 | 6645 | ||
6646 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
6613 | ret = btrfs_update_inode(trans, root, inode); | 6647 | ret = btrfs_update_inode(trans, root, inode); |
6614 | if (ret && !err) | 6648 | if (ret && !err) |
6615 | err = ret; | 6649 | err = ret; |
6616 | 6650 | ||
6617 | nr = trans->blocks_used; | 6651 | nr = trans->blocks_used; |
6618 | ret = btrfs_end_transaction_throttle(trans, root); | 6652 | ret = btrfs_end_transaction_throttle(trans, root); |
6653 | btrfs_btree_balance_dirty(root, nr); | ||
6654 | |||
6655 | out: | ||
6656 | btrfs_free_block_rsv(root, rsv); | ||
6657 | |||
6619 | if (ret && !err) | 6658 | if (ret && !err) |
6620 | err = ret; | 6659 | err = ret; |
6621 | btrfs_btree_balance_dirty(root, nr); | ||
6622 | 6660 | ||
6623 | return err; | 6661 | return err; |
6624 | } | 6662 | } |
@@ -6627,15 +6665,14 @@ static int btrfs_truncate(struct inode *inode) | |||
6627 | * create a new subvolume directory/inode (helper for the ioctl). | 6665 | * create a new subvolume directory/inode (helper for the ioctl). |
6628 | */ | 6666 | */ |
6629 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 6667 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
6630 | struct btrfs_root *new_root, | 6668 | struct btrfs_root *new_root, u64 new_dirid) |
6631 | u64 new_dirid, u64 alloc_hint) | ||
6632 | { | 6669 | { |
6633 | struct inode *inode; | 6670 | struct inode *inode; |
6634 | int err; | 6671 | int err; |
6635 | u64 index = 0; | 6672 | u64 index = 0; |
6636 | 6673 | ||
6637 | inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, | 6674 | inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, |
6638 | new_dirid, alloc_hint, S_IFDIR | 0700, &index); | 6675 | new_dirid, S_IFDIR | 0700, &index); |
6639 | if (IS_ERR(inode)) | 6676 | if (IS_ERR(inode)) |
6640 | return PTR_ERR(inode); | 6677 | return PTR_ERR(inode); |
6641 | inode->i_op = &btrfs_dir_inode_operations; | 6678 | inode->i_op = &btrfs_dir_inode_operations; |
@@ -6748,21 +6785,6 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6748 | spin_unlock(&root->fs_info->ordered_extent_lock); | 6785 | spin_unlock(&root->fs_info->ordered_extent_lock); |
6749 | } | 6786 | } |
6750 | 6787 | ||
6751 | if (root == root->fs_info->tree_root) { | ||
6752 | struct btrfs_block_group_cache *block_group; | ||
6753 | |||
6754 | block_group = btrfs_lookup_block_group(root->fs_info, | ||
6755 | BTRFS_I(inode)->block_group); | ||
6756 | if (block_group && block_group->inode == inode) { | ||
6757 | spin_lock(&block_group->lock); | ||
6758 | block_group->inode = NULL; | ||
6759 | spin_unlock(&block_group->lock); | ||
6760 | btrfs_put_block_group(block_group); | ||
6761 | } else if (block_group) { | ||
6762 | btrfs_put_block_group(block_group); | ||
6763 | } | ||
6764 | } | ||
6765 | |||
6766 | spin_lock(&root->orphan_lock); | 6788 | spin_lock(&root->orphan_lock); |
6767 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 6789 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { |
6768 | printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", | 6790 | printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", |
@@ -6948,8 +6970,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6948 | goto out_notrans; | 6970 | goto out_notrans; |
6949 | } | 6971 | } |
6950 | 6972 | ||
6951 | btrfs_set_trans_block_group(trans, new_dir); | ||
6952 | |||
6953 | if (dest != root) | 6973 | if (dest != root) |
6954 | btrfs_record_root_in_trans(trans, dest); | 6974 | btrfs_record_root_in_trans(trans, dest); |
6955 | 6975 | ||
@@ -7131,16 +7151,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
7131 | if (IS_ERR(trans)) | 7151 | if (IS_ERR(trans)) |
7132 | return PTR_ERR(trans); | 7152 | return PTR_ERR(trans); |
7133 | 7153 | ||
7134 | btrfs_set_trans_block_group(trans, dir); | ||
7135 | |||
7136 | err = btrfs_find_free_ino(root, &objectid); | 7154 | err = btrfs_find_free_ino(root, &objectid); |
7137 | if (err) | 7155 | if (err) |
7138 | goto out_unlock; | 7156 | goto out_unlock; |
7139 | 7157 | ||
7140 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 7158 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
7141 | dentry->d_name.len, btrfs_ino(dir), objectid, | 7159 | dentry->d_name.len, btrfs_ino(dir), objectid, |
7142 | BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, | 7160 | S_IFLNK|S_IRWXUGO, &index); |
7143 | &index); | ||
7144 | if (IS_ERR(inode)) { | 7161 | if (IS_ERR(inode)) { |
7145 | err = PTR_ERR(inode); | 7162 | err = PTR_ERR(inode); |
7146 | goto out_unlock; | 7163 | goto out_unlock; |
@@ -7152,7 +7169,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
7152 | goto out_unlock; | 7169 | goto out_unlock; |
7153 | } | 7170 | } |
7154 | 7171 | ||
7155 | btrfs_set_trans_block_group(trans, inode); | ||
7156 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); | 7172 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
7157 | if (err) | 7173 | if (err) |
7158 | drop_inode = 1; | 7174 | drop_inode = 1; |
@@ -7163,8 +7179,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
7163 | inode->i_op = &btrfs_file_inode_operations; | 7179 | inode->i_op = &btrfs_file_inode_operations; |
7164 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 7180 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
7165 | } | 7181 | } |
7166 | btrfs_update_inode_block_group(trans, inode); | ||
7167 | btrfs_update_inode_block_group(trans, dir); | ||
7168 | if (drop_inode) | 7182 | if (drop_inode) |
7169 | goto out_unlock; | 7183 | goto out_unlock; |
7170 | 7184 | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 85e818ce00c5..a3c4751e07db 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -243,7 +243,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
243 | ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); | 243 | ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); |
244 | } | 244 | } |
245 | 245 | ||
246 | trans = btrfs_join_transaction(root, 1); | 246 | trans = btrfs_join_transaction(root); |
247 | BUG_ON(IS_ERR(trans)); | 247 | BUG_ON(IS_ERR(trans)); |
248 | 248 | ||
249 | ret = btrfs_update_inode(trans, root, inode); | 249 | ret = btrfs_update_inode(trans, root, inode); |
@@ -414,8 +414,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
414 | 414 | ||
415 | btrfs_record_root_in_trans(trans, new_root); | 415 | btrfs_record_root_in_trans(trans, new_root); |
416 | 416 | ||
417 | ret = btrfs_create_subvol_root(trans, new_root, new_dirid, | 417 | ret = btrfs_create_subvol_root(trans, new_root, new_dirid); |
418 | BTRFS_I(dir)->block_group); | ||
419 | /* | 418 | /* |
420 | * insert the directory item | 419 | * insert the directory item |
421 | */ | 420 | */ |
@@ -483,8 +482,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
483 | ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); | 482 | ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); |
484 | BUG_ON(ret); | 483 | BUG_ON(ret); |
485 | 484 | ||
485 | spin_lock(&root->fs_info->trans_lock); | ||
486 | list_add(&pending_snapshot->list, | 486 | list_add(&pending_snapshot->list, |
487 | &trans->transaction->pending_snapshots); | 487 | &trans->transaction->pending_snapshots); |
488 | spin_unlock(&root->fs_info->trans_lock); | ||
488 | if (async_transid) { | 489 | if (async_transid) { |
489 | *async_transid = trans->transid; | 490 | *async_transid = trans->transid; |
490 | ret = btrfs_commit_transaction_async(trans, | 491 | ret = btrfs_commit_transaction_async(trans, |
@@ -707,16 +708,17 @@ static int find_new_extents(struct btrfs_root *root, | |||
707 | struct btrfs_file_extent_item *extent; | 708 | struct btrfs_file_extent_item *extent; |
708 | int type; | 709 | int type; |
709 | int ret; | 710 | int ret; |
711 | u64 ino = btrfs_ino(inode); | ||
710 | 712 | ||
711 | path = btrfs_alloc_path(); | 713 | path = btrfs_alloc_path(); |
712 | if (!path) | 714 | if (!path) |
713 | return -ENOMEM; | 715 | return -ENOMEM; |
714 | 716 | ||
715 | min_key.objectid = inode->i_ino; | 717 | min_key.objectid = ino; |
716 | min_key.type = BTRFS_EXTENT_DATA_KEY; | 718 | min_key.type = BTRFS_EXTENT_DATA_KEY; |
717 | min_key.offset = *off; | 719 | min_key.offset = *off; |
718 | 720 | ||
719 | max_key.objectid = inode->i_ino; | 721 | max_key.objectid = ino; |
720 | max_key.type = (u8)-1; | 722 | max_key.type = (u8)-1; |
721 | max_key.offset = (u64)-1; | 723 | max_key.offset = (u64)-1; |
722 | 724 | ||
@@ -727,7 +729,7 @@ static int find_new_extents(struct btrfs_root *root, | |||
727 | path, 0, newer_than); | 729 | path, 0, newer_than); |
728 | if (ret != 0) | 730 | if (ret != 0) |
729 | goto none; | 731 | goto none; |
730 | if (min_key.objectid != inode->i_ino) | 732 | if (min_key.objectid != ino) |
731 | goto none; | 733 | goto none; |
732 | if (min_key.type != BTRFS_EXTENT_DATA_KEY) | 734 | if (min_key.type != BTRFS_EXTENT_DATA_KEY) |
733 | goto none; | 735 | goto none; |
@@ -2054,29 +2056,34 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) | |||
2054 | 2056 | ||
2055 | static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) | 2057 | static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) |
2056 | { | 2058 | { |
2057 | struct btrfs_ioctl_fs_info_args fi_args; | 2059 | struct btrfs_ioctl_fs_info_args *fi_args; |
2058 | struct btrfs_device *device; | 2060 | struct btrfs_device *device; |
2059 | struct btrfs_device *next; | 2061 | struct btrfs_device *next; |
2060 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | 2062 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
2063 | int ret = 0; | ||
2061 | 2064 | ||
2062 | if (!capable(CAP_SYS_ADMIN)) | 2065 | if (!capable(CAP_SYS_ADMIN)) |
2063 | return -EPERM; | 2066 | return -EPERM; |
2064 | 2067 | ||
2065 | fi_args.num_devices = fs_devices->num_devices; | 2068 | fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL); |
2066 | fi_args.max_id = 0; | 2069 | if (!fi_args) |
2067 | memcpy(&fi_args.fsid, root->fs_info->fsid, sizeof(fi_args.fsid)); | 2070 | return -ENOMEM; |
2071 | |||
2072 | fi_args->num_devices = fs_devices->num_devices; | ||
2073 | memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid)); | ||
2068 | 2074 | ||
2069 | mutex_lock(&fs_devices->device_list_mutex); | 2075 | mutex_lock(&fs_devices->device_list_mutex); |
2070 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { | 2076 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { |
2071 | if (device->devid > fi_args.max_id) | 2077 | if (device->devid > fi_args->max_id) |
2072 | fi_args.max_id = device->devid; | 2078 | fi_args->max_id = device->devid; |
2073 | } | 2079 | } |
2074 | mutex_unlock(&fs_devices->device_list_mutex); | 2080 | mutex_unlock(&fs_devices->device_list_mutex); |
2075 | 2081 | ||
2076 | if (copy_to_user(arg, &fi_args, sizeof(fi_args))) | 2082 | if (copy_to_user(arg, fi_args, sizeof(*fi_args))) |
2077 | return -EFAULT; | 2083 | ret = -EFAULT; |
2078 | 2084 | ||
2079 | return 0; | 2085 | kfree(fi_args); |
2086 | return ret; | ||
2080 | } | 2087 | } |
2081 | 2088 | ||
2082 | static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) | 2089 | static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) |
@@ -2489,12 +2496,10 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
2489 | if (ret) | 2496 | if (ret) |
2490 | goto out; | 2497 | goto out; |
2491 | 2498 | ||
2492 | mutex_lock(&root->fs_info->trans_mutex); | 2499 | atomic_inc(&root->fs_info->open_ioctl_trans); |
2493 | root->fs_info->open_ioctl_trans++; | ||
2494 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2495 | 2500 | ||
2496 | ret = -ENOMEM; | 2501 | ret = -ENOMEM; |
2497 | trans = btrfs_start_ioctl_transaction(root, 0); | 2502 | trans = btrfs_start_ioctl_transaction(root); |
2498 | if (IS_ERR(trans)) | 2503 | if (IS_ERR(trans)) |
2499 | goto out_drop; | 2504 | goto out_drop; |
2500 | 2505 | ||
@@ -2502,9 +2507,7 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
2502 | return 0; | 2507 | return 0; |
2503 | 2508 | ||
2504 | out_drop: | 2509 | out_drop: |
2505 | mutex_lock(&root->fs_info->trans_mutex); | 2510 | atomic_dec(&root->fs_info->open_ioctl_trans); |
2506 | root->fs_info->open_ioctl_trans--; | ||
2507 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2508 | mnt_drop_write(file->f_path.mnt); | 2511 | mnt_drop_write(file->f_path.mnt); |
2509 | out: | 2512 | out: |
2510 | return ret; | 2513 | return ret; |
@@ -2738,9 +2741,7 @@ long btrfs_ioctl_trans_end(struct file *file) | |||
2738 | 2741 | ||
2739 | btrfs_end_transaction(trans, root); | 2742 | btrfs_end_transaction(trans, root); |
2740 | 2743 | ||
2741 | mutex_lock(&root->fs_info->trans_mutex); | 2744 | atomic_dec(&root->fs_info->open_ioctl_trans); |
2742 | root->fs_info->open_ioctl_trans--; | ||
2743 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2744 | 2745 | ||
2745 | mnt_drop_write(file->f_path.mnt); | 2746 | mnt_drop_write(file->f_path.mnt); |
2746 | return 0; | 2747 | return 0; |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ca38eca70af0..5e0a3dc79a45 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -677,6 +677,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, | |||
677 | err = -ENOMEM; | 677 | err = -ENOMEM; |
678 | goto out; | 678 | goto out; |
679 | } | 679 | } |
680 | path1->reada = 1; | ||
681 | path2->reada = 2; | ||
680 | 682 | ||
681 | node = alloc_backref_node(cache); | 683 | node = alloc_backref_node(cache); |
682 | if (!node) { | 684 | if (!node) { |
@@ -1366,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
1366 | int ret; | 1368 | int ret; |
1367 | 1369 | ||
1368 | if (!root->reloc_root) | 1370 | if (!root->reloc_root) |
1369 | return 0; | 1371 | goto out; |
1370 | 1372 | ||
1371 | reloc_root = root->reloc_root; | 1373 | reloc_root = root->reloc_root; |
1372 | root_item = &reloc_root->root_item; | 1374 | root_item = &reloc_root->root_item; |
@@ -1388,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
1388 | ret = btrfs_update_root(trans, root->fs_info->tree_root, | 1390 | ret = btrfs_update_root(trans, root->fs_info->tree_root, |
1389 | &reloc_root->root_key, root_item); | 1391 | &reloc_root->root_key, root_item); |
1390 | BUG_ON(ret); | 1392 | BUG_ON(ret); |
1393 | |||
1394 | out: | ||
1391 | return 0; | 1395 | return 0; |
1392 | } | 1396 | } |
1393 | 1397 | ||
@@ -1999,6 +2003,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1999 | path = btrfs_alloc_path(); | 2003 | path = btrfs_alloc_path(); |
2000 | if (!path) | 2004 | if (!path) |
2001 | return -ENOMEM; | 2005 | return -ENOMEM; |
2006 | path->reada = 1; | ||
2002 | 2007 | ||
2003 | reloc_root = root->reloc_root; | 2008 | reloc_root = root->reloc_root; |
2004 | root_item = &reloc_root->root_item; | 2009 | root_item = &reloc_root->root_item; |
@@ -2139,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err) | |||
2139 | u64 num_bytes = 0; | 2144 | u64 num_bytes = 0; |
2140 | int ret; | 2145 | int ret; |
2141 | 2146 | ||
2142 | mutex_lock(&root->fs_info->trans_mutex); | 2147 | mutex_lock(&root->fs_info->reloc_mutex); |
2143 | rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; | 2148 | rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; |
2144 | rc->merging_rsv_size += rc->nodes_relocated * 2; | 2149 | rc->merging_rsv_size += rc->nodes_relocated * 2; |
2145 | mutex_unlock(&root->fs_info->trans_mutex); | 2150 | mutex_unlock(&root->fs_info->reloc_mutex); |
2151 | |||
2146 | again: | 2152 | again: |
2147 | if (!err) { | 2153 | if (!err) { |
2148 | num_bytes = rc->merging_rsv_size; | 2154 | num_bytes = rc->merging_rsv_size; |
@@ -2152,7 +2158,7 @@ again: | |||
2152 | err = ret; | 2158 | err = ret; |
2153 | } | 2159 | } |
2154 | 2160 | ||
2155 | trans = btrfs_join_transaction(rc->extent_root, 1); | 2161 | trans = btrfs_join_transaction(rc->extent_root); |
2156 | if (IS_ERR(trans)) { | 2162 | if (IS_ERR(trans)) { |
2157 | if (!err) | 2163 | if (!err) |
2158 | btrfs_block_rsv_release(rc->extent_root, | 2164 | btrfs_block_rsv_release(rc->extent_root, |
@@ -2211,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc) | |||
2211 | int ret; | 2217 | int ret; |
2212 | again: | 2218 | again: |
2213 | root = rc->extent_root; | 2219 | root = rc->extent_root; |
2214 | mutex_lock(&root->fs_info->trans_mutex); | 2220 | |
2221 | /* | ||
2222 | * this serializes us with btrfs_record_root_in_trans, | ||
2223 | * we have to make sure nobody is in the middle of | ||
2224 | * adding their roots to the list while we are | ||
2225 | * doing this splice | ||
2226 | */ | ||
2227 | mutex_lock(&root->fs_info->reloc_mutex); | ||
2215 | list_splice_init(&rc->reloc_roots, &reloc_roots); | 2228 | list_splice_init(&rc->reloc_roots, &reloc_roots); |
2216 | mutex_unlock(&root->fs_info->trans_mutex); | 2229 | mutex_unlock(&root->fs_info->reloc_mutex); |
2217 | 2230 | ||
2218 | while (!list_empty(&reloc_roots)) { | 2231 | while (!list_empty(&reloc_roots)) { |
2219 | found = 1; | 2232 | found = 1; |
@@ -3236,7 +3249,7 @@ truncate: | |||
3236 | goto out; | 3249 | goto out; |
3237 | } | 3250 | } |
3238 | 3251 | ||
3239 | trans = btrfs_join_transaction(root, 0); | 3252 | trans = btrfs_join_transaction(root); |
3240 | if (IS_ERR(trans)) { | 3253 | if (IS_ERR(trans)) { |
3241 | btrfs_free_path(path); | 3254 | btrfs_free_path(path); |
3242 | ret = PTR_ERR(trans); | 3255 | ret = PTR_ERR(trans); |
@@ -3300,6 +3313,7 @@ static int find_data_references(struct reloc_control *rc, | |||
3300 | path = btrfs_alloc_path(); | 3313 | path = btrfs_alloc_path(); |
3301 | if (!path) | 3314 | if (!path) |
3302 | return -ENOMEM; | 3315 | return -ENOMEM; |
3316 | path->reada = 1; | ||
3303 | 3317 | ||
3304 | root = read_fs_root(rc->extent_root->fs_info, ref_root); | 3318 | root = read_fs_root(rc->extent_root->fs_info, ref_root); |
3305 | if (IS_ERR(root)) { | 3319 | if (IS_ERR(root)) { |
@@ -3586,17 +3600,19 @@ next: | |||
3586 | static void set_reloc_control(struct reloc_control *rc) | 3600 | static void set_reloc_control(struct reloc_control *rc) |
3587 | { | 3601 | { |
3588 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; | 3602 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; |
3589 | mutex_lock(&fs_info->trans_mutex); | 3603 | |
3604 | mutex_lock(&fs_info->reloc_mutex); | ||
3590 | fs_info->reloc_ctl = rc; | 3605 | fs_info->reloc_ctl = rc; |
3591 | mutex_unlock(&fs_info->trans_mutex); | 3606 | mutex_unlock(&fs_info->reloc_mutex); |
3592 | } | 3607 | } |
3593 | 3608 | ||
3594 | static void unset_reloc_control(struct reloc_control *rc) | 3609 | static void unset_reloc_control(struct reloc_control *rc) |
3595 | { | 3610 | { |
3596 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; | 3611 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; |
3597 | mutex_lock(&fs_info->trans_mutex); | 3612 | |
3613 | mutex_lock(&fs_info->reloc_mutex); | ||
3598 | fs_info->reloc_ctl = NULL; | 3614 | fs_info->reloc_ctl = NULL; |
3599 | mutex_unlock(&fs_info->trans_mutex); | 3615 | mutex_unlock(&fs_info->reloc_mutex); |
3600 | } | 3616 | } |
3601 | 3617 | ||
3602 | static int check_extent_flags(u64 flags) | 3618 | static int check_extent_flags(u64 flags) |
@@ -3645,7 +3661,7 @@ int prepare_to_relocate(struct reloc_control *rc) | |||
3645 | rc->create_reloc_tree = 1; | 3661 | rc->create_reloc_tree = 1; |
3646 | set_reloc_control(rc); | 3662 | set_reloc_control(rc); |
3647 | 3663 | ||
3648 | trans = btrfs_join_transaction(rc->extent_root, 1); | 3664 | trans = btrfs_join_transaction(rc->extent_root); |
3649 | BUG_ON(IS_ERR(trans)); | 3665 | BUG_ON(IS_ERR(trans)); |
3650 | btrfs_commit_transaction(trans, rc->extent_root); | 3666 | btrfs_commit_transaction(trans, rc->extent_root); |
3651 | return 0; | 3667 | return 0; |
@@ -3668,6 +3684,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3668 | path = btrfs_alloc_path(); | 3684 | path = btrfs_alloc_path(); |
3669 | if (!path) | 3685 | if (!path) |
3670 | return -ENOMEM; | 3686 | return -ENOMEM; |
3687 | path->reada = 1; | ||
3671 | 3688 | ||
3672 | ret = prepare_to_relocate(rc); | 3689 | ret = prepare_to_relocate(rc); |
3673 | if (ret) { | 3690 | if (ret) { |
@@ -3834,7 +3851,7 @@ restart: | |||
3834 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); | 3851 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); |
3835 | 3852 | ||
3836 | /* get rid of pinned extents */ | 3853 | /* get rid of pinned extents */ |
3837 | trans = btrfs_join_transaction(rc->extent_root, 1); | 3854 | trans = btrfs_join_transaction(rc->extent_root); |
3838 | if (IS_ERR(trans)) | 3855 | if (IS_ERR(trans)) |
3839 | err = PTR_ERR(trans); | 3856 | err = PTR_ERR(trans); |
3840 | else | 3857 | else |
@@ -4093,6 +4110,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4093 | path = btrfs_alloc_path(); | 4110 | path = btrfs_alloc_path(); |
4094 | if (!path) | 4111 | if (!path) |
4095 | return -ENOMEM; | 4112 | return -ENOMEM; |
4113 | path->reada = -1; | ||
4096 | 4114 | ||
4097 | key.objectid = BTRFS_TREE_RELOC_OBJECTID; | 4115 | key.objectid = BTRFS_TREE_RELOC_OBJECTID; |
4098 | key.type = BTRFS_ROOT_ITEM_KEY; | 4116 | key.type = BTRFS_ROOT_ITEM_KEY; |
@@ -4159,7 +4177,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4159 | 4177 | ||
4160 | set_reloc_control(rc); | 4178 | set_reloc_control(rc); |
4161 | 4179 | ||
4162 | trans = btrfs_join_transaction(rc->extent_root, 1); | 4180 | trans = btrfs_join_transaction(rc->extent_root); |
4163 | if (IS_ERR(trans)) { | 4181 | if (IS_ERR(trans)) { |
4164 | unset_reloc_control(rc); | 4182 | unset_reloc_control(rc); |
4165 | err = PTR_ERR(trans); | 4183 | err = PTR_ERR(trans); |
@@ -4193,7 +4211,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4193 | 4211 | ||
4194 | unset_reloc_control(rc); | 4212 | unset_reloc_control(rc); |
4195 | 4213 | ||
4196 | trans = btrfs_join_transaction(rc->extent_root, 1); | 4214 | trans = btrfs_join_transaction(rc->extent_root); |
4197 | if (IS_ERR(trans)) | 4215 | if (IS_ERR(trans)) |
4198 | err = PTR_ERR(trans); | 4216 | err = PTR_ERR(trans); |
4199 | else | 4217 | else |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 6dfed0c27ac3..a8d03d5efb5d 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -16,13 +16,7 @@ | |||
16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/sched.h> | ||
20 | #include <linux/pagemap.h> | ||
21 | #include <linux/writeback.h> | ||
22 | #include <linux/blkdev.h> | 19 | #include <linux/blkdev.h> |
23 | #include <linux/rbtree.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/workqueue.h> | ||
26 | #include "ctree.h" | 20 | #include "ctree.h" |
27 | #include "volumes.h" | 21 | #include "volumes.h" |
28 | #include "disk-io.h" | 22 | #include "disk-io.h" |
@@ -117,33 +111,37 @@ static void scrub_free_csums(struct scrub_dev *sdev) | |||
117 | } | 111 | } |
118 | } | 112 | } |
119 | 113 | ||
114 | static void scrub_free_bio(struct bio *bio) | ||
115 | { | ||
116 | int i; | ||
117 | struct page *last_page = NULL; | ||
118 | |||
119 | if (!bio) | ||
120 | return; | ||
121 | |||
122 | for (i = 0; i < bio->bi_vcnt; ++i) { | ||
123 | if (bio->bi_io_vec[i].bv_page == last_page) | ||
124 | continue; | ||
125 | last_page = bio->bi_io_vec[i].bv_page; | ||
126 | __free_page(last_page); | ||
127 | } | ||
128 | bio_put(bio); | ||
129 | } | ||
130 | |||
120 | static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) | 131 | static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) |
121 | { | 132 | { |
122 | int i; | 133 | int i; |
123 | int j; | ||
124 | struct page *last_page; | ||
125 | 134 | ||
126 | if (!sdev) | 135 | if (!sdev) |
127 | return; | 136 | return; |
128 | 137 | ||
129 | for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { | 138 | for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { |
130 | struct scrub_bio *sbio = sdev->bios[i]; | 139 | struct scrub_bio *sbio = sdev->bios[i]; |
131 | struct bio *bio; | ||
132 | 140 | ||
133 | if (!sbio) | 141 | if (!sbio) |
134 | break; | 142 | break; |
135 | 143 | ||
136 | bio = sbio->bio; | 144 | scrub_free_bio(sbio->bio); |
137 | if (bio) { | ||
138 | last_page = NULL; | ||
139 | for (j = 0; j < bio->bi_vcnt; ++j) { | ||
140 | if (bio->bi_io_vec[j].bv_page == last_page) | ||
141 | continue; | ||
142 | last_page = bio->bi_io_vec[j].bv_page; | ||
143 | __free_page(last_page); | ||
144 | } | ||
145 | bio_put(bio); | ||
146 | } | ||
147 | kfree(sbio); | 145 | kfree(sbio); |
148 | } | 146 | } |
149 | 147 | ||
@@ -156,8 +154,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) | |||
156 | { | 154 | { |
157 | struct scrub_dev *sdev; | 155 | struct scrub_dev *sdev; |
158 | int i; | 156 | int i; |
159 | int j; | ||
160 | int ret; | ||
161 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; | 157 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; |
162 | 158 | ||
163 | sdev = kzalloc(sizeof(*sdev), GFP_NOFS); | 159 | sdev = kzalloc(sizeof(*sdev), GFP_NOFS); |
@@ -165,7 +161,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) | |||
165 | goto nomem; | 161 | goto nomem; |
166 | sdev->dev = dev; | 162 | sdev->dev = dev; |
167 | for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { | 163 | for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { |
168 | struct bio *bio; | ||
169 | struct scrub_bio *sbio; | 164 | struct scrub_bio *sbio; |
170 | 165 | ||
171 | sbio = kzalloc(sizeof(*sbio), GFP_NOFS); | 166 | sbio = kzalloc(sizeof(*sbio), GFP_NOFS); |
@@ -173,32 +168,10 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) | |||
173 | goto nomem; | 168 | goto nomem; |
174 | sdev->bios[i] = sbio; | 169 | sdev->bios[i] = sbio; |
175 | 170 | ||
176 | bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO); | ||
177 | if (!bio) | ||
178 | goto nomem; | ||
179 | |||
180 | sbio->index = i; | 171 | sbio->index = i; |
181 | sbio->sdev = sdev; | 172 | sbio->sdev = sdev; |
182 | sbio->bio = bio; | ||
183 | sbio->count = 0; | 173 | sbio->count = 0; |
184 | sbio->work.func = scrub_checksum; | 174 | sbio->work.func = scrub_checksum; |
185 | bio->bi_private = sdev->bios[i]; | ||
186 | bio->bi_end_io = scrub_bio_end_io; | ||
187 | bio->bi_sector = 0; | ||
188 | bio->bi_bdev = dev->bdev; | ||
189 | bio->bi_size = 0; | ||
190 | |||
191 | for (j = 0; j < SCRUB_PAGES_PER_BIO; ++j) { | ||
192 | struct page *page; | ||
193 | page = alloc_page(GFP_NOFS); | ||
194 | if (!page) | ||
195 | goto nomem; | ||
196 | |||
197 | ret = bio_add_page(bio, page, PAGE_SIZE, 0); | ||
198 | if (!ret) | ||
199 | goto nomem; | ||
200 | } | ||
201 | WARN_ON(bio->bi_vcnt != SCRUB_PAGES_PER_BIO); | ||
202 | 175 | ||
203 | if (i != SCRUB_BIOS_PER_DEV-1) | 176 | if (i != SCRUB_BIOS_PER_DEV-1) |
204 | sdev->bios[i]->next_free = i + 1; | 177 | sdev->bios[i]->next_free = i + 1; |
@@ -369,9 +342,6 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, | |||
369 | int ret; | 342 | int ret; |
370 | DECLARE_COMPLETION_ONSTACK(complete); | 343 | DECLARE_COMPLETION_ONSTACK(complete); |
371 | 344 | ||
372 | /* we are going to wait on this IO */ | ||
373 | rw |= REQ_SYNC; | ||
374 | |||
375 | bio = bio_alloc(GFP_NOFS, 1); | 345 | bio = bio_alloc(GFP_NOFS, 1); |
376 | bio->bi_bdev = bdev; | 346 | bio->bi_bdev = bdev; |
377 | bio->bi_sector = sector; | 347 | bio->bi_sector = sector; |
@@ -380,6 +350,7 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, | |||
380 | bio->bi_private = &complete; | 350 | bio->bi_private = &complete; |
381 | submit_bio(rw, bio); | 351 | submit_bio(rw, bio); |
382 | 352 | ||
353 | /* this will also unplug the queue */ | ||
383 | wait_for_completion(&complete); | 354 | wait_for_completion(&complete); |
384 | 355 | ||
385 | ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); | 356 | ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); |
@@ -394,6 +365,7 @@ static void scrub_bio_end_io(struct bio *bio, int err) | |||
394 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; | 365 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; |
395 | 366 | ||
396 | sbio->err = err; | 367 | sbio->err = err; |
368 | sbio->bio = bio; | ||
397 | 369 | ||
398 | btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); | 370 | btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); |
399 | } | 371 | } |
@@ -453,6 +425,8 @@ static void scrub_checksum(struct btrfs_work *work) | |||
453 | } | 425 | } |
454 | 426 | ||
455 | out: | 427 | out: |
428 | scrub_free_bio(sbio->bio); | ||
429 | sbio->bio = NULL; | ||
456 | spin_lock(&sdev->list_lock); | 430 | spin_lock(&sdev->list_lock); |
457 | sbio->next_free = sdev->first_free; | 431 | sbio->next_free = sdev->first_free; |
458 | sdev->first_free = sbio->index; | 432 | sdev->first_free = sbio->index; |
@@ -583,25 +557,50 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer) | |||
583 | static int scrub_submit(struct scrub_dev *sdev) | 557 | static int scrub_submit(struct scrub_dev *sdev) |
584 | { | 558 | { |
585 | struct scrub_bio *sbio; | 559 | struct scrub_bio *sbio; |
560 | struct bio *bio; | ||
561 | int i; | ||
586 | 562 | ||
587 | if (sdev->curr == -1) | 563 | if (sdev->curr == -1) |
588 | return 0; | 564 | return 0; |
589 | 565 | ||
590 | sbio = sdev->bios[sdev->curr]; | 566 | sbio = sdev->bios[sdev->curr]; |
591 | 567 | ||
592 | sbio->bio->bi_sector = sbio->physical >> 9; | 568 | bio = bio_alloc(GFP_NOFS, sbio->count); |
593 | sbio->bio->bi_size = sbio->count * PAGE_SIZE; | 569 | if (!bio) |
594 | sbio->bio->bi_next = NULL; | 570 | goto nomem; |
595 | sbio->bio->bi_flags |= 1 << BIO_UPTODATE; | 571 | |
596 | sbio->bio->bi_comp_cpu = -1; | 572 | bio->bi_private = sbio; |
597 | sbio->bio->bi_bdev = sdev->dev->bdev; | 573 | bio->bi_end_io = scrub_bio_end_io; |
574 | bio->bi_bdev = sdev->dev->bdev; | ||
575 | bio->bi_sector = sbio->physical >> 9; | ||
576 | |||
577 | for (i = 0; i < sbio->count; ++i) { | ||
578 | struct page *page; | ||
579 | int ret; | ||
580 | |||
581 | page = alloc_page(GFP_NOFS); | ||
582 | if (!page) | ||
583 | goto nomem; | ||
584 | |||
585 | ret = bio_add_page(bio, page, PAGE_SIZE, 0); | ||
586 | if (!ret) { | ||
587 | __free_page(page); | ||
588 | goto nomem; | ||
589 | } | ||
590 | } | ||
591 | |||
598 | sbio->err = 0; | 592 | sbio->err = 0; |
599 | sdev->curr = -1; | 593 | sdev->curr = -1; |
600 | atomic_inc(&sdev->in_flight); | 594 | atomic_inc(&sdev->in_flight); |
601 | 595 | ||
602 | submit_bio(0, sbio->bio); | 596 | submit_bio(READ, bio); |
603 | 597 | ||
604 | return 0; | 598 | return 0; |
599 | |||
600 | nomem: | ||
601 | scrub_free_bio(bio); | ||
602 | |||
603 | return -ENOMEM; | ||
605 | } | 604 | } |
606 | 605 | ||
607 | static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, | 606 | static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, |
@@ -633,7 +632,11 @@ again: | |||
633 | sbio->logical = logical; | 632 | sbio->logical = logical; |
634 | } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || | 633 | } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || |
635 | sbio->logical + sbio->count * PAGE_SIZE != logical) { | 634 | sbio->logical + sbio->count * PAGE_SIZE != logical) { |
636 | scrub_submit(sdev); | 635 | int ret; |
636 | |||
637 | ret = scrub_submit(sdev); | ||
638 | if (ret) | ||
639 | return ret; | ||
637 | goto again; | 640 | goto again; |
638 | } | 641 | } |
639 | sbio->spag[sbio->count].flags = flags; | 642 | sbio->spag[sbio->count].flags = flags; |
@@ -645,8 +648,13 @@ again: | |||
645 | memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); | 648 | memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); |
646 | } | 649 | } |
647 | ++sbio->count; | 650 | ++sbio->count; |
648 | if (sbio->count == SCRUB_PAGES_PER_BIO || force) | 651 | if (sbio->count == SCRUB_PAGES_PER_BIO || force) { |
649 | scrub_submit(sdev); | 652 | int ret; |
653 | |||
654 | ret = scrub_submit(sdev); | ||
655 | if (ret) | ||
656 | return ret; | ||
657 | } | ||
650 | 658 | ||
651 | return 0; | 659 | return 0; |
652 | } | 660 | } |
@@ -727,6 +735,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
727 | struct btrfs_root *root = fs_info->extent_root; | 735 | struct btrfs_root *root = fs_info->extent_root; |
728 | struct btrfs_root *csum_root = fs_info->csum_root; | 736 | struct btrfs_root *csum_root = fs_info->csum_root; |
729 | struct btrfs_extent_item *extent; | 737 | struct btrfs_extent_item *extent; |
738 | struct blk_plug plug; | ||
730 | u64 flags; | 739 | u64 flags; |
731 | int ret; | 740 | int ret; |
732 | int slot; | 741 | int slot; |
@@ -789,18 +798,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
789 | 798 | ||
790 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 799 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
791 | if (ret < 0) | 800 | if (ret < 0) |
792 | goto out; | 801 | goto out_noplug; |
793 | |||
794 | l = path->nodes[0]; | ||
795 | slot = path->slots[0]; | ||
796 | btrfs_item_key_to_cpu(l, &key, slot); | ||
797 | if (key.objectid != logical) { | ||
798 | ret = btrfs_previous_item(root, path, 0, | ||
799 | BTRFS_EXTENT_ITEM_KEY); | ||
800 | if (ret < 0) | ||
801 | goto out; | ||
802 | } | ||
803 | 802 | ||
803 | /* | ||
804 | * we might miss half an extent here, but that doesn't matter, | ||
805 | * as it's only the prefetch | ||
806 | */ | ||
804 | while (1) { | 807 | while (1) { |
805 | l = path->nodes[0]; | 808 | l = path->nodes[0]; |
806 | slot = path->slots[0]; | 809 | slot = path->slots[0]; |
@@ -809,7 +812,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
809 | if (ret == 0) | 812 | if (ret == 0) |
810 | continue; | 813 | continue; |
811 | if (ret < 0) | 814 | if (ret < 0) |
812 | goto out; | 815 | goto out_noplug; |
813 | 816 | ||
814 | break; | 817 | break; |
815 | } | 818 | } |
@@ -831,6 +834,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
831 | * the scrub. This might currently (crc32) end up to be about 1MB | 834 | * the scrub. This might currently (crc32) end up to be about 1MB |
832 | */ | 835 | */ |
833 | start_stripe = 0; | 836 | start_stripe = 0; |
837 | blk_start_plug(&plug); | ||
834 | again: | 838 | again: |
835 | logical = base + offset + start_stripe * increment; | 839 | logical = base + offset + start_stripe * increment; |
836 | for (i = start_stripe; i < nstripes; ++i) { | 840 | for (i = start_stripe; i < nstripes; ++i) { |
@@ -890,15 +894,20 @@ again: | |||
890 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 894 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
891 | if (ret < 0) | 895 | if (ret < 0) |
892 | goto out; | 896 | goto out; |
893 | 897 | if (ret > 0) { | |
894 | l = path->nodes[0]; | ||
895 | slot = path->slots[0]; | ||
896 | btrfs_item_key_to_cpu(l, &key, slot); | ||
897 | if (key.objectid != logical) { | ||
898 | ret = btrfs_previous_item(root, path, 0, | 898 | ret = btrfs_previous_item(root, path, 0, |
899 | BTRFS_EXTENT_ITEM_KEY); | 899 | BTRFS_EXTENT_ITEM_KEY); |
900 | if (ret < 0) | 900 | if (ret < 0) |
901 | goto out; | 901 | goto out; |
902 | if (ret > 0) { | ||
903 | /* there's no smaller item, so stick with the | ||
904 | * larger one */ | ||
905 | btrfs_release_path(path); | ||
906 | ret = btrfs_search_slot(NULL, root, &key, | ||
907 | path, 0, 0); | ||
908 | if (ret < 0) | ||
909 | goto out; | ||
910 | } | ||
902 | } | 911 | } |
903 | 912 | ||
904 | while (1) { | 913 | while (1) { |
@@ -972,6 +981,8 @@ next: | |||
972 | scrub_submit(sdev); | 981 | scrub_submit(sdev); |
973 | 982 | ||
974 | out: | 983 | out: |
984 | blk_finish_plug(&plug); | ||
985 | out_noplug: | ||
975 | btrfs_free_path(path); | 986 | btrfs_free_path(path); |
976 | return ret < 0 ? ret : 0; | 987 | return ret < 0 ? ret : 0; |
977 | } | 988 | } |
@@ -1047,8 +1058,15 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | |||
1047 | while (1) { | 1058 | while (1) { |
1048 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 1059 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
1049 | if (ret < 0) | 1060 | if (ret < 0) |
1050 | goto out; | 1061 | break; |
1051 | ret = 0; | 1062 | if (ret > 0) { |
1063 | if (path->slots[0] >= | ||
1064 | btrfs_header_nritems(path->nodes[0])) { | ||
1065 | ret = btrfs_next_leaf(root, path); | ||
1066 | if (ret) | ||
1067 | break; | ||
1068 | } | ||
1069 | } | ||
1052 | 1070 | ||
1053 | l = path->nodes[0]; | 1071 | l = path->nodes[0]; |
1054 | slot = path->slots[0]; | 1072 | slot = path->slots[0]; |
@@ -1058,7 +1076,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | |||
1058 | if (found_key.objectid != sdev->dev->devid) | 1076 | if (found_key.objectid != sdev->dev->devid) |
1059 | break; | 1077 | break; |
1060 | 1078 | ||
1061 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | 1079 | if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY) |
1062 | break; | 1080 | break; |
1063 | 1081 | ||
1064 | if (found_key.offset >= end) | 1082 | if (found_key.offset >= end) |
@@ -1087,7 +1105,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | |||
1087 | cache = btrfs_lookup_block_group(fs_info, chunk_offset); | 1105 | cache = btrfs_lookup_block_group(fs_info, chunk_offset); |
1088 | if (!cache) { | 1106 | if (!cache) { |
1089 | ret = -ENOENT; | 1107 | ret = -ENOENT; |
1090 | goto out; | 1108 | break; |
1091 | } | 1109 | } |
1092 | ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, | 1110 | ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, |
1093 | chunk_offset, length); | 1111 | chunk_offset, length); |
@@ -1099,9 +1117,13 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | |||
1099 | btrfs_release_path(path); | 1117 | btrfs_release_path(path); |
1100 | } | 1118 | } |
1101 | 1119 | ||
1102 | out: | ||
1103 | btrfs_free_path(path); | 1120 | btrfs_free_path(path); |
1104 | return ret; | 1121 | |
1122 | /* | ||
1123 | * ret can still be 1 from search_slot or next_leaf, | ||
1124 | * that's not an error | ||
1125 | */ | ||
1126 | return ret < 0 ? ret : 0; | ||
1105 | } | 1127 | } |
1106 | 1128 | ||
1107 | static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) | 1129 | static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) |
@@ -1138,8 +1160,12 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_root *root) | |||
1138 | struct btrfs_fs_info *fs_info = root->fs_info; | 1160 | struct btrfs_fs_info *fs_info = root->fs_info; |
1139 | 1161 | ||
1140 | mutex_lock(&fs_info->scrub_lock); | 1162 | mutex_lock(&fs_info->scrub_lock); |
1141 | if (fs_info->scrub_workers_refcnt == 0) | 1163 | if (fs_info->scrub_workers_refcnt == 0) { |
1164 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", | ||
1165 | fs_info->thread_pool_size, &fs_info->generic_worker); | ||
1166 | fs_info->scrub_workers.idle_thresh = 4; | ||
1142 | btrfs_start_workers(&fs_info->scrub_workers, 1); | 1167 | btrfs_start_workers(&fs_info->scrub_workers, 1); |
1168 | } | ||
1143 | ++fs_info->scrub_workers_refcnt; | 1169 | ++fs_info->scrub_workers_refcnt; |
1144 | mutex_unlock(&fs_info->scrub_lock); | 1170 | mutex_unlock(&fs_info->scrub_lock); |
1145 | 1171 | ||
@@ -1166,7 +1192,7 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, | |||
1166 | int ret; | 1192 | int ret; |
1167 | struct btrfs_device *dev; | 1193 | struct btrfs_device *dev; |
1168 | 1194 | ||
1169 | if (root->fs_info->closing) | 1195 | if (btrfs_fs_closing(root->fs_info)) |
1170 | return -EINVAL; | 1196 | return -EINVAL; |
1171 | 1197 | ||
1172 | /* | 1198 | /* |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9b2e7e5bc3ef..0bb4ebbb71b7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -161,7 +161,8 @@ enum { | |||
161 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, | 161 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, |
162 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, | 162 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, |
163 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, | 163 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, |
164 | Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_err, | 164 | Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, |
165 | Opt_inode_cache, Opt_err, | ||
165 | }; | 166 | }; |
166 | 167 | ||
167 | static match_table_t tokens = { | 168 | static match_table_t tokens = { |
@@ -193,6 +194,7 @@ static match_table_t tokens = { | |||
193 | {Opt_enospc_debug, "enospc_debug"}, | 194 | {Opt_enospc_debug, "enospc_debug"}, |
194 | {Opt_subvolrootid, "subvolrootid=%d"}, | 195 | {Opt_subvolrootid, "subvolrootid=%d"}, |
195 | {Opt_defrag, "autodefrag"}, | 196 | {Opt_defrag, "autodefrag"}, |
197 | {Opt_inode_cache, "inode_cache"}, | ||
196 | {Opt_err, NULL}, | 198 | {Opt_err, NULL}, |
197 | }; | 199 | }; |
198 | 200 | ||
@@ -361,6 +363,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
361 | printk(KERN_INFO "btrfs: enabling disk space caching\n"); | 363 | printk(KERN_INFO "btrfs: enabling disk space caching\n"); |
362 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); | 364 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); |
363 | break; | 365 | break; |
366 | case Opt_inode_cache: | ||
367 | printk(KERN_INFO "btrfs: enabling inode map caching\n"); | ||
368 | btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE); | ||
369 | break; | ||
364 | case Opt_clear_cache: | 370 | case Opt_clear_cache: |
365 | printk(KERN_INFO "btrfs: force clearing of disk cache\n"); | 371 | printk(KERN_INFO "btrfs: force clearing of disk cache\n"); |
366 | btrfs_set_opt(info->mount_opt, CLEAR_CACHE); | 372 | btrfs_set_opt(info->mount_opt, CLEAR_CACHE); |
@@ -819,7 +825,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
819 | } else { | 825 | } else { |
820 | char b[BDEVNAME_SIZE]; | 826 | char b[BDEVNAME_SIZE]; |
821 | 827 | ||
822 | s->s_flags = flags; | 828 | s->s_flags = flags | MS_NOSEC; |
823 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); | 829 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); |
824 | error = btrfs_fill_super(s, fs_devices, data, | 830 | error = btrfs_fill_super(s, fs_devices, data, |
825 | flags & MS_SILENT ? 1 : 0); | 831 | flags & MS_SILENT ? 1 : 0); |
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index c3c223ae6691..daac9ae6d731 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
@@ -28,152 +28,6 @@ | |||
28 | #include "disk-io.h" | 28 | #include "disk-io.h" |
29 | #include "transaction.h" | 29 | #include "transaction.h" |
30 | 30 | ||
31 | static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) | ||
32 | { | ||
33 | return snprintf(buf, PAGE_SIZE, "%llu\n", | ||
34 | (unsigned long long)btrfs_root_used(&root->root_item)); | ||
35 | } | ||
36 | |||
37 | static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) | ||
38 | { | ||
39 | return snprintf(buf, PAGE_SIZE, "%llu\n", | ||
40 | (unsigned long long)btrfs_root_limit(&root->root_item)); | ||
41 | } | ||
42 | |||
43 | static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) | ||
44 | { | ||
45 | |||
46 | return snprintf(buf, PAGE_SIZE, "%llu\n", | ||
47 | (unsigned long long)btrfs_super_bytes_used(&fs->super_copy)); | ||
48 | } | ||
49 | |||
50 | static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) | ||
51 | { | ||
52 | return snprintf(buf, PAGE_SIZE, "%llu\n", | ||
53 | (unsigned long long)btrfs_super_total_bytes(&fs->super_copy)); | ||
54 | } | ||
55 | |||
56 | static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) | ||
57 | { | ||
58 | return snprintf(buf, PAGE_SIZE, "%llu\n", | ||
59 | (unsigned long long)btrfs_super_sectorsize(&fs->super_copy)); | ||
60 | } | ||
61 | |||
62 | /* this is for root attrs (subvols/snapshots) */ | ||
63 | struct btrfs_root_attr { | ||
64 | struct attribute attr; | ||
65 | ssize_t (*show)(struct btrfs_root *, char *); | ||
66 | ssize_t (*store)(struct btrfs_root *, const char *, size_t); | ||
67 | }; | ||
68 | |||
69 | #define ROOT_ATTR(name, mode, show, store) \ | ||
70 | static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \ | ||
71 | show, store) | ||
72 | |||
73 | ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL); | ||
74 | ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL); | ||
75 | |||
76 | static struct attribute *btrfs_root_attrs[] = { | ||
77 | &btrfs_root_attr_blocks_used.attr, | ||
78 | &btrfs_root_attr_block_limit.attr, | ||
79 | NULL, | ||
80 | }; | ||
81 | |||
82 | /* this is for super attrs (actual full fs) */ | ||
83 | struct btrfs_super_attr { | ||
84 | struct attribute attr; | ||
85 | ssize_t (*show)(struct btrfs_fs_info *, char *); | ||
86 | ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t); | ||
87 | }; | ||
88 | |||
89 | #define SUPER_ATTR(name, mode, show, store) \ | ||
90 | static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \ | ||
91 | show, store) | ||
92 | |||
93 | SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL); | ||
94 | SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL); | ||
95 | SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL); | ||
96 | |||
97 | static struct attribute *btrfs_super_attrs[] = { | ||
98 | &btrfs_super_attr_blocks_used.attr, | ||
99 | &btrfs_super_attr_total_blocks.attr, | ||
100 | &btrfs_super_attr_blocksize.attr, | ||
101 | NULL, | ||
102 | }; | ||
103 | |||
104 | static ssize_t btrfs_super_attr_show(struct kobject *kobj, | ||
105 | struct attribute *attr, char *buf) | ||
106 | { | ||
107 | struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, | ||
108 | super_kobj); | ||
109 | struct btrfs_super_attr *a = container_of(attr, | ||
110 | struct btrfs_super_attr, | ||
111 | attr); | ||
112 | |||
113 | return a->show ? a->show(fs, buf) : 0; | ||
114 | } | ||
115 | |||
116 | static ssize_t btrfs_super_attr_store(struct kobject *kobj, | ||
117 | struct attribute *attr, | ||
118 | const char *buf, size_t len) | ||
119 | { | ||
120 | struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, | ||
121 | super_kobj); | ||
122 | struct btrfs_super_attr *a = container_of(attr, | ||
123 | struct btrfs_super_attr, | ||
124 | attr); | ||
125 | |||
126 | return a->store ? a->store(fs, buf, len) : 0; | ||
127 | } | ||
128 | |||
129 | static ssize_t btrfs_root_attr_show(struct kobject *kobj, | ||
130 | struct attribute *attr, char *buf) | ||
131 | { | ||
132 | struct btrfs_root *root = container_of(kobj, struct btrfs_root, | ||
133 | root_kobj); | ||
134 | struct btrfs_root_attr *a = container_of(attr, | ||
135 | struct btrfs_root_attr, | ||
136 | attr); | ||
137 | |||
138 | return a->show ? a->show(root, buf) : 0; | ||
139 | } | ||
140 | |||
141 | static ssize_t btrfs_root_attr_store(struct kobject *kobj, | ||
142 | struct attribute *attr, | ||
143 | const char *buf, size_t len) | ||
144 | { | ||
145 | struct btrfs_root *root = container_of(kobj, struct btrfs_root, | ||
146 | root_kobj); | ||
147 | struct btrfs_root_attr *a = container_of(attr, | ||
148 | struct btrfs_root_attr, | ||
149 | attr); | ||
150 | return a->store ? a->store(root, buf, len) : 0; | ||
151 | } | ||
152 | |||
153 | static void btrfs_super_release(struct kobject *kobj) | ||
154 | { | ||
155 | struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, | ||
156 | super_kobj); | ||
157 | complete(&fs->kobj_unregister); | ||
158 | } | ||
159 | |||
160 | static void btrfs_root_release(struct kobject *kobj) | ||
161 | { | ||
162 | struct btrfs_root *root = container_of(kobj, struct btrfs_root, | ||
163 | root_kobj); | ||
164 | complete(&root->kobj_unregister); | ||
165 | } | ||
166 | |||
167 | static const struct sysfs_ops btrfs_super_attr_ops = { | ||
168 | .show = btrfs_super_attr_show, | ||
169 | .store = btrfs_super_attr_store, | ||
170 | }; | ||
171 | |||
172 | static const struct sysfs_ops btrfs_root_attr_ops = { | ||
173 | .show = btrfs_root_attr_show, | ||
174 | .store = btrfs_root_attr_store, | ||
175 | }; | ||
176 | |||
177 | /* /sys/fs/btrfs/ entry */ | 31 | /* /sys/fs/btrfs/ entry */ |
178 | static struct kset *btrfs_kset; | 32 | static struct kset *btrfs_kset; |
179 | 33 | ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index dc80f7156923..51dcec86757f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -35,6 +35,7 @@ static noinline void put_transaction(struct btrfs_transaction *transaction) | |||
35 | { | 35 | { |
36 | WARN_ON(atomic_read(&transaction->use_count) == 0); | 36 | WARN_ON(atomic_read(&transaction->use_count) == 0); |
37 | if (atomic_dec_and_test(&transaction->use_count)) { | 37 | if (atomic_dec_and_test(&transaction->use_count)) { |
38 | BUG_ON(!list_empty(&transaction->list)); | ||
38 | memset(transaction, 0, sizeof(*transaction)); | 39 | memset(transaction, 0, sizeof(*transaction)); |
39 | kmem_cache_free(btrfs_transaction_cachep, transaction); | 40 | kmem_cache_free(btrfs_transaction_cachep, transaction); |
40 | } | 41 | } |
@@ -49,46 +50,72 @@ static noinline void switch_commit_root(struct btrfs_root *root) | |||
49 | /* | 50 | /* |
50 | * either allocate a new transaction or hop into the existing one | 51 | * either allocate a new transaction or hop into the existing one |
51 | */ | 52 | */ |
52 | static noinline int join_transaction(struct btrfs_root *root) | 53 | static noinline int join_transaction(struct btrfs_root *root, int nofail) |
53 | { | 54 | { |
54 | struct btrfs_transaction *cur_trans; | 55 | struct btrfs_transaction *cur_trans; |
56 | |||
57 | spin_lock(&root->fs_info->trans_lock); | ||
58 | if (root->fs_info->trans_no_join) { | ||
59 | if (!nofail) { | ||
60 | spin_unlock(&root->fs_info->trans_lock); | ||
61 | return -EBUSY; | ||
62 | } | ||
63 | } | ||
64 | |||
55 | cur_trans = root->fs_info->running_transaction; | 65 | cur_trans = root->fs_info->running_transaction; |
56 | if (!cur_trans) { | 66 | if (cur_trans) { |
57 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, | 67 | atomic_inc(&cur_trans->use_count); |
58 | GFP_NOFS); | 68 | atomic_inc(&cur_trans->num_writers); |
59 | if (!cur_trans) | 69 | cur_trans->num_joined++; |
60 | return -ENOMEM; | 70 | spin_unlock(&root->fs_info->trans_lock); |
61 | root->fs_info->generation++; | 71 | return 0; |
62 | atomic_set(&cur_trans->num_writers, 1); | 72 | } |
63 | cur_trans->num_joined = 0; | 73 | spin_unlock(&root->fs_info->trans_lock); |
64 | cur_trans->transid = root->fs_info->generation; | 74 | |
65 | init_waitqueue_head(&cur_trans->writer_wait); | 75 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); |
66 | init_waitqueue_head(&cur_trans->commit_wait); | 76 | if (!cur_trans) |
67 | cur_trans->in_commit = 0; | 77 | return -ENOMEM; |
68 | cur_trans->blocked = 0; | 78 | spin_lock(&root->fs_info->trans_lock); |
69 | atomic_set(&cur_trans->use_count, 1); | 79 | if (root->fs_info->running_transaction) { |
70 | cur_trans->commit_done = 0; | 80 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); |
71 | cur_trans->start_time = get_seconds(); | 81 | cur_trans = root->fs_info->running_transaction; |
72 | 82 | atomic_inc(&cur_trans->use_count); | |
73 | cur_trans->delayed_refs.root = RB_ROOT; | ||
74 | cur_trans->delayed_refs.num_entries = 0; | ||
75 | cur_trans->delayed_refs.num_heads_ready = 0; | ||
76 | cur_trans->delayed_refs.num_heads = 0; | ||
77 | cur_trans->delayed_refs.flushing = 0; | ||
78 | cur_trans->delayed_refs.run_delayed_start = 0; | ||
79 | spin_lock_init(&cur_trans->delayed_refs.lock); | ||
80 | |||
81 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | ||
82 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | ||
83 | extent_io_tree_init(&cur_trans->dirty_pages, | ||
84 | root->fs_info->btree_inode->i_mapping); | ||
85 | spin_lock(&root->fs_info->new_trans_lock); | ||
86 | root->fs_info->running_transaction = cur_trans; | ||
87 | spin_unlock(&root->fs_info->new_trans_lock); | ||
88 | } else { | ||
89 | atomic_inc(&cur_trans->num_writers); | 83 | atomic_inc(&cur_trans->num_writers); |
90 | cur_trans->num_joined++; | 84 | cur_trans->num_joined++; |
85 | spin_unlock(&root->fs_info->trans_lock); | ||
86 | return 0; | ||
91 | } | 87 | } |
88 | atomic_set(&cur_trans->num_writers, 1); | ||
89 | cur_trans->num_joined = 0; | ||
90 | init_waitqueue_head(&cur_trans->writer_wait); | ||
91 | init_waitqueue_head(&cur_trans->commit_wait); | ||
92 | cur_trans->in_commit = 0; | ||
93 | cur_trans->blocked = 0; | ||
94 | /* | ||
95 | * One for this trans handle, one so it will live on until we | ||
96 | * commit the transaction. | ||
97 | */ | ||
98 | atomic_set(&cur_trans->use_count, 2); | ||
99 | cur_trans->commit_done = 0; | ||
100 | cur_trans->start_time = get_seconds(); | ||
101 | |||
102 | cur_trans->delayed_refs.root = RB_ROOT; | ||
103 | cur_trans->delayed_refs.num_entries = 0; | ||
104 | cur_trans->delayed_refs.num_heads_ready = 0; | ||
105 | cur_trans->delayed_refs.num_heads = 0; | ||
106 | cur_trans->delayed_refs.flushing = 0; | ||
107 | cur_trans->delayed_refs.run_delayed_start = 0; | ||
108 | spin_lock_init(&cur_trans->commit_lock); | ||
109 | spin_lock_init(&cur_trans->delayed_refs.lock); | ||
110 | |||
111 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | ||
112 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | ||
113 | extent_io_tree_init(&cur_trans->dirty_pages, | ||
114 | root->fs_info->btree_inode->i_mapping); | ||
115 | root->fs_info->generation++; | ||
116 | cur_trans->transid = root->fs_info->generation; | ||
117 | root->fs_info->running_transaction = cur_trans; | ||
118 | spin_unlock(&root->fs_info->trans_lock); | ||
92 | 119 | ||
93 | return 0; | 120 | return 0; |
94 | } | 121 | } |
@@ -99,36 +126,82 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
99 | * to make sure the old root from before we joined the transaction is deleted | 126 | * to make sure the old root from before we joined the transaction is deleted |
100 | * when the transaction commits | 127 | * when the transaction commits |
101 | */ | 128 | */ |
102 | static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, | 129 | static int record_root_in_trans(struct btrfs_trans_handle *trans, |
103 | struct btrfs_root *root) | 130 | struct btrfs_root *root) |
104 | { | 131 | { |
105 | if (root->ref_cows && root->last_trans < trans->transid) { | 132 | if (root->ref_cows && root->last_trans < trans->transid) { |
106 | WARN_ON(root == root->fs_info->extent_root); | 133 | WARN_ON(root == root->fs_info->extent_root); |
107 | WARN_ON(root->commit_root != root->node); | 134 | WARN_ON(root->commit_root != root->node); |
108 | 135 | ||
136 | /* | ||
137 | * see below for in_trans_setup usage rules | ||
138 | * we have the reloc mutex held now, so there | ||
139 | * is only one writer in this function | ||
140 | */ | ||
141 | root->in_trans_setup = 1; | ||
142 | |||
143 | /* make sure readers find in_trans_setup before | ||
144 | * they find our root->last_trans update | ||
145 | */ | ||
146 | smp_wmb(); | ||
147 | |||
148 | spin_lock(&root->fs_info->fs_roots_radix_lock); | ||
149 | if (root->last_trans == trans->transid) { | ||
150 | spin_unlock(&root->fs_info->fs_roots_radix_lock); | ||
151 | return 0; | ||
152 | } | ||
109 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, | 153 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, |
110 | (unsigned long)root->root_key.objectid, | 154 | (unsigned long)root->root_key.objectid, |
111 | BTRFS_ROOT_TRANS_TAG); | 155 | BTRFS_ROOT_TRANS_TAG); |
156 | spin_unlock(&root->fs_info->fs_roots_radix_lock); | ||
112 | root->last_trans = trans->transid; | 157 | root->last_trans = trans->transid; |
158 | |||
159 | /* this is pretty tricky. We don't want to | ||
160 | * take the relocation lock in btrfs_record_root_in_trans | ||
161 | * unless we're really doing the first setup for this root in | ||
162 | * this transaction. | ||
163 | * | ||
164 | * Normally we'd use root->last_trans as a flag to decide | ||
165 | * if we want to take the expensive mutex. | ||
166 | * | ||
167 | * But, we have to set root->last_trans before we | ||
168 | * init the relocation root, otherwise, we trip over warnings | ||
169 | * in ctree.c. The solution used here is to flag ourselves | ||
170 | * with root->in_trans_setup. When this is 1, we're still | ||
171 | * fixing up the reloc trees and everyone must wait. | ||
172 | * | ||
173 | * When this is zero, they can trust root->last_trans and fly | ||
174 | * through btrfs_record_root_in_trans without having to take the | ||
175 | * lock. smp_wmb() makes sure that all the writes above are | ||
176 | * done before we pop in the zero below | ||
177 | */ | ||
113 | btrfs_init_reloc_root(trans, root); | 178 | btrfs_init_reloc_root(trans, root); |
179 | smp_wmb(); | ||
180 | root->in_trans_setup = 0; | ||
114 | } | 181 | } |
115 | return 0; | 182 | return 0; |
116 | } | 183 | } |
117 | 184 | ||
185 | |||
118 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | 186 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, |
119 | struct btrfs_root *root) | 187 | struct btrfs_root *root) |
120 | { | 188 | { |
121 | if (!root->ref_cows) | 189 | if (!root->ref_cows) |
122 | return 0; | 190 | return 0; |
123 | 191 | ||
124 | mutex_lock(&root->fs_info->trans_mutex); | 192 | /* |
125 | if (root->last_trans == trans->transid) { | 193 | * see record_root_in_trans for comments about in_trans_setup usage |
126 | mutex_unlock(&root->fs_info->trans_mutex); | 194 | * and barriers |
195 | */ | ||
196 | smp_rmb(); | ||
197 | if (root->last_trans == trans->transid && | ||
198 | !root->in_trans_setup) | ||
127 | return 0; | 199 | return 0; |
128 | } | ||
129 | 200 | ||
201 | mutex_lock(&root->fs_info->reloc_mutex); | ||
130 | record_root_in_trans(trans, root); | 202 | record_root_in_trans(trans, root); |
131 | mutex_unlock(&root->fs_info->trans_mutex); | 203 | mutex_unlock(&root->fs_info->reloc_mutex); |
204 | |||
132 | return 0; | 205 | return 0; |
133 | } | 206 | } |
134 | 207 | ||
@@ -140,21 +213,23 @@ static void wait_current_trans(struct btrfs_root *root) | |||
140 | { | 213 | { |
141 | struct btrfs_transaction *cur_trans; | 214 | struct btrfs_transaction *cur_trans; |
142 | 215 | ||
216 | spin_lock(&root->fs_info->trans_lock); | ||
143 | cur_trans = root->fs_info->running_transaction; | 217 | cur_trans = root->fs_info->running_transaction; |
144 | if (cur_trans && cur_trans->blocked) { | 218 | if (cur_trans && cur_trans->blocked) { |
145 | DEFINE_WAIT(wait); | 219 | DEFINE_WAIT(wait); |
146 | atomic_inc(&cur_trans->use_count); | 220 | atomic_inc(&cur_trans->use_count); |
221 | spin_unlock(&root->fs_info->trans_lock); | ||
147 | while (1) { | 222 | while (1) { |
148 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | 223 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, |
149 | TASK_UNINTERRUPTIBLE); | 224 | TASK_UNINTERRUPTIBLE); |
150 | if (!cur_trans->blocked) | 225 | if (!cur_trans->blocked) |
151 | break; | 226 | break; |
152 | mutex_unlock(&root->fs_info->trans_mutex); | ||
153 | schedule(); | 227 | schedule(); |
154 | mutex_lock(&root->fs_info->trans_mutex); | ||
155 | } | 228 | } |
156 | finish_wait(&root->fs_info->transaction_wait, &wait); | 229 | finish_wait(&root->fs_info->transaction_wait, &wait); |
157 | put_transaction(cur_trans); | 230 | put_transaction(cur_trans); |
231 | } else { | ||
232 | spin_unlock(&root->fs_info->trans_lock); | ||
158 | } | 233 | } |
159 | } | 234 | } |
160 | 235 | ||
@@ -167,10 +242,16 @@ enum btrfs_trans_type { | |||
167 | 242 | ||
168 | static int may_wait_transaction(struct btrfs_root *root, int type) | 243 | static int may_wait_transaction(struct btrfs_root *root, int type) |
169 | { | 244 | { |
170 | if (!root->fs_info->log_root_recovering && | 245 | if (root->fs_info->log_root_recovering) |
171 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || | 246 | return 0; |
172 | type == TRANS_USERSPACE)) | 247 | |
248 | if (type == TRANS_USERSPACE) | ||
173 | return 1; | 249 | return 1; |
250 | |||
251 | if (type == TRANS_START && | ||
252 | !atomic_read(&root->fs_info->open_ioctl_trans)) | ||
253 | return 1; | ||
254 | |||
174 | return 0; | 255 | return 0; |
175 | } | 256 | } |
176 | 257 | ||
@@ -184,36 +265,44 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
184 | 265 | ||
185 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | 266 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) |
186 | return ERR_PTR(-EROFS); | 267 | return ERR_PTR(-EROFS); |
268 | |||
269 | if (current->journal_info) { | ||
270 | WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK); | ||
271 | h = current->journal_info; | ||
272 | h->use_count++; | ||
273 | h->orig_rsv = h->block_rsv; | ||
274 | h->block_rsv = NULL; | ||
275 | goto got_it; | ||
276 | } | ||
187 | again: | 277 | again: |
188 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 278 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); |
189 | if (!h) | 279 | if (!h) |
190 | return ERR_PTR(-ENOMEM); | 280 | return ERR_PTR(-ENOMEM); |
191 | 281 | ||
192 | if (type != TRANS_JOIN_NOLOCK) | ||
193 | mutex_lock(&root->fs_info->trans_mutex); | ||
194 | if (may_wait_transaction(root, type)) | 282 | if (may_wait_transaction(root, type)) |
195 | wait_current_trans(root); | 283 | wait_current_trans(root); |
196 | 284 | ||
197 | ret = join_transaction(root); | 285 | do { |
286 | ret = join_transaction(root, type == TRANS_JOIN_NOLOCK); | ||
287 | if (ret == -EBUSY) | ||
288 | wait_current_trans(root); | ||
289 | } while (ret == -EBUSY); | ||
290 | |||
198 | if (ret < 0) { | 291 | if (ret < 0) { |
199 | kmem_cache_free(btrfs_trans_handle_cachep, h); | 292 | kmem_cache_free(btrfs_trans_handle_cachep, h); |
200 | if (type != TRANS_JOIN_NOLOCK) | ||
201 | mutex_unlock(&root->fs_info->trans_mutex); | ||
202 | return ERR_PTR(ret); | 293 | return ERR_PTR(ret); |
203 | } | 294 | } |
204 | 295 | ||
205 | cur_trans = root->fs_info->running_transaction; | 296 | cur_trans = root->fs_info->running_transaction; |
206 | atomic_inc(&cur_trans->use_count); | ||
207 | if (type != TRANS_JOIN_NOLOCK) | ||
208 | mutex_unlock(&root->fs_info->trans_mutex); | ||
209 | 297 | ||
210 | h->transid = cur_trans->transid; | 298 | h->transid = cur_trans->transid; |
211 | h->transaction = cur_trans; | 299 | h->transaction = cur_trans; |
212 | h->blocks_used = 0; | 300 | h->blocks_used = 0; |
213 | h->block_group = 0; | ||
214 | h->bytes_reserved = 0; | 301 | h->bytes_reserved = 0; |
215 | h->delayed_ref_updates = 0; | 302 | h->delayed_ref_updates = 0; |
303 | h->use_count = 1; | ||
216 | h->block_rsv = NULL; | 304 | h->block_rsv = NULL; |
305 | h->orig_rsv = NULL; | ||
217 | 306 | ||
218 | smp_mb(); | 307 | smp_mb(); |
219 | if (cur_trans->blocked && may_wait_transaction(root, type)) { | 308 | if (cur_trans->blocked && may_wait_transaction(root, type)) { |
@@ -241,11 +330,8 @@ again: | |||
241 | } | 330 | } |
242 | } | 331 | } |
243 | 332 | ||
244 | if (type != TRANS_JOIN_NOLOCK) | 333 | got_it: |
245 | mutex_lock(&root->fs_info->trans_mutex); | 334 | btrfs_record_root_in_trans(h, root); |
246 | record_root_in_trans(h, root); | ||
247 | if (type != TRANS_JOIN_NOLOCK) | ||
248 | mutex_unlock(&root->fs_info->trans_mutex); | ||
249 | 335 | ||
250 | if (!current->journal_info && type != TRANS_USERSPACE) | 336 | if (!current->journal_info && type != TRANS_USERSPACE) |
251 | current->journal_info = h; | 337 | current->journal_info = h; |
@@ -257,22 +343,19 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | |||
257 | { | 343 | { |
258 | return start_transaction(root, num_items, TRANS_START); | 344 | return start_transaction(root, num_items, TRANS_START); |
259 | } | 345 | } |
260 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 346 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) |
261 | int num_blocks) | ||
262 | { | 347 | { |
263 | return start_transaction(root, 0, TRANS_JOIN); | 348 | return start_transaction(root, 0, TRANS_JOIN); |
264 | } | 349 | } |
265 | 350 | ||
266 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, | 351 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root) |
267 | int num_blocks) | ||
268 | { | 352 | { |
269 | return start_transaction(root, 0, TRANS_JOIN_NOLOCK); | 353 | return start_transaction(root, 0, TRANS_JOIN_NOLOCK); |
270 | } | 354 | } |
271 | 355 | ||
272 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 356 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root) |
273 | int num_blocks) | ||
274 | { | 357 | { |
275 | return start_transaction(r, 0, TRANS_USERSPACE); | 358 | return start_transaction(root, 0, TRANS_USERSPACE); |
276 | } | 359 | } |
277 | 360 | ||
278 | /* wait for a transaction commit to be fully complete */ | 361 | /* wait for a transaction commit to be fully complete */ |
@@ -280,17 +363,13 @@ static noinline int wait_for_commit(struct btrfs_root *root, | |||
280 | struct btrfs_transaction *commit) | 363 | struct btrfs_transaction *commit) |
281 | { | 364 | { |
282 | DEFINE_WAIT(wait); | 365 | DEFINE_WAIT(wait); |
283 | mutex_lock(&root->fs_info->trans_mutex); | ||
284 | while (!commit->commit_done) { | 366 | while (!commit->commit_done) { |
285 | prepare_to_wait(&commit->commit_wait, &wait, | 367 | prepare_to_wait(&commit->commit_wait, &wait, |
286 | TASK_UNINTERRUPTIBLE); | 368 | TASK_UNINTERRUPTIBLE); |
287 | if (commit->commit_done) | 369 | if (commit->commit_done) |
288 | break; | 370 | break; |
289 | mutex_unlock(&root->fs_info->trans_mutex); | ||
290 | schedule(); | 371 | schedule(); |
291 | mutex_lock(&root->fs_info->trans_mutex); | ||
292 | } | 372 | } |
293 | mutex_unlock(&root->fs_info->trans_mutex); | ||
294 | finish_wait(&commit->commit_wait, &wait); | 373 | finish_wait(&commit->commit_wait, &wait); |
295 | return 0; | 374 | return 0; |
296 | } | 375 | } |
@@ -300,59 +379,56 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | |||
300 | struct btrfs_transaction *cur_trans = NULL, *t; | 379 | struct btrfs_transaction *cur_trans = NULL, *t; |
301 | int ret; | 380 | int ret; |
302 | 381 | ||
303 | mutex_lock(&root->fs_info->trans_mutex); | ||
304 | |||
305 | ret = 0; | 382 | ret = 0; |
306 | if (transid) { | 383 | if (transid) { |
307 | if (transid <= root->fs_info->last_trans_committed) | 384 | if (transid <= root->fs_info->last_trans_committed) |
308 | goto out_unlock; | 385 | goto out; |
309 | 386 | ||
310 | /* find specified transaction */ | 387 | /* find specified transaction */ |
388 | spin_lock(&root->fs_info->trans_lock); | ||
311 | list_for_each_entry(t, &root->fs_info->trans_list, list) { | 389 | list_for_each_entry(t, &root->fs_info->trans_list, list) { |
312 | if (t->transid == transid) { | 390 | if (t->transid == transid) { |
313 | cur_trans = t; | 391 | cur_trans = t; |
392 | atomic_inc(&cur_trans->use_count); | ||
314 | break; | 393 | break; |
315 | } | 394 | } |
316 | if (t->transid > transid) | 395 | if (t->transid > transid) |
317 | break; | 396 | break; |
318 | } | 397 | } |
398 | spin_unlock(&root->fs_info->trans_lock); | ||
319 | ret = -EINVAL; | 399 | ret = -EINVAL; |
320 | if (!cur_trans) | 400 | if (!cur_trans) |
321 | goto out_unlock; /* bad transid */ | 401 | goto out; /* bad transid */ |
322 | } else { | 402 | } else { |
323 | /* find newest transaction that is committing | committed */ | 403 | /* find newest transaction that is committing | committed */ |
404 | spin_lock(&root->fs_info->trans_lock); | ||
324 | list_for_each_entry_reverse(t, &root->fs_info->trans_list, | 405 | list_for_each_entry_reverse(t, &root->fs_info->trans_list, |
325 | list) { | 406 | list) { |
326 | if (t->in_commit) { | 407 | if (t->in_commit) { |
327 | if (t->commit_done) | 408 | if (t->commit_done) |
328 | goto out_unlock; | 409 | break; |
329 | cur_trans = t; | 410 | cur_trans = t; |
411 | atomic_inc(&cur_trans->use_count); | ||
330 | break; | 412 | break; |
331 | } | 413 | } |
332 | } | 414 | } |
415 | spin_unlock(&root->fs_info->trans_lock); | ||
333 | if (!cur_trans) | 416 | if (!cur_trans) |
334 | goto out_unlock; /* nothing committing|committed */ | 417 | goto out; /* nothing committing|committed */ |
335 | } | 418 | } |
336 | 419 | ||
337 | atomic_inc(&cur_trans->use_count); | ||
338 | mutex_unlock(&root->fs_info->trans_mutex); | ||
339 | |||
340 | wait_for_commit(root, cur_trans); | 420 | wait_for_commit(root, cur_trans); |
341 | 421 | ||
342 | mutex_lock(&root->fs_info->trans_mutex); | ||
343 | put_transaction(cur_trans); | 422 | put_transaction(cur_trans); |
344 | ret = 0; | 423 | ret = 0; |
345 | out_unlock: | 424 | out: |
346 | mutex_unlock(&root->fs_info->trans_mutex); | ||
347 | return ret; | 425 | return ret; |
348 | } | 426 | } |
349 | 427 | ||
350 | void btrfs_throttle(struct btrfs_root *root) | 428 | void btrfs_throttle(struct btrfs_root *root) |
351 | { | 429 | { |
352 | mutex_lock(&root->fs_info->trans_mutex); | 430 | if (!atomic_read(&root->fs_info->open_ioctl_trans)) |
353 | if (!root->fs_info->open_ioctl_trans) | ||
354 | wait_current_trans(root); | 431 | wait_current_trans(root); |
355 | mutex_unlock(&root->fs_info->trans_mutex); | ||
356 | } | 432 | } |
357 | 433 | ||
358 | static int should_end_transaction(struct btrfs_trans_handle *trans, | 434 | static int should_end_transaction(struct btrfs_trans_handle *trans, |
@@ -370,6 +446,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | |||
370 | struct btrfs_transaction *cur_trans = trans->transaction; | 446 | struct btrfs_transaction *cur_trans = trans->transaction; |
371 | int updates; | 447 | int updates; |
372 | 448 | ||
449 | smp_mb(); | ||
373 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) | 450 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) |
374 | return 1; | 451 | return 1; |
375 | 452 | ||
@@ -388,6 +465,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
388 | struct btrfs_fs_info *info = root->fs_info; | 465 | struct btrfs_fs_info *info = root->fs_info; |
389 | int count = 0; | 466 | int count = 0; |
390 | 467 | ||
468 | if (--trans->use_count) { | ||
469 | trans->block_rsv = trans->orig_rsv; | ||
470 | return 0; | ||
471 | } | ||
472 | |||
391 | while (count < 4) { | 473 | while (count < 4) { |
392 | unsigned long cur = trans->delayed_ref_updates; | 474 | unsigned long cur = trans->delayed_ref_updates; |
393 | trans->delayed_ref_updates = 0; | 475 | trans->delayed_ref_updates = 0; |
@@ -410,9 +492,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
410 | 492 | ||
411 | btrfs_trans_release_metadata(trans, root); | 493 | btrfs_trans_release_metadata(trans, root); |
412 | 494 | ||
413 | if (lock && !root->fs_info->open_ioctl_trans && | 495 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && |
414 | should_end_transaction(trans, root)) | 496 | should_end_transaction(trans, root)) { |
415 | trans->transaction->blocked = 1; | 497 | trans->transaction->blocked = 1; |
498 | smp_wmb(); | ||
499 | } | ||
416 | 500 | ||
417 | if (lock && cur_trans->blocked && !cur_trans->in_commit) { | 501 | if (lock && cur_trans->blocked && !cur_trans->in_commit) { |
418 | if (throttle) | 502 | if (throttle) |
@@ -703,9 +787,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
703 | */ | 787 | */ |
704 | int btrfs_add_dead_root(struct btrfs_root *root) | 788 | int btrfs_add_dead_root(struct btrfs_root *root) |
705 | { | 789 | { |
706 | mutex_lock(&root->fs_info->trans_mutex); | 790 | spin_lock(&root->fs_info->trans_lock); |
707 | list_add(&root->root_list, &root->fs_info->dead_roots); | 791 | list_add(&root->root_list, &root->fs_info->dead_roots); |
708 | mutex_unlock(&root->fs_info->trans_mutex); | 792 | spin_unlock(&root->fs_info->trans_lock); |
709 | return 0; | 793 | return 0; |
710 | } | 794 | } |
711 | 795 | ||
@@ -721,6 +805,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
721 | int ret; | 805 | int ret; |
722 | int err = 0; | 806 | int err = 0; |
723 | 807 | ||
808 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
724 | while (1) { | 809 | while (1) { |
725 | ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, | 810 | ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, |
726 | (void **)gang, 0, | 811 | (void **)gang, 0, |
@@ -733,6 +818,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
733 | radix_tree_tag_clear(&fs_info->fs_roots_radix, | 818 | radix_tree_tag_clear(&fs_info->fs_roots_radix, |
734 | (unsigned long)root->root_key.objectid, | 819 | (unsigned long)root->root_key.objectid, |
735 | BTRFS_ROOT_TRANS_TAG); | 820 | BTRFS_ROOT_TRANS_TAG); |
821 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
736 | 822 | ||
737 | btrfs_free_log(trans, root); | 823 | btrfs_free_log(trans, root); |
738 | btrfs_update_reloc_root(trans, root); | 824 | btrfs_update_reloc_root(trans, root); |
@@ -753,10 +839,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
753 | err = btrfs_update_root(trans, fs_info->tree_root, | 839 | err = btrfs_update_root(trans, fs_info->tree_root, |
754 | &root->root_key, | 840 | &root->root_key, |
755 | &root->root_item); | 841 | &root->root_item); |
842 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
756 | if (err) | 843 | if (err) |
757 | break; | 844 | break; |
758 | } | 845 | } |
759 | } | 846 | } |
847 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
760 | return err; | 848 | return err; |
761 | } | 849 | } |
762 | 850 | ||
@@ -786,7 +874,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | |||
786 | btrfs_btree_balance_dirty(info->tree_root, nr); | 874 | btrfs_btree_balance_dirty(info->tree_root, nr); |
787 | cond_resched(); | 875 | cond_resched(); |
788 | 876 | ||
789 | if (root->fs_info->closing || ret != -EAGAIN) | 877 | if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN) |
790 | break; | 878 | break; |
791 | } | 879 | } |
792 | root->defrag_running = 0; | 880 | root->defrag_running = 0; |
@@ -869,6 +957,15 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
869 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | 957 | ret = btrfs_update_inode(trans, parent_root, parent_inode); |
870 | BUG_ON(ret); | 958 | BUG_ON(ret); |
871 | 959 | ||
960 | /* | ||
961 | * pull in the delayed directory update | ||
962 | * and the delayed inode item | ||
963 | * otherwise we corrupt the FS during | ||
964 | * snapshot | ||
965 | */ | ||
966 | ret = btrfs_run_delayed_items(trans, root); | ||
967 | BUG_ON(ret); | ||
968 | |||
872 | record_root_in_trans(trans, root); | 969 | record_root_in_trans(trans, root); |
873 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 970 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
874 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 971 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
@@ -930,14 +1027,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, | |||
930 | int ret; | 1027 | int ret; |
931 | 1028 | ||
932 | list_for_each_entry(pending, head, list) { | 1029 | list_for_each_entry(pending, head, list) { |
933 | /* | ||
934 | * We must deal with the delayed items before creating | ||
935 | * snapshots, or we will create a snapthot with inconsistent | ||
936 | * information. | ||
937 | */ | ||
938 | ret = btrfs_run_delayed_items(trans, fs_info->fs_root); | ||
939 | BUG_ON(ret); | ||
940 | |||
941 | ret = create_pending_snapshot(trans, fs_info, pending); | 1030 | ret = create_pending_snapshot(trans, fs_info, pending); |
942 | BUG_ON(ret); | 1031 | BUG_ON(ret); |
943 | } | 1032 | } |
@@ -967,20 +1056,20 @@ static void update_super_roots(struct btrfs_root *root) | |||
967 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info) | 1056 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info) |
968 | { | 1057 | { |
969 | int ret = 0; | 1058 | int ret = 0; |
970 | spin_lock(&info->new_trans_lock); | 1059 | spin_lock(&info->trans_lock); |
971 | if (info->running_transaction) | 1060 | if (info->running_transaction) |
972 | ret = info->running_transaction->in_commit; | 1061 | ret = info->running_transaction->in_commit; |
973 | spin_unlock(&info->new_trans_lock); | 1062 | spin_unlock(&info->trans_lock); |
974 | return ret; | 1063 | return ret; |
975 | } | 1064 | } |
976 | 1065 | ||
977 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) | 1066 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) |
978 | { | 1067 | { |
979 | int ret = 0; | 1068 | int ret = 0; |
980 | spin_lock(&info->new_trans_lock); | 1069 | spin_lock(&info->trans_lock); |
981 | if (info->running_transaction) | 1070 | if (info->running_transaction) |
982 | ret = info->running_transaction->blocked; | 1071 | ret = info->running_transaction->blocked; |
983 | spin_unlock(&info->new_trans_lock); | 1072 | spin_unlock(&info->trans_lock); |
984 | return ret; | 1073 | return ret; |
985 | } | 1074 | } |
986 | 1075 | ||
@@ -1004,9 +1093,7 @@ static void wait_current_trans_commit_start(struct btrfs_root *root, | |||
1004 | &wait); | 1093 | &wait); |
1005 | break; | 1094 | break; |
1006 | } | 1095 | } |
1007 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1008 | schedule(); | 1096 | schedule(); |
1009 | mutex_lock(&root->fs_info->trans_mutex); | ||
1010 | finish_wait(&root->fs_info->transaction_blocked_wait, &wait); | 1097 | finish_wait(&root->fs_info->transaction_blocked_wait, &wait); |
1011 | } | 1098 | } |
1012 | } | 1099 | } |
@@ -1032,9 +1119,7 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, | |||
1032 | &wait); | 1119 | &wait); |
1033 | break; | 1120 | break; |
1034 | } | 1121 | } |
1035 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1036 | schedule(); | 1122 | schedule(); |
1037 | mutex_lock(&root->fs_info->trans_mutex); | ||
1038 | finish_wait(&root->fs_info->transaction_wait, | 1123 | finish_wait(&root->fs_info->transaction_wait, |
1039 | &wait); | 1124 | &wait); |
1040 | } | 1125 | } |
@@ -1072,7 +1157,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
1072 | 1157 | ||
1073 | INIT_DELAYED_WORK(&ac->work, do_async_commit); | 1158 | INIT_DELAYED_WORK(&ac->work, do_async_commit); |
1074 | ac->root = root; | 1159 | ac->root = root; |
1075 | ac->newtrans = btrfs_join_transaction(root, 0); | 1160 | ac->newtrans = btrfs_join_transaction(root); |
1076 | if (IS_ERR(ac->newtrans)) { | 1161 | if (IS_ERR(ac->newtrans)) { |
1077 | int err = PTR_ERR(ac->newtrans); | 1162 | int err = PTR_ERR(ac->newtrans); |
1078 | kfree(ac); | 1163 | kfree(ac); |
@@ -1080,23 +1165,22 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
1080 | } | 1165 | } |
1081 | 1166 | ||
1082 | /* take transaction reference */ | 1167 | /* take transaction reference */ |
1083 | mutex_lock(&root->fs_info->trans_mutex); | ||
1084 | cur_trans = trans->transaction; | 1168 | cur_trans = trans->transaction; |
1085 | atomic_inc(&cur_trans->use_count); | 1169 | atomic_inc(&cur_trans->use_count); |
1086 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1087 | 1170 | ||
1088 | btrfs_end_transaction(trans, root); | 1171 | btrfs_end_transaction(trans, root); |
1089 | schedule_delayed_work(&ac->work, 0); | 1172 | schedule_delayed_work(&ac->work, 0); |
1090 | 1173 | ||
1091 | /* wait for transaction to start and unblock */ | 1174 | /* wait for transaction to start and unblock */ |
1092 | mutex_lock(&root->fs_info->trans_mutex); | ||
1093 | if (wait_for_unblock) | 1175 | if (wait_for_unblock) |
1094 | wait_current_trans_commit_start_and_unblock(root, cur_trans); | 1176 | wait_current_trans_commit_start_and_unblock(root, cur_trans); |
1095 | else | 1177 | else |
1096 | wait_current_trans_commit_start(root, cur_trans); | 1178 | wait_current_trans_commit_start(root, cur_trans); |
1097 | put_transaction(cur_trans); | ||
1098 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1099 | 1179 | ||
1180 | if (current->journal_info == trans) | ||
1181 | current->journal_info = NULL; | ||
1182 | |||
1183 | put_transaction(cur_trans); | ||
1100 | return 0; | 1184 | return 0; |
1101 | } | 1185 | } |
1102 | 1186 | ||
@@ -1139,38 +1223,41 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1139 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1223 | ret = btrfs_run_delayed_refs(trans, root, 0); |
1140 | BUG_ON(ret); | 1224 | BUG_ON(ret); |
1141 | 1225 | ||
1142 | mutex_lock(&root->fs_info->trans_mutex); | 1226 | spin_lock(&cur_trans->commit_lock); |
1143 | if (cur_trans->in_commit) { | 1227 | if (cur_trans->in_commit) { |
1228 | spin_unlock(&cur_trans->commit_lock); | ||
1144 | atomic_inc(&cur_trans->use_count); | 1229 | atomic_inc(&cur_trans->use_count); |
1145 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1146 | btrfs_end_transaction(trans, root); | 1230 | btrfs_end_transaction(trans, root); |
1147 | 1231 | ||
1148 | ret = wait_for_commit(root, cur_trans); | 1232 | ret = wait_for_commit(root, cur_trans); |
1149 | BUG_ON(ret); | 1233 | BUG_ON(ret); |
1150 | 1234 | ||
1151 | mutex_lock(&root->fs_info->trans_mutex); | ||
1152 | put_transaction(cur_trans); | 1235 | put_transaction(cur_trans); |
1153 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1154 | 1236 | ||
1155 | return 0; | 1237 | return 0; |
1156 | } | 1238 | } |
1157 | 1239 | ||
1158 | trans->transaction->in_commit = 1; | 1240 | trans->transaction->in_commit = 1; |
1159 | trans->transaction->blocked = 1; | 1241 | trans->transaction->blocked = 1; |
1242 | spin_unlock(&cur_trans->commit_lock); | ||
1160 | wake_up(&root->fs_info->transaction_blocked_wait); | 1243 | wake_up(&root->fs_info->transaction_blocked_wait); |
1161 | 1244 | ||
1245 | spin_lock(&root->fs_info->trans_lock); | ||
1162 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | 1246 | if (cur_trans->list.prev != &root->fs_info->trans_list) { |
1163 | prev_trans = list_entry(cur_trans->list.prev, | 1247 | prev_trans = list_entry(cur_trans->list.prev, |
1164 | struct btrfs_transaction, list); | 1248 | struct btrfs_transaction, list); |
1165 | if (!prev_trans->commit_done) { | 1249 | if (!prev_trans->commit_done) { |
1166 | atomic_inc(&prev_trans->use_count); | 1250 | atomic_inc(&prev_trans->use_count); |
1167 | mutex_unlock(&root->fs_info->trans_mutex); | 1251 | spin_unlock(&root->fs_info->trans_lock); |
1168 | 1252 | ||
1169 | wait_for_commit(root, prev_trans); | 1253 | wait_for_commit(root, prev_trans); |
1170 | 1254 | ||
1171 | mutex_lock(&root->fs_info->trans_mutex); | ||
1172 | put_transaction(prev_trans); | 1255 | put_transaction(prev_trans); |
1256 | } else { | ||
1257 | spin_unlock(&root->fs_info->trans_lock); | ||
1173 | } | 1258 | } |
1259 | } else { | ||
1260 | spin_unlock(&root->fs_info->trans_lock); | ||
1174 | } | 1261 | } |
1175 | 1262 | ||
1176 | if (now < cur_trans->start_time || now - cur_trans->start_time < 1) | 1263 | if (now < cur_trans->start_time || now - cur_trans->start_time < 1) |
@@ -1178,12 +1265,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1178 | 1265 | ||
1179 | do { | 1266 | do { |
1180 | int snap_pending = 0; | 1267 | int snap_pending = 0; |
1268 | |||
1181 | joined = cur_trans->num_joined; | 1269 | joined = cur_trans->num_joined; |
1182 | if (!list_empty(&trans->transaction->pending_snapshots)) | 1270 | if (!list_empty(&trans->transaction->pending_snapshots)) |
1183 | snap_pending = 1; | 1271 | snap_pending = 1; |
1184 | 1272 | ||
1185 | WARN_ON(cur_trans != trans->transaction); | 1273 | WARN_ON(cur_trans != trans->transaction); |
1186 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1187 | 1274 | ||
1188 | if (flush_on_commit || snap_pending) { | 1275 | if (flush_on_commit || snap_pending) { |
1189 | btrfs_start_delalloc_inodes(root, 1); | 1276 | btrfs_start_delalloc_inodes(root, 1); |
@@ -1206,26 +1293,48 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1206 | prepare_to_wait(&cur_trans->writer_wait, &wait, | 1293 | prepare_to_wait(&cur_trans->writer_wait, &wait, |
1207 | TASK_UNINTERRUPTIBLE); | 1294 | TASK_UNINTERRUPTIBLE); |
1208 | 1295 | ||
1209 | smp_mb(); | ||
1210 | if (atomic_read(&cur_trans->num_writers) > 1) | 1296 | if (atomic_read(&cur_trans->num_writers) > 1) |
1211 | schedule_timeout(MAX_SCHEDULE_TIMEOUT); | 1297 | schedule_timeout(MAX_SCHEDULE_TIMEOUT); |
1212 | else if (should_grow) | 1298 | else if (should_grow) |
1213 | schedule_timeout(1); | 1299 | schedule_timeout(1); |
1214 | 1300 | ||
1215 | mutex_lock(&root->fs_info->trans_mutex); | ||
1216 | finish_wait(&cur_trans->writer_wait, &wait); | 1301 | finish_wait(&cur_trans->writer_wait, &wait); |
1217 | } while (atomic_read(&cur_trans->num_writers) > 1 || | 1302 | } while (atomic_read(&cur_trans->num_writers) > 1 || |
1218 | (should_grow && cur_trans->num_joined != joined)); | 1303 | (should_grow && cur_trans->num_joined != joined)); |
1219 | 1304 | ||
1220 | ret = create_pending_snapshots(trans, root->fs_info); | 1305 | /* |
1221 | BUG_ON(ret); | 1306 | * Ok now we need to make sure to block out any other joins while we |
1307 | * commit the transaction. We could have started a join before setting | ||
1308 | * no_join so make sure to wait for num_writers to == 1 again. | ||
1309 | */ | ||
1310 | spin_lock(&root->fs_info->trans_lock); | ||
1311 | root->fs_info->trans_no_join = 1; | ||
1312 | spin_unlock(&root->fs_info->trans_lock); | ||
1313 | wait_event(cur_trans->writer_wait, | ||
1314 | atomic_read(&cur_trans->num_writers) == 1); | ||
1315 | |||
1316 | /* | ||
1317 | * the reloc mutex makes sure that we stop | ||
1318 | * the balancing code from coming in and moving | ||
1319 | * extents around in the middle of the commit | ||
1320 | */ | ||
1321 | mutex_lock(&root->fs_info->reloc_mutex); | ||
1222 | 1322 | ||
1223 | ret = btrfs_run_delayed_items(trans, root); | 1323 | ret = btrfs_run_delayed_items(trans, root); |
1224 | BUG_ON(ret); | 1324 | BUG_ON(ret); |
1225 | 1325 | ||
1326 | ret = create_pending_snapshots(trans, root->fs_info); | ||
1327 | BUG_ON(ret); | ||
1328 | |||
1226 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | 1329 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
1227 | BUG_ON(ret); | 1330 | BUG_ON(ret); |
1228 | 1331 | ||
1332 | /* | ||
1333 | * make sure none of the code above managed to slip in a | ||
1334 | * delayed item | ||
1335 | */ | ||
1336 | btrfs_assert_delayed_root_empty(root); | ||
1337 | |||
1229 | WARN_ON(cur_trans != trans->transaction); | 1338 | WARN_ON(cur_trans != trans->transaction); |
1230 | 1339 | ||
1231 | btrfs_scrub_pause(root); | 1340 | btrfs_scrub_pause(root); |
@@ -1258,9 +1367,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1258 | btrfs_prepare_extent_commit(trans, root); | 1367 | btrfs_prepare_extent_commit(trans, root); |
1259 | 1368 | ||
1260 | cur_trans = root->fs_info->running_transaction; | 1369 | cur_trans = root->fs_info->running_transaction; |
1261 | spin_lock(&root->fs_info->new_trans_lock); | ||
1262 | root->fs_info->running_transaction = NULL; | ||
1263 | spin_unlock(&root->fs_info->new_trans_lock); | ||
1264 | 1370 | ||
1265 | btrfs_set_root_node(&root->fs_info->tree_root->root_item, | 1371 | btrfs_set_root_node(&root->fs_info->tree_root->root_item, |
1266 | root->fs_info->tree_root->node); | 1372 | root->fs_info->tree_root->node); |
@@ -1281,10 +1387,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1281 | sizeof(root->fs_info->super_copy)); | 1387 | sizeof(root->fs_info->super_copy)); |
1282 | 1388 | ||
1283 | trans->transaction->blocked = 0; | 1389 | trans->transaction->blocked = 0; |
1390 | spin_lock(&root->fs_info->trans_lock); | ||
1391 | root->fs_info->running_transaction = NULL; | ||
1392 | root->fs_info->trans_no_join = 0; | ||
1393 | spin_unlock(&root->fs_info->trans_lock); | ||
1394 | mutex_unlock(&root->fs_info->reloc_mutex); | ||
1284 | 1395 | ||
1285 | wake_up(&root->fs_info->transaction_wait); | 1396 | wake_up(&root->fs_info->transaction_wait); |
1286 | 1397 | ||
1287 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1288 | ret = btrfs_write_and_wait_transaction(trans, root); | 1398 | ret = btrfs_write_and_wait_transaction(trans, root); |
1289 | BUG_ON(ret); | 1399 | BUG_ON(ret); |
1290 | write_ctree_super(trans, root, 0); | 1400 | write_ctree_super(trans, root, 0); |
@@ -1297,22 +1407,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1297 | 1407 | ||
1298 | btrfs_finish_extent_commit(trans, root); | 1408 | btrfs_finish_extent_commit(trans, root); |
1299 | 1409 | ||
1300 | mutex_lock(&root->fs_info->trans_mutex); | ||
1301 | |||
1302 | cur_trans->commit_done = 1; | 1410 | cur_trans->commit_done = 1; |
1303 | 1411 | ||
1304 | root->fs_info->last_trans_committed = cur_trans->transid; | 1412 | root->fs_info->last_trans_committed = cur_trans->transid; |
1305 | 1413 | ||
1306 | wake_up(&cur_trans->commit_wait); | 1414 | wake_up(&cur_trans->commit_wait); |
1307 | 1415 | ||
1416 | spin_lock(&root->fs_info->trans_lock); | ||
1308 | list_del_init(&cur_trans->list); | 1417 | list_del_init(&cur_trans->list); |
1418 | spin_unlock(&root->fs_info->trans_lock); | ||
1419 | |||
1309 | put_transaction(cur_trans); | 1420 | put_transaction(cur_trans); |
1310 | put_transaction(cur_trans); | 1421 | put_transaction(cur_trans); |
1311 | 1422 | ||
1312 | trace_btrfs_transaction_commit(root); | 1423 | trace_btrfs_transaction_commit(root); |
1313 | 1424 | ||
1314 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1315 | |||
1316 | btrfs_scrub_continue(root); | 1425 | btrfs_scrub_continue(root); |
1317 | 1426 | ||
1318 | if (current->journal_info == trans) | 1427 | if (current->journal_info == trans) |
@@ -1334,9 +1443,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) | |||
1334 | LIST_HEAD(list); | 1443 | LIST_HEAD(list); |
1335 | struct btrfs_fs_info *fs_info = root->fs_info; | 1444 | struct btrfs_fs_info *fs_info = root->fs_info; |
1336 | 1445 | ||
1337 | mutex_lock(&fs_info->trans_mutex); | 1446 | spin_lock(&fs_info->trans_lock); |
1338 | list_splice_init(&fs_info->dead_roots, &list); | 1447 | list_splice_init(&fs_info->dead_roots, &list); |
1339 | mutex_unlock(&fs_info->trans_mutex); | 1448 | spin_unlock(&fs_info->trans_lock); |
1340 | 1449 | ||
1341 | while (!list_empty(&list)) { | 1450 | while (!list_empty(&list)) { |
1342 | root = list_entry(list.next, struct btrfs_root, root_list); | 1451 | root = list_entry(list.next, struct btrfs_root, root_list); |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 804c88639e5d..02564e6230ac 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -28,10 +28,12 @@ struct btrfs_transaction { | |||
28 | * transaction can end | 28 | * transaction can end |
29 | */ | 29 | */ |
30 | atomic_t num_writers; | 30 | atomic_t num_writers; |
31 | atomic_t use_count; | ||
31 | 32 | ||
32 | unsigned long num_joined; | 33 | unsigned long num_joined; |
34 | |||
35 | spinlock_t commit_lock; | ||
33 | int in_commit; | 36 | int in_commit; |
34 | atomic_t use_count; | ||
35 | int commit_done; | 37 | int commit_done; |
36 | int blocked; | 38 | int blocked; |
37 | struct list_head list; | 39 | struct list_head list; |
@@ -45,13 +47,14 @@ struct btrfs_transaction { | |||
45 | 47 | ||
46 | struct btrfs_trans_handle { | 48 | struct btrfs_trans_handle { |
47 | u64 transid; | 49 | u64 transid; |
48 | u64 block_group; | ||
49 | u64 bytes_reserved; | 50 | u64 bytes_reserved; |
51 | unsigned long use_count; | ||
50 | unsigned long blocks_reserved; | 52 | unsigned long blocks_reserved; |
51 | unsigned long blocks_used; | 53 | unsigned long blocks_used; |
52 | unsigned long delayed_ref_updates; | 54 | unsigned long delayed_ref_updates; |
53 | struct btrfs_transaction *transaction; | 55 | struct btrfs_transaction *transaction; |
54 | struct btrfs_block_rsv *block_rsv; | 56 | struct btrfs_block_rsv *block_rsv; |
57 | struct btrfs_block_rsv *orig_rsv; | ||
55 | }; | 58 | }; |
56 | 59 | ||
57 | struct btrfs_pending_snapshot { | 60 | struct btrfs_pending_snapshot { |
@@ -66,19 +69,6 @@ struct btrfs_pending_snapshot { | |||
66 | struct list_head list; | 69 | struct list_head list; |
67 | }; | 70 | }; |
68 | 71 | ||
69 | static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, | ||
70 | struct inode *inode) | ||
71 | { | ||
72 | trans->block_group = BTRFS_I(inode)->block_group; | ||
73 | } | ||
74 | |||
75 | static inline void btrfs_update_inode_block_group( | ||
76 | struct btrfs_trans_handle *trans, | ||
77 | struct inode *inode) | ||
78 | { | ||
79 | BTRFS_I(inode)->block_group = trans->block_group; | ||
80 | } | ||
81 | |||
82 | static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | 72 | static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, |
83 | struct inode *inode) | 73 | struct inode *inode) |
84 | { | 74 | { |
@@ -92,12 +82,9 @@ int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, | |||
92 | struct btrfs_root *root); | 82 | struct btrfs_root *root); |
93 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 83 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
94 | int num_items); | 84 | int num_items); |
95 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 85 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); |
96 | int num_blocks); | 86 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); |
97 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, | 87 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); |
98 | int num_blocks); | ||
99 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | ||
100 | int num_blocks); | ||
101 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); | 88 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); |
102 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 89 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
103 | struct btrfs_root *root); | 90 | struct btrfs_root *root); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 592396c6dc47..4ce8a9f41d1e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -3177,7 +3177,7 @@ again: | |||
3177 | tmp_key.offset = (u64)-1; | 3177 | tmp_key.offset = (u64)-1; |
3178 | 3178 | ||
3179 | wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); | 3179 | wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); |
3180 | BUG_ON(!wc.replay_dest); | 3180 | BUG_ON(IS_ERR_OR_NULL(wc.replay_dest)); |
3181 | 3181 | ||
3182 | wc.replay_dest->log_root = log; | 3182 | wc.replay_dest->log_root = log; |
3183 | btrfs_record_root_in_trans(trans, wc.replay_dest); | 3183 | btrfs_record_root_in_trans(trans, wc.replay_dest); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c48214ef5c09..1efa56e18f9b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -504,7 +504,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
504 | BUG_ON(!new_device); | 504 | BUG_ON(!new_device); |
505 | memcpy(new_device, device, sizeof(*new_device)); | 505 | memcpy(new_device, device, sizeof(*new_device)); |
506 | new_device->name = kstrdup(device->name, GFP_NOFS); | 506 | new_device->name = kstrdup(device->name, GFP_NOFS); |
507 | BUG_ON(!new_device->name); | 507 | BUG_ON(device->name && !new_device->name); |
508 | new_device->bdev = NULL; | 508 | new_device->bdev = NULL; |
509 | new_device->writeable = 0; | 509 | new_device->writeable = 0; |
510 | new_device->in_fs_metadata = 0; | 510 | new_device->in_fs_metadata = 0; |
@@ -689,12 +689,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
689 | transid = btrfs_super_generation(disk_super); | 689 | transid = btrfs_super_generation(disk_super); |
690 | if (disk_super->label[0]) | 690 | if (disk_super->label[0]) |
691 | printk(KERN_INFO "device label %s ", disk_super->label); | 691 | printk(KERN_INFO "device label %s ", disk_super->label); |
692 | else { | 692 | else |
693 | /* FIXME, make a readl uuid parser */ | 693 | printk(KERN_INFO "device fsid %pU ", disk_super->fsid); |
694 | printk(KERN_INFO "device fsid %llx-%llx ", | ||
695 | *(unsigned long long *)disk_super->fsid, | ||
696 | *(unsigned long long *)(disk_super->fsid + 8)); | ||
697 | } | ||
698 | printk(KERN_CONT "devid %llu transid %llu %s\n", | 694 | printk(KERN_CONT "devid %llu transid %llu %s\n", |
699 | (unsigned long long)devid, (unsigned long long)transid, path); | 695 | (unsigned long long)devid, (unsigned long long)transid, path); |
700 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); | 696 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index f3107e4b4d56..5366fe452ab0 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -158,8 +158,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
158 | if (IS_ERR(trans)) | 158 | if (IS_ERR(trans)) |
159 | return PTR_ERR(trans); | 159 | return PTR_ERR(trans); |
160 | 160 | ||
161 | btrfs_set_trans_block_group(trans, inode); | ||
162 | |||
163 | ret = do_setxattr(trans, inode, name, value, size, flags); | 161 | ret = do_setxattr(trans, inode, name, value, size, flags); |
164 | if (ret) | 162 | if (ret) |
165 | goto out; | 163 | goto out; |
diff --git a/fs/buffer.c b/fs/buffer.c index 49c9aada0374..1a80b048ade8 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -1902,10 +1902,8 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len, | |||
1902 | if (!buffer_uptodate(*wait_bh)) | 1902 | if (!buffer_uptodate(*wait_bh)) |
1903 | err = -EIO; | 1903 | err = -EIO; |
1904 | } | 1904 | } |
1905 | if (unlikely(err)) { | 1905 | if (unlikely(err)) |
1906 | page_zero_new_buffers(page, from, to); | 1906 | page_zero_new_buffers(page, from, to); |
1907 | ClearPageUptodate(page); | ||
1908 | } | ||
1909 | return err; | 1907 | return err; |
1910 | } | 1908 | } |
1911 | EXPORT_SYMBOL(__block_write_begin); | 1909 | EXPORT_SYMBOL(__block_write_begin); |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 33da49dc3cc6..5a3953db8118 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -453,7 +453,7 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc) | |||
453 | int err; | 453 | int err; |
454 | struct inode *inode = page->mapping->host; | 454 | struct inode *inode = page->mapping->host; |
455 | BUG_ON(!inode); | 455 | BUG_ON(!inode); |
456 | igrab(inode); | 456 | ihold(inode); |
457 | err = writepage_nounlock(page, wbc); | 457 | err = writepage_nounlock(page, wbc); |
458 | unlock_page(page); | 458 | unlock_page(page); |
459 | iput(inode); | 459 | iput(inode); |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 1f72b00447c4..f605753c8fe9 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -2940,14 +2940,12 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) | |||
2940 | while (!list_empty(&mdsc->cap_dirty)) { | 2940 | while (!list_empty(&mdsc->cap_dirty)) { |
2941 | ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info, | 2941 | ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info, |
2942 | i_dirty_item); | 2942 | i_dirty_item); |
2943 | inode = igrab(&ci->vfs_inode); | 2943 | inode = &ci->vfs_inode; |
2944 | ihold(inode); | ||
2944 | dout("flush_dirty_caps %p\n", inode); | 2945 | dout("flush_dirty_caps %p\n", inode); |
2945 | spin_unlock(&mdsc->cap_dirty_lock); | 2946 | spin_unlock(&mdsc->cap_dirty_lock); |
2946 | if (inode) { | 2947 | ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, NULL); |
2947 | ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, | 2948 | iput(inode); |
2948 | NULL); | ||
2949 | iput(inode); | ||
2950 | } | ||
2951 | spin_lock(&mdsc->cap_dirty_lock); | 2949 | spin_lock(&mdsc->cap_dirty_lock); |
2952 | } | 2950 | } |
2953 | spin_unlock(&mdsc->cap_dirty_lock); | 2951 | spin_unlock(&mdsc->cap_dirty_lock); |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 33729e822bb9..ef8f08c343e8 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -308,7 +308,8 @@ more: | |||
308 | req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); | 308 | req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); |
309 | if (IS_ERR(req)) | 309 | if (IS_ERR(req)) |
310 | return PTR_ERR(req); | 310 | return PTR_ERR(req); |
311 | req->r_inode = igrab(inode); | 311 | req->r_inode = inode; |
312 | ihold(inode); | ||
312 | req->r_dentry = dget(filp->f_dentry); | 313 | req->r_dentry = dget(filp->f_dentry); |
313 | /* hints to request -> mds selection code */ | 314 | /* hints to request -> mds selection code */ |
314 | req->r_direct_mode = USE_AUTH_MDS; | 315 | req->r_direct_mode = USE_AUTH_MDS; |
@@ -787,10 +788,12 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, | |||
787 | req->r_dentry_drop = CEPH_CAP_FILE_SHARED; | 788 | req->r_dentry_drop = CEPH_CAP_FILE_SHARED; |
788 | req->r_dentry_unless = CEPH_CAP_FILE_EXCL; | 789 | req->r_dentry_unless = CEPH_CAP_FILE_EXCL; |
789 | err = ceph_mdsc_do_request(mdsc, dir, req); | 790 | err = ceph_mdsc_do_request(mdsc, dir, req); |
790 | if (err) | 791 | if (err) { |
791 | d_drop(dentry); | 792 | d_drop(dentry); |
792 | else if (!req->r_reply_info.head->is_dentry) | 793 | } else if (!req->r_reply_info.head->is_dentry) { |
793 | d_instantiate(dentry, igrab(old_dentry->d_inode)); | 794 | ihold(old_dentry->d_inode); |
795 | d_instantiate(dentry, old_dentry->d_inode); | ||
796 | } | ||
794 | ceph_mdsc_put_request(req); | 797 | ceph_mdsc_put_request(req); |
795 | return err; | 798 | return err; |
796 | } | 799 | } |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index a610d3d67488..f67b687550de 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -109,7 +109,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
109 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 109 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
110 | inode = req->r_target_inode; | 110 | inode = req->r_target_inode; |
111 | if (inode) | 111 | if (inode) |
112 | igrab(inode); | 112 | ihold(inode); |
113 | ceph_mdsc_put_request(req); | 113 | ceph_mdsc_put_request(req); |
114 | if (!inode) | 114 | if (!inode) |
115 | return ERR_PTR(-ESTALE); | 115 | return ERR_PTR(-ESTALE); |
@@ -167,7 +167,7 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, | |||
167 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 167 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
168 | inode = req->r_target_inode; | 168 | inode = req->r_target_inode; |
169 | if (inode) | 169 | if (inode) |
170 | igrab(inode); | 170 | ihold(inode); |
171 | ceph_mdsc_put_request(req); | 171 | ceph_mdsc_put_request(req); |
172 | if (!inode) | 172 | if (!inode) |
173 | return ERR_PTR(err ? err : -ESTALE); | 173 | return ERR_PTR(err ? err : -ESTALE); |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 203252d88d9f..9542f07d0b93 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -191,7 +191,8 @@ int ceph_open(struct inode *inode, struct file *file) | |||
191 | err = PTR_ERR(req); | 191 | err = PTR_ERR(req); |
192 | goto out; | 192 | goto out; |
193 | } | 193 | } |
194 | req->r_inode = igrab(inode); | 194 | req->r_inode = inode; |
195 | ihold(inode); | ||
195 | req->r_num_caps = 1; | 196 | req->r_num_caps = 1; |
196 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | 197 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); |
197 | if (!err) | 198 | if (!err) |
@@ -282,7 +283,7 @@ int ceph_release(struct inode *inode, struct file *file) | |||
282 | static int striped_read(struct inode *inode, | 283 | static int striped_read(struct inode *inode, |
283 | u64 off, u64 len, | 284 | u64 off, u64 len, |
284 | struct page **pages, int num_pages, | 285 | struct page **pages, int num_pages, |
285 | int *checkeof, bool align_to_pages, | 286 | int *checkeof, bool o_direct, |
286 | unsigned long buf_align) | 287 | unsigned long buf_align) |
287 | { | 288 | { |
288 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | 289 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
@@ -307,7 +308,7 @@ static int striped_read(struct inode *inode, | |||
307 | io_align = off & ~PAGE_MASK; | 308 | io_align = off & ~PAGE_MASK; |
308 | 309 | ||
309 | more: | 310 | more: |
310 | if (align_to_pages) | 311 | if (o_direct) |
311 | page_align = (pos - io_align + buf_align) & ~PAGE_MASK; | 312 | page_align = (pos - io_align + buf_align) & ~PAGE_MASK; |
312 | else | 313 | else |
313 | page_align = pos & ~PAGE_MASK; | 314 | page_align = pos & ~PAGE_MASK; |
@@ -317,10 +318,10 @@ more: | |||
317 | ci->i_truncate_seq, | 318 | ci->i_truncate_seq, |
318 | ci->i_truncate_size, | 319 | ci->i_truncate_size, |
319 | page_pos, pages_left, page_align); | 320 | page_pos, pages_left, page_align); |
320 | hit_stripe = this_len < left; | ||
321 | was_short = ret >= 0 && ret < this_len; | ||
322 | if (ret == -ENOENT) | 321 | if (ret == -ENOENT) |
323 | ret = 0; | 322 | ret = 0; |
323 | hit_stripe = this_len < left; | ||
324 | was_short = ret >= 0 && ret < this_len; | ||
324 | dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read, | 325 | dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read, |
325 | ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); | 326 | ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); |
326 | 327 | ||
@@ -345,20 +346,22 @@ more: | |||
345 | } | 346 | } |
346 | 347 | ||
347 | if (was_short) { | 348 | if (was_short) { |
348 | /* was original extent fully inside i_size? */ | 349 | /* did we bounce off eof? */ |
349 | if (pos + left <= inode->i_size) { | 350 | if (pos + left > inode->i_size) |
350 | dout("zero tail\n"); | 351 | *checkeof = 1; |
351 | ceph_zero_page_vector_range(page_off + read, len - read, | 352 | |
353 | /* zero trailing bytes (inside i_size) */ | ||
354 | if (left > 0 && pos < inode->i_size) { | ||
355 | if (pos + left > inode->i_size) | ||
356 | left = inode->i_size - pos; | ||
357 | |||
358 | dout("zero tail %d\n", left); | ||
359 | ceph_zero_page_vector_range(page_off + read, left, | ||
352 | pages); | 360 | pages); |
353 | read = len; | 361 | read += left; |
354 | goto out; | ||
355 | } | 362 | } |
356 | |||
357 | /* check i_size */ | ||
358 | *checkeof = 1; | ||
359 | } | 363 | } |
360 | 364 | ||
361 | out: | ||
362 | if (ret >= 0) | 365 | if (ret >= 0) |
363 | ret = read; | 366 | ret = read; |
364 | dout("striped_read returns %d\n", ret); | 367 | dout("striped_read returns %d\n", ret); |
@@ -658,7 +661,7 @@ out: | |||
658 | 661 | ||
659 | /* hit EOF or hole? */ | 662 | /* hit EOF or hole? */ |
660 | if (statret == 0 && *ppos < inode->i_size) { | 663 | if (statret == 0 && *ppos < inode->i_size) { |
661 | dout("aio_read sync_read hit hole, reading more\n"); | 664 | dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size); |
662 | read += ret; | 665 | read += ret; |
663 | base += ret; | 666 | base += ret; |
664 | len -= ret; | 667 | len -= ret; |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 70b6a4839c38..d8858e96ab18 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -1101,10 +1101,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1101 | goto done; | 1101 | goto done; |
1102 | } | 1102 | } |
1103 | req->r_dentry = dn; /* may have spliced */ | 1103 | req->r_dentry = dn; /* may have spliced */ |
1104 | igrab(in); | 1104 | ihold(in); |
1105 | } else if (ceph_ino(in) == vino.ino && | 1105 | } else if (ceph_ino(in) == vino.ino && |
1106 | ceph_snap(in) == vino.snap) { | 1106 | ceph_snap(in) == vino.snap) { |
1107 | igrab(in); | 1107 | ihold(in); |
1108 | } else { | 1108 | } else { |
1109 | dout(" %p links to %p %llx.%llx, not %llx.%llx\n", | 1109 | dout(" %p links to %p %llx.%llx, not %llx.%llx\n", |
1110 | dn, in, ceph_ino(in), ceph_snap(in), | 1110 | dn, in, ceph_ino(in), ceph_snap(in), |
@@ -1144,7 +1144,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1144 | goto done; | 1144 | goto done; |
1145 | } | 1145 | } |
1146 | req->r_dentry = dn; /* may have spliced */ | 1146 | req->r_dentry = dn; /* may have spliced */ |
1147 | igrab(in); | 1147 | ihold(in); |
1148 | rinfo->head->is_dentry = 1; /* fool notrace handlers */ | 1148 | rinfo->head->is_dentry = 1; /* fool notrace handlers */ |
1149 | } | 1149 | } |
1150 | 1150 | ||
@@ -1328,7 +1328,7 @@ void ceph_queue_writeback(struct inode *inode) | |||
1328 | if (queue_work(ceph_inode_to_client(inode)->wb_wq, | 1328 | if (queue_work(ceph_inode_to_client(inode)->wb_wq, |
1329 | &ceph_inode(inode)->i_wb_work)) { | 1329 | &ceph_inode(inode)->i_wb_work)) { |
1330 | dout("ceph_queue_writeback %p\n", inode); | 1330 | dout("ceph_queue_writeback %p\n", inode); |
1331 | igrab(inode); | 1331 | ihold(inode); |
1332 | } else { | 1332 | } else { |
1333 | dout("ceph_queue_writeback %p failed\n", inode); | 1333 | dout("ceph_queue_writeback %p failed\n", inode); |
1334 | } | 1334 | } |
@@ -1353,7 +1353,7 @@ void ceph_queue_invalidate(struct inode *inode) | |||
1353 | if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, | 1353 | if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, |
1354 | &ceph_inode(inode)->i_pg_inv_work)) { | 1354 | &ceph_inode(inode)->i_pg_inv_work)) { |
1355 | dout("ceph_queue_invalidate %p\n", inode); | 1355 | dout("ceph_queue_invalidate %p\n", inode); |
1356 | igrab(inode); | 1356 | ihold(inode); |
1357 | } else { | 1357 | } else { |
1358 | dout("ceph_queue_invalidate %p failed\n", inode); | 1358 | dout("ceph_queue_invalidate %p failed\n", inode); |
1359 | } | 1359 | } |
@@ -1477,7 +1477,7 @@ void ceph_queue_vmtruncate(struct inode *inode) | |||
1477 | if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, | 1477 | if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, |
1478 | &ci->i_vmtruncate_work)) { | 1478 | &ci->i_vmtruncate_work)) { |
1479 | dout("ceph_queue_vmtruncate %p\n", inode); | 1479 | dout("ceph_queue_vmtruncate %p\n", inode); |
1480 | igrab(inode); | 1480 | ihold(inode); |
1481 | } else { | 1481 | } else { |
1482 | dout("ceph_queue_vmtruncate %p failed, pending=%d\n", | 1482 | dout("ceph_queue_vmtruncate %p failed, pending=%d\n", |
1483 | inode, ci->i_truncate_pending); | 1483 | inode, ci->i_truncate_pending); |
@@ -1738,7 +1738,8 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1738 | __mark_inode_dirty(inode, inode_dirty_flags); | 1738 | __mark_inode_dirty(inode, inode_dirty_flags); |
1739 | 1739 | ||
1740 | if (mask) { | 1740 | if (mask) { |
1741 | req->r_inode = igrab(inode); | 1741 | req->r_inode = inode; |
1742 | ihold(inode); | ||
1742 | req->r_inode_drop = release; | 1743 | req->r_inode_drop = release; |
1743 | req->r_args.setattr.mask = cpu_to_le32(mask); | 1744 | req->r_args.setattr.mask = cpu_to_le32(mask); |
1744 | req->r_num_caps = 1; | 1745 | req->r_num_caps = 1; |
@@ -1779,7 +1780,8 @@ int ceph_do_getattr(struct inode *inode, int mask) | |||
1779 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); | 1780 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); |
1780 | if (IS_ERR(req)) | 1781 | if (IS_ERR(req)) |
1781 | return PTR_ERR(req); | 1782 | return PTR_ERR(req); |
1782 | req->r_inode = igrab(inode); | 1783 | req->r_inode = inode; |
1784 | ihold(inode); | ||
1783 | req->r_num_caps = 1; | 1785 | req->r_num_caps = 1; |
1784 | req->r_args.getattr.mask = cpu_to_le32(mask); | 1786 | req->r_args.getattr.mask = cpu_to_le32(mask); |
1785 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 1787 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 8888c9ba68db..ef0b5f48e13a 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -73,7 +73,8 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | |||
73 | USE_AUTH_MDS); | 73 | USE_AUTH_MDS); |
74 | if (IS_ERR(req)) | 74 | if (IS_ERR(req)) |
75 | return PTR_ERR(req); | 75 | return PTR_ERR(req); |
76 | req->r_inode = igrab(inode); | 76 | req->r_inode = inode; |
77 | ihold(inode); | ||
77 | req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL; | 78 | req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL; |
78 | 79 | ||
79 | req->r_args.setlayout.layout.fl_stripe_unit = | 80 | req->r_args.setlayout.layout.fl_stripe_unit = |
@@ -135,7 +136,8 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) | |||
135 | 136 | ||
136 | if (IS_ERR(req)) | 137 | if (IS_ERR(req)) |
137 | return PTR_ERR(req); | 138 | return PTR_ERR(req); |
138 | req->r_inode = igrab(inode); | 139 | req->r_inode = inode; |
140 | ihold(inode); | ||
139 | 141 | ||
140 | req->r_args.setlayout.layout.fl_stripe_unit = | 142 | req->r_args.setlayout.layout.fl_stripe_unit = |
141 | cpu_to_le32(l.stripe_unit); | 143 | cpu_to_le32(l.stripe_unit); |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 476b329867d4..80576d05d687 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
@@ -23,7 +23,8 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |||
23 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); | 23 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); |
24 | if (IS_ERR(req)) | 24 | if (IS_ERR(req)) |
25 | return PTR_ERR(req); | 25 | return PTR_ERR(req); |
26 | req->r_inode = igrab(inode); | 26 | req->r_inode = inode; |
27 | ihold(inode); | ||
27 | 28 | ||
28 | /* mds requires start and length rather than start and end */ | 29 | /* mds requires start and length rather than start and end */ |
29 | if (LLONG_MAX == fl->fl_end) | 30 | if (LLONG_MAX == fl->fl_end) |
@@ -32,11 +33,10 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |||
32 | length = fl->fl_end - fl->fl_start + 1; | 33 | length = fl->fl_end - fl->fl_start + 1; |
33 | 34 | ||
34 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | 35 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " |
35 | "length: %llu, wait: %d, type`: %d", (int)lock_type, | 36 | "length: %llu, wait: %d, type: %d", (int)lock_type, |
36 | (int)operation, (u64)fl->fl_pid, fl->fl_start, | 37 | (int)operation, (u64)fl->fl_pid, fl->fl_start, |
37 | length, wait, fl->fl_type); | 38 | length, wait, fl->fl_type); |
38 | 39 | ||
39 | |||
40 | req->r_args.filelock_change.rule = lock_type; | 40 | req->r_args.filelock_change.rule = lock_type; |
41 | req->r_args.filelock_change.type = cmd; | 41 | req->r_args.filelock_change.type = cmd; |
42 | req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); | 42 | req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); |
@@ -70,7 +70,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |||
70 | } | 70 | } |
71 | ceph_mdsc_put_request(req); | 71 | ceph_mdsc_put_request(req); |
72 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | 72 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " |
73 | "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type, | 73 | "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type, |
74 | (int)operation, (u64)fl->fl_pid, fl->fl_start, | 74 | (int)operation, (u64)fl->fl_pid, fl->fl_start, |
75 | length, wait, fl->fl_type, err); | 75 | length, wait, fl->fl_type, err); |
76 | return err; | 76 | return err; |
@@ -109,16 +109,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
109 | dout("mds locked, locking locally"); | 109 | dout("mds locked, locking locally"); |
110 | err = posix_lock_file(file, fl, NULL); | 110 | err = posix_lock_file(file, fl, NULL); |
111 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { | 111 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { |
112 | /* undo! This should only happen if the kernel detects | 112 | /* undo! This should only happen if |
113 | * local deadlock. */ | 113 | * the kernel detects local |
114 | * deadlock. */ | ||
114 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | 115 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, |
115 | CEPH_LOCK_UNLOCK, 0, fl); | 116 | CEPH_LOCK_UNLOCK, 0, fl); |
116 | dout("got %d on posix_lock_file, undid lock", err); | 117 | dout("got %d on posix_lock_file, undid lock", |
118 | err); | ||
117 | } | 119 | } |
118 | } | 120 | } |
119 | 121 | ||
120 | } else { | 122 | } else if (err == -ERESTARTSYS) { |
121 | dout("mds returned error code %d", err); | 123 | dout("undoing lock\n"); |
124 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | ||
125 | CEPH_LOCK_UNLOCK, 0, fl); | ||
122 | } | 126 | } |
123 | return err; | 127 | return err; |
124 | } | 128 | } |
@@ -155,8 +159,11 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
155 | file, CEPH_LOCK_UNLOCK, 0, fl); | 159 | file, CEPH_LOCK_UNLOCK, 0, fl); |
156 | dout("got %d on flock_lock_file_wait, undid lock", err); | 160 | dout("got %d on flock_lock_file_wait, undid lock", err); |
157 | } | 161 | } |
158 | } else { | 162 | } else if (err == -ERESTARTSYS) { |
159 | dout("mds error code %d", err); | 163 | dout("undoing lock\n"); |
164 | ceph_lock_message(CEPH_LOCK_FLOCK, | ||
165 | CEPH_MDS_OP_SETFILELOCK, | ||
166 | file, CEPH_LOCK_UNLOCK, 0, fl); | ||
160 | } | 167 | } |
161 | return err; | 168 | return err; |
162 | } | 169 | } |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 24067d68a554..54b14de2e729 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -722,7 +722,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) | |||
722 | ci = list_first_entry(&mdsc->snap_flush_list, | 722 | ci = list_first_entry(&mdsc->snap_flush_list, |
723 | struct ceph_inode_info, i_snap_flush_item); | 723 | struct ceph_inode_info, i_snap_flush_item); |
724 | inode = &ci->vfs_inode; | 724 | inode = &ci->vfs_inode; |
725 | igrab(inode); | 725 | ihold(inode); |
726 | spin_unlock(&mdsc->snap_flush_lock); | 726 | spin_unlock(&mdsc->snap_flush_lock); |
727 | spin_lock(&inode->i_lock); | 727 | spin_lock(&inode->i_lock); |
728 | __ceph_flush_snaps(ci, &session, 0); | 728 | __ceph_flush_snaps(ci, &session, 0); |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index f2b628696180..f42d730f1b66 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -665,7 +665,8 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | |||
665 | err = PTR_ERR(req); | 665 | err = PTR_ERR(req); |
666 | goto out; | 666 | goto out; |
667 | } | 667 | } |
668 | req->r_inode = igrab(inode); | 668 | req->r_inode = inode; |
669 | ihold(inode); | ||
669 | req->r_inode_drop = CEPH_CAP_XATTR_SHARED; | 670 | req->r_inode_drop = CEPH_CAP_XATTR_SHARED; |
670 | req->r_num_caps = 1; | 671 | req->r_num_caps = 1; |
671 | req->r_args.setxattr.flags = cpu_to_le32(flags); | 672 | req->r_args.setxattr.flags = cpu_to_le32(flags); |
@@ -795,7 +796,8 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name) | |||
795 | USE_AUTH_MDS); | 796 | USE_AUTH_MDS); |
796 | if (IS_ERR(req)) | 797 | if (IS_ERR(req)) |
797 | return PTR_ERR(req); | 798 | return PTR_ERR(req); |
798 | req->r_inode = igrab(inode); | 799 | req->r_inode = inode; |
800 | ihold(inode); | ||
799 | req->r_inode_drop = CEPH_CAP_XATTR_SHARED; | 801 | req->r_inode_drop = CEPH_CAP_XATTR_SHARED; |
800 | req->r_num_caps = 1; | 802 | req->r_num_caps = 1; |
801 | req->r_path2 = kstrdup(name, GFP_NOFS); | 803 | req->r_path2 = kstrdup(name, GFP_NOFS); |
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 1cd4c3a1862d..f66cc1625150 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig | |||
@@ -7,6 +7,7 @@ config CIFS | |||
7 | select CRYPTO_MD5 | 7 | select CRYPTO_MD5 |
8 | select CRYPTO_HMAC | 8 | select CRYPTO_HMAC |
9 | select CRYPTO_ARC4 | 9 | select CRYPTO_ARC4 |
10 | select CRYPTO_ECB | ||
10 | select CRYPTO_DES | 11 | select CRYPTO_DES |
11 | help | 12 | help |
12 | This is the client VFS module for the Common Internet File System | 13 | This is the client VFS module for the Common Internet File System |
@@ -148,13 +149,13 @@ config CIFS_FSCACHE | |||
148 | 149 | ||
149 | config CIFS_ACL | 150 | config CIFS_ACL |
150 | bool "Provide CIFS ACL support (EXPERIMENTAL)" | 151 | bool "Provide CIFS ACL support (EXPERIMENTAL)" |
151 | depends on EXPERIMENTAL && CIFS_XATTR | 152 | depends on EXPERIMENTAL && CIFS_XATTR && KEYS |
152 | help | 153 | help |
153 | Allows to fetch CIFS/NTFS ACL from the server. The DACL blob | 154 | Allows to fetch CIFS/NTFS ACL from the server. The DACL blob |
154 | is handed over to the application/caller. | 155 | is handed over to the application/caller. |
155 | 156 | ||
156 | config CIFS_NFSD_EXPORT | 157 | config CIFS_NFSD_EXPORT |
157 | bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)" | 158 | bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)" |
158 | depends on CIFS && EXPERIMENTAL | 159 | depends on CIFS && EXPERIMENTAL && BROKEN |
159 | help | 160 | help |
160 | Allows NFS server to export a CIFS mounted share (nfsd over cifs) | 161 | Allows NFS server to export a CIFS mounted share (nfsd over cifs) |
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c index dd8584d35a14..545509c3313b 100644 --- a/fs/cifs/cache.c +++ b/fs/cifs/cache.c | |||
@@ -92,7 +92,7 @@ static uint16_t cifs_server_get_key(const void *cookie_netfs_data, | |||
92 | break; | 92 | break; |
93 | 93 | ||
94 | default: | 94 | default: |
95 | cERROR(1, "CIFS: Unknown network family '%d'", sa->sa_family); | 95 | cERROR(1, "Unknown network family '%d'", sa->sa_family); |
96 | key_len = 0; | 96 | key_len = 0; |
97 | break; | 97 | break; |
98 | } | 98 | } |
@@ -152,7 +152,7 @@ static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer, | |||
152 | 152 | ||
153 | sharename = extract_sharename(tcon->treeName); | 153 | sharename = extract_sharename(tcon->treeName); |
154 | if (IS_ERR(sharename)) { | 154 | if (IS_ERR(sharename)) { |
155 | cFYI(1, "CIFS: couldn't extract sharename\n"); | 155 | cFYI(1, "%s: couldn't extract sharename\n", __func__); |
156 | sharename = NULL; | 156 | sharename = NULL; |
157 | return 0; | 157 | return 0; |
158 | } | 158 | } |
@@ -302,7 +302,7 @@ static void cifs_fscache_inode_now_uncached(void *cookie_netfs_data) | |||
302 | pagevec_init(&pvec, 0); | 302 | pagevec_init(&pvec, 0); |
303 | first = 0; | 303 | first = 0; |
304 | 304 | ||
305 | cFYI(1, "cifs inode 0x%p now uncached", cifsi); | 305 | cFYI(1, "%s: cifs inode 0x%p now uncached", __func__, cifsi); |
306 | 306 | ||
307 | for (;;) { | 307 | for (;;) { |
308 | nr_pages = pagevec_lookup(&pvec, | 308 | nr_pages = pagevec_lookup(&pvec, |
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index ffb1459dc6ec..7260e11e21f8 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h | |||
@@ -42,6 +42,7 @@ | |||
42 | #define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */ | 42 | #define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */ |
43 | #define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */ | 43 | #define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */ |
44 | #define CIFS_MOUNT_RWPIDFORWARD 0x80000 /* use pid forwarding for rw */ | 44 | #define CIFS_MOUNT_RWPIDFORWARD 0x80000 /* use pid forwarding for rw */ |
45 | #define CIFS_MOUNT_POSIXACL 0x100000 /* mirror of MS_POSIXACL in mnt_cifs_flags */ | ||
45 | 46 | ||
46 | struct cifs_sb_info { | 47 | struct cifs_sb_info { |
47 | struct rb_root tlink_tree; | 48 | struct rb_root tlink_tree; |
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index dfbd9f1f373d..5a0ee7f2af06 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -184,7 +184,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu, | |||
184 | if (cifs_pdu == NULL || server == NULL) | 184 | if (cifs_pdu == NULL || server == NULL) |
185 | return -EINVAL; | 185 | return -EINVAL; |
186 | 186 | ||
187 | if (cifs_pdu->Command == SMB_COM_NEGOTIATE) | 187 | if (!server->session_estab) |
188 | return 0; | 188 | return 0; |
189 | 189 | ||
190 | if (cifs_pdu->Command == SMB_COM_LOCKING_ANDX) { | 190 | if (cifs_pdu->Command == SMB_COM_LOCKING_ANDX) { |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 989442dcfb45..35f9154615fa 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -104,8 +104,7 @@ cifs_sb_deactive(struct super_block *sb) | |||
104 | } | 104 | } |
105 | 105 | ||
106 | static int | 106 | static int |
107 | cifs_read_super(struct super_block *sb, struct smb_vol *volume_info, | 107 | cifs_read_super(struct super_block *sb) |
108 | const char *devname, int silent) | ||
109 | { | 108 | { |
110 | struct inode *inode; | 109 | struct inode *inode; |
111 | struct cifs_sb_info *cifs_sb; | 110 | struct cifs_sb_info *cifs_sb; |
@@ -113,22 +112,16 @@ cifs_read_super(struct super_block *sb, struct smb_vol *volume_info, | |||
113 | 112 | ||
114 | cifs_sb = CIFS_SB(sb); | 113 | cifs_sb = CIFS_SB(sb); |
115 | 114 | ||
116 | spin_lock_init(&cifs_sb->tlink_tree_lock); | 115 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL) |
117 | cifs_sb->tlink_tree = RB_ROOT; | 116 | sb->s_flags |= MS_POSIXACL; |
118 | 117 | ||
119 | rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); | 118 | if (cifs_sb_master_tcon(cifs_sb)->ses->capabilities & CAP_LARGE_FILES) |
120 | if (rc) | 119 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
121 | return rc; | 120 | else |
122 | 121 | sb->s_maxbytes = MAX_NON_LFS; | |
123 | cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages; | ||
124 | 122 | ||
125 | rc = cifs_mount(sb, cifs_sb, volume_info, devname); | 123 | /* BB FIXME fix time_gran to be larger for LANMAN sessions */ |
126 | 124 | sb->s_time_gran = 100; | |
127 | if (rc) { | ||
128 | if (!silent) | ||
129 | cERROR(1, "cifs_mount failed w/return code = %d", rc); | ||
130 | goto out_mount_failed; | ||
131 | } | ||
132 | 125 | ||
133 | sb->s_magic = CIFS_MAGIC_NUMBER; | 126 | sb->s_magic = CIFS_MAGIC_NUMBER; |
134 | sb->s_op = &cifs_super_ops; | 127 | sb->s_op = &cifs_super_ops; |
@@ -170,37 +163,14 @@ out_no_root: | |||
170 | if (inode) | 163 | if (inode) |
171 | iput(inode); | 164 | iput(inode); |
172 | 165 | ||
173 | cifs_umount(sb, cifs_sb); | ||
174 | |||
175 | out_mount_failed: | ||
176 | bdi_destroy(&cifs_sb->bdi); | ||
177 | return rc; | 166 | return rc; |
178 | } | 167 | } |
179 | 168 | ||
180 | static void | 169 | static void cifs_kill_sb(struct super_block *sb) |
181 | cifs_put_super(struct super_block *sb) | ||
182 | { | 170 | { |
183 | int rc = 0; | 171 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); |
184 | struct cifs_sb_info *cifs_sb; | 172 | kill_anon_super(sb); |
185 | 173 | cifs_umount(cifs_sb); | |
186 | cFYI(1, "In cifs_put_super"); | ||
187 | cifs_sb = CIFS_SB(sb); | ||
188 | if (cifs_sb == NULL) { | ||
189 | cFYI(1, "Empty cifs superblock info passed to unmount"); | ||
190 | return; | ||
191 | } | ||
192 | |||
193 | rc = cifs_umount(sb, cifs_sb); | ||
194 | if (rc) | ||
195 | cERROR(1, "cifs_umount failed with return code %d", rc); | ||
196 | if (cifs_sb->mountdata) { | ||
197 | kfree(cifs_sb->mountdata); | ||
198 | cifs_sb->mountdata = NULL; | ||
199 | } | ||
200 | |||
201 | unload_nls(cifs_sb->local_nls); | ||
202 | bdi_destroy(&cifs_sb->bdi); | ||
203 | kfree(cifs_sb); | ||
204 | } | 174 | } |
205 | 175 | ||
206 | static int | 176 | static int |
@@ -257,9 +227,6 @@ static int cifs_permission(struct inode *inode, int mask, unsigned int flags) | |||
257 | { | 227 | { |
258 | struct cifs_sb_info *cifs_sb; | 228 | struct cifs_sb_info *cifs_sb; |
259 | 229 | ||
260 | if (flags & IPERM_FLAG_RCU) | ||
261 | return -ECHILD; | ||
262 | |||
263 | cifs_sb = CIFS_SB(inode->i_sb); | 230 | cifs_sb = CIFS_SB(inode->i_sb); |
264 | 231 | ||
265 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { | 232 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { |
@@ -352,6 +319,37 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) | |||
352 | } | 319 | } |
353 | } | 320 | } |
354 | 321 | ||
322 | static void | ||
323 | cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server) | ||
324 | { | ||
325 | seq_printf(s, ",sec="); | ||
326 | |||
327 | switch (server->secType) { | ||
328 | case LANMAN: | ||
329 | seq_printf(s, "lanman"); | ||
330 | break; | ||
331 | case NTLMv2: | ||
332 | seq_printf(s, "ntlmv2"); | ||
333 | break; | ||
334 | case NTLM: | ||
335 | seq_printf(s, "ntlm"); | ||
336 | break; | ||
337 | case Kerberos: | ||
338 | seq_printf(s, "krb5"); | ||
339 | break; | ||
340 | case RawNTLMSSP: | ||
341 | seq_printf(s, "ntlmssp"); | ||
342 | break; | ||
343 | default: | ||
344 | /* shouldn't ever happen */ | ||
345 | seq_printf(s, "unknown"); | ||
346 | break; | ||
347 | } | ||
348 | |||
349 | if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | ||
350 | seq_printf(s, "i"); | ||
351 | } | ||
352 | |||
355 | /* | 353 | /* |
356 | * cifs_show_options() is for displaying mount options in /proc/mounts. | 354 | * cifs_show_options() is for displaying mount options in /proc/mounts. |
357 | * Not all settable options are displayed but most of the important | 355 | * Not all settable options are displayed but most of the important |
@@ -365,6 +363,8 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m) | |||
365 | struct sockaddr *srcaddr; | 363 | struct sockaddr *srcaddr; |
366 | srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; | 364 | srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; |
367 | 365 | ||
366 | cifs_show_security(s, tcon->ses->server); | ||
367 | |||
368 | seq_printf(s, ",unc=%s", tcon->treeName); | 368 | seq_printf(s, ",unc=%s", tcon->treeName); |
369 | 369 | ||
370 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) | 370 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) |
@@ -518,7 +518,6 @@ static int cifs_drop_inode(struct inode *inode) | |||
518 | } | 518 | } |
519 | 519 | ||
520 | static const struct super_operations cifs_super_ops = { | 520 | static const struct super_operations cifs_super_ops = { |
521 | .put_super = cifs_put_super, | ||
522 | .statfs = cifs_statfs, | 521 | .statfs = cifs_statfs, |
523 | .alloc_inode = cifs_alloc_inode, | 522 | .alloc_inode = cifs_alloc_inode, |
524 | .destroy_inode = cifs_destroy_inode, | 523 | .destroy_inode = cifs_destroy_inode, |
@@ -555,7 +554,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) | |||
555 | full_path = cifs_build_path_to_root(vol, cifs_sb, | 554 | full_path = cifs_build_path_to_root(vol, cifs_sb, |
556 | cifs_sb_master_tcon(cifs_sb)); | 555 | cifs_sb_master_tcon(cifs_sb)); |
557 | if (full_path == NULL) | 556 | if (full_path == NULL) |
558 | return NULL; | 557 | return ERR_PTR(-ENOMEM); |
559 | 558 | ||
560 | cFYI(1, "Get root dentry for %s", full_path); | 559 | cFYI(1, "Get root dentry for %s", full_path); |
561 | 560 | ||
@@ -584,7 +583,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) | |||
584 | dchild = d_alloc(dparent, &name); | 583 | dchild = d_alloc(dparent, &name); |
585 | if (dchild == NULL) { | 584 | if (dchild == NULL) { |
586 | dput(dparent); | 585 | dput(dparent); |
587 | dparent = NULL; | 586 | dparent = ERR_PTR(-ENOMEM); |
588 | goto out; | 587 | goto out; |
589 | } | 588 | } |
590 | } | 589 | } |
@@ -602,7 +601,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) | |||
602 | if (rc) { | 601 | if (rc) { |
603 | dput(dchild); | 602 | dput(dchild); |
604 | dput(dparent); | 603 | dput(dparent); |
605 | dparent = NULL; | 604 | dparent = ERR_PTR(rc); |
606 | goto out; | 605 | goto out; |
607 | } | 606 | } |
608 | alias = d_materialise_unique(dchild, inode); | 607 | alias = d_materialise_unique(dchild, inode); |
@@ -610,7 +609,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) | |||
610 | dput(dchild); | 609 | dput(dchild); |
611 | if (IS_ERR(alias)) { | 610 | if (IS_ERR(alias)) { |
612 | dput(dparent); | 611 | dput(dparent); |
613 | dparent = NULL; | 612 | dparent = ERR_PTR(-EINVAL); /* XXX */ |
614 | goto out; | 613 | goto out; |
615 | } | 614 | } |
616 | dchild = alias; | 615 | dchild = alias; |
@@ -630,6 +629,13 @@ out: | |||
630 | return dparent; | 629 | return dparent; |
631 | } | 630 | } |
632 | 631 | ||
632 | static int cifs_set_super(struct super_block *sb, void *data) | ||
633 | { | ||
634 | struct cifs_mnt_data *mnt_data = data; | ||
635 | sb->s_fs_info = mnt_data->cifs_sb; | ||
636 | return set_anon_super(sb, NULL); | ||
637 | } | ||
638 | |||
633 | static struct dentry * | 639 | static struct dentry * |
634 | cifs_do_mount(struct file_system_type *fs_type, | 640 | cifs_do_mount(struct file_system_type *fs_type, |
635 | int flags, const char *dev_name, void *data) | 641 | int flags, const char *dev_name, void *data) |
@@ -650,75 +656,73 @@ cifs_do_mount(struct file_system_type *fs_type, | |||
650 | cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL); | 656 | cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL); |
651 | if (cifs_sb == NULL) { | 657 | if (cifs_sb == NULL) { |
652 | root = ERR_PTR(-ENOMEM); | 658 | root = ERR_PTR(-ENOMEM); |
653 | goto out; | 659 | goto out_nls; |
660 | } | ||
661 | |||
662 | cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL); | ||
663 | if (cifs_sb->mountdata == NULL) { | ||
664 | root = ERR_PTR(-ENOMEM); | ||
665 | goto out_cifs_sb; | ||
654 | } | 666 | } |
655 | 667 | ||
656 | cifs_setup_cifs_sb(volume_info, cifs_sb); | 668 | cifs_setup_cifs_sb(volume_info, cifs_sb); |
657 | 669 | ||
670 | rc = cifs_mount(cifs_sb, volume_info); | ||
671 | if (rc) { | ||
672 | if (!(flags & MS_SILENT)) | ||
673 | cERROR(1, "cifs_mount failed w/return code = %d", rc); | ||
674 | root = ERR_PTR(rc); | ||
675 | goto out_mountdata; | ||
676 | } | ||
677 | |||
658 | mnt_data.vol = volume_info; | 678 | mnt_data.vol = volume_info; |
659 | mnt_data.cifs_sb = cifs_sb; | 679 | mnt_data.cifs_sb = cifs_sb; |
660 | mnt_data.flags = flags; | 680 | mnt_data.flags = flags; |
661 | 681 | ||
662 | sb = sget(fs_type, cifs_match_super, set_anon_super, &mnt_data); | 682 | sb = sget(fs_type, cifs_match_super, cifs_set_super, &mnt_data); |
663 | if (IS_ERR(sb)) { | 683 | if (IS_ERR(sb)) { |
664 | root = ERR_CAST(sb); | 684 | root = ERR_CAST(sb); |
665 | goto out_cifs_sb; | 685 | cifs_umount(cifs_sb); |
686 | goto out; | ||
666 | } | 687 | } |
667 | 688 | ||
668 | if (sb->s_fs_info) { | 689 | if (sb->s_root) { |
669 | cFYI(1, "Use existing superblock"); | 690 | cFYI(1, "Use existing superblock"); |
670 | goto out_shared; | 691 | cifs_umount(cifs_sb); |
671 | } | 692 | } else { |
672 | 693 | sb->s_flags = flags; | |
673 | /* | 694 | /* BB should we make this contingent on mount parm? */ |
674 | * Copy mount params for use in submounts. Better to do | 695 | sb->s_flags |= MS_NODIRATIME | MS_NOATIME; |
675 | * the copy here and deal with the error before cleanup gets | 696 | |
676 | * complicated post-mount. | 697 | rc = cifs_read_super(sb); |
677 | */ | 698 | if (rc) { |
678 | cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL); | 699 | root = ERR_PTR(rc); |
679 | if (cifs_sb->mountdata == NULL) { | 700 | goto out_super; |
680 | root = ERR_PTR(-ENOMEM); | 701 | } |
681 | goto out_super; | ||
682 | } | ||
683 | |||
684 | sb->s_flags = flags; | ||
685 | /* BB should we make this contingent on mount parm? */ | ||
686 | sb->s_flags |= MS_NODIRATIME | MS_NOATIME; | ||
687 | sb->s_fs_info = cifs_sb; | ||
688 | 702 | ||
689 | rc = cifs_read_super(sb, volume_info, dev_name, | 703 | sb->s_flags |= MS_ACTIVE; |
690 | flags & MS_SILENT ? 1 : 0); | ||
691 | if (rc) { | ||
692 | root = ERR_PTR(rc); | ||
693 | goto out_super; | ||
694 | } | 704 | } |
695 | 705 | ||
696 | sb->s_flags |= MS_ACTIVE; | ||
697 | |||
698 | root = cifs_get_root(volume_info, sb); | 706 | root = cifs_get_root(volume_info, sb); |
699 | if (root == NULL) | 707 | if (IS_ERR(root)) |
700 | goto out_super; | 708 | goto out_super; |
701 | 709 | ||
702 | cFYI(1, "dentry root is: %p", root); | 710 | cFYI(1, "dentry root is: %p", root); |
703 | goto out; | 711 | goto out; |
704 | 712 | ||
705 | out_shared: | ||
706 | root = cifs_get_root(volume_info, sb); | ||
707 | if (root) | ||
708 | cFYI(1, "dentry root is: %p", root); | ||
709 | goto out; | ||
710 | |||
711 | out_super: | 713 | out_super: |
712 | kfree(cifs_sb->mountdata); | ||
713 | deactivate_locked_super(sb); | 714 | deactivate_locked_super(sb); |
714 | |||
715 | out_cifs_sb: | ||
716 | unload_nls(cifs_sb->local_nls); | ||
717 | kfree(cifs_sb); | ||
718 | |||
719 | out: | 715 | out: |
720 | cifs_cleanup_volume_info(&volume_info); | 716 | cifs_cleanup_volume_info(&volume_info); |
721 | return root; | 717 | return root; |
718 | |||
719 | out_mountdata: | ||
720 | kfree(cifs_sb->mountdata); | ||
721 | out_cifs_sb: | ||
722 | kfree(cifs_sb); | ||
723 | out_nls: | ||
724 | unload_nls(volume_info->local_nls); | ||
725 | goto out; | ||
722 | } | 726 | } |
723 | 727 | ||
724 | static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | 728 | static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, |
@@ -807,7 +811,7 @@ struct file_system_type cifs_fs_type = { | |||
807 | .owner = THIS_MODULE, | 811 | .owner = THIS_MODULE, |
808 | .name = "cifs", | 812 | .name = "cifs", |
809 | .mount = cifs_do_mount, | 813 | .mount = cifs_do_mount, |
810 | .kill_sb = kill_anon_super, | 814 | .kill_sb = cifs_kill_sb, |
811 | /* .fs_flags */ | 815 | /* .fs_flags */ |
812 | }; | 816 | }; |
813 | const struct inode_operations cifs_dir_inode_ops = { | 817 | const struct inode_operations cifs_dir_inode_ops = { |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 64313f778ebf..0900e1658c96 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -129,5 +129,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); | |||
129 | extern const struct export_operations cifs_export_ops; | 129 | extern const struct export_operations cifs_export_ops; |
130 | #endif /* CIFS_NFSD_EXPORT */ | 130 | #endif /* CIFS_NFSD_EXPORT */ |
131 | 131 | ||
132 | #define CIFS_VERSION "1.72" | 132 | #define CIFS_VERSION "1.73" |
133 | #endif /* _CIFSFS_H */ | 133 | #endif /* _CIFSFS_H */ |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 953f84413c77..257f312ede42 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -157,9 +157,8 @@ extern int cifs_match_super(struct super_block *, void *); | |||
157 | extern void cifs_cleanup_volume_info(struct smb_vol **pvolume_info); | 157 | extern void cifs_cleanup_volume_info(struct smb_vol **pvolume_info); |
158 | extern int cifs_setup_volume_info(struct smb_vol **pvolume_info, | 158 | extern int cifs_setup_volume_info(struct smb_vol **pvolume_info, |
159 | char *mount_data, const char *devname); | 159 | char *mount_data, const char *devname); |
160 | extern int cifs_mount(struct super_block *, struct cifs_sb_info *, | 160 | extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); |
161 | struct smb_vol *, const char *); | 161 | extern void cifs_umount(struct cifs_sb_info *); |
162 | extern int cifs_umount(struct super_block *, struct cifs_sb_info *); | ||
163 | extern void cifs_dfs_release_automount_timer(void); | 162 | extern void cifs_dfs_release_automount_timer(void); |
164 | void cifs_proc_init(void); | 163 | void cifs_proc_init(void); |
165 | void cifs_proc_clean(void); | 164 | void cifs_proc_clean(void); |
@@ -218,7 +217,8 @@ extern int get_dfs_path(int xid, struct cifs_ses *pSesInfo, | |||
218 | struct dfs_info3_param **preferrals, | 217 | struct dfs_info3_param **preferrals, |
219 | int remap); | 218 | int remap); |
220 | extern void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, | 219 | extern void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, |
221 | struct super_block *sb, struct smb_vol *vol); | 220 | struct cifs_sb_info *cifs_sb, |
221 | struct smb_vol *vol); | ||
222 | extern int CIFSSMBQFSInfo(const int xid, struct cifs_tcon *tcon, | 222 | extern int CIFSSMBQFSInfo(const int xid, struct cifs_tcon *tcon, |
223 | struct kstatfs *FSData); | 223 | struct kstatfs *FSData); |
224 | extern int SMBOldQFSInfo(const int xid, struct cifs_tcon *tcon, | 224 | extern int SMBOldQFSInfo(const int xid, struct cifs_tcon *tcon, |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 6d88b82537c3..7f540df52527 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -152,7 +152,7 @@ cifs_reconnect(struct TCP_Server_Info *server) | |||
152 | mid_entry->callback(mid_entry); | 152 | mid_entry->callback(mid_entry); |
153 | } | 153 | } |
154 | 154 | ||
155 | while (server->tcpStatus == CifsNeedReconnect) { | 155 | do { |
156 | try_to_freeze(); | 156 | try_to_freeze(); |
157 | 157 | ||
158 | /* we should try only the port we connected to before */ | 158 | /* we should try only the port we connected to before */ |
@@ -167,7 +167,7 @@ cifs_reconnect(struct TCP_Server_Info *server) | |||
167 | server->tcpStatus = CifsNeedNegotiate; | 167 | server->tcpStatus = CifsNeedNegotiate; |
168 | spin_unlock(&GlobalMid_Lock); | 168 | spin_unlock(&GlobalMid_Lock); |
169 | } | 169 | } |
170 | } | 170 | } while (server->tcpStatus == CifsNeedReconnect); |
171 | 171 | ||
172 | return rc; | 172 | return rc; |
173 | } | 173 | } |
@@ -784,7 +784,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
784 | struct smb_vol *vol) | 784 | struct smb_vol *vol) |
785 | { | 785 | { |
786 | char *value, *data, *end; | 786 | char *value, *data, *end; |
787 | char *mountdata_copy, *options; | 787 | char *mountdata_copy = NULL, *options; |
788 | unsigned int temp_len, i, j; | 788 | unsigned int temp_len, i, j; |
789 | char separator[2]; | 789 | char separator[2]; |
790 | short int override_uid = -1; | 790 | short int override_uid = -1; |
@@ -1391,7 +1391,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1391 | "/proc/fs/cifs/LookupCacheEnabled to 0\n"); | 1391 | "/proc/fs/cifs/LookupCacheEnabled to 0\n"); |
1392 | } else if (strnicmp(data, "fsc", 3) == 0) { | 1392 | } else if (strnicmp(data, "fsc", 3) == 0) { |
1393 | #ifndef CONFIG_CIFS_FSCACHE | 1393 | #ifndef CONFIG_CIFS_FSCACHE |
1394 | cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE" | 1394 | cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE " |
1395 | "kernel config option set"); | 1395 | "kernel config option set"); |
1396 | goto cifs_parse_mount_err; | 1396 | goto cifs_parse_mount_err; |
1397 | #endif | 1397 | #endif |
@@ -1976,7 +1976,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) | |||
1976 | warned_on_ntlm = true; | 1976 | warned_on_ntlm = true; |
1977 | cERROR(1, "default security mechanism requested. The default " | 1977 | cERROR(1, "default security mechanism requested. The default " |
1978 | "security mechanism will be upgraded from ntlm to " | 1978 | "security mechanism will be upgraded from ntlm to " |
1979 | "ntlmv2 in kernel release 2.6.41"); | 1979 | "ntlmv2 in kernel release 3.1"); |
1980 | } | 1980 | } |
1981 | ses->overrideSecFlg = volume_info->secFlg; | 1981 | ses->overrideSecFlg = volume_info->secFlg; |
1982 | 1982 | ||
@@ -2149,7 +2149,10 @@ cifs_put_tlink(struct tcon_link *tlink) | |||
2149 | } | 2149 | } |
2150 | 2150 | ||
2151 | static inline struct tcon_link * | 2151 | static inline struct tcon_link * |
2152 | cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb); | 2152 | cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb) |
2153 | { | ||
2154 | return cifs_sb->master_tlink; | ||
2155 | } | ||
2153 | 2156 | ||
2154 | static int | 2157 | static int |
2155 | compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data) | 2158 | compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data) |
@@ -2543,7 +2546,7 @@ ip_connect(struct TCP_Server_Info *server) | |||
2543 | } | 2546 | } |
2544 | 2547 | ||
2545 | void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, | 2548 | void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, |
2546 | struct super_block *sb, struct smb_vol *vol_info) | 2549 | struct cifs_sb_info *cifs_sb, struct smb_vol *vol_info) |
2547 | { | 2550 | { |
2548 | /* if we are reconnecting then should we check to see if | 2551 | /* if we are reconnecting then should we check to see if |
2549 | * any requested capabilities changed locally e.g. via | 2552 | * any requested capabilities changed locally e.g. via |
@@ -2597,22 +2600,23 @@ void reset_cifs_unix_caps(int xid, struct cifs_tcon *tcon, | |||
2597 | cap &= ~CIFS_UNIX_POSIX_ACL_CAP; | 2600 | cap &= ~CIFS_UNIX_POSIX_ACL_CAP; |
2598 | else if (CIFS_UNIX_POSIX_ACL_CAP & cap) { | 2601 | else if (CIFS_UNIX_POSIX_ACL_CAP & cap) { |
2599 | cFYI(1, "negotiated posix acl support"); | 2602 | cFYI(1, "negotiated posix acl support"); |
2600 | if (sb) | 2603 | if (cifs_sb) |
2601 | sb->s_flags |= MS_POSIXACL; | 2604 | cifs_sb->mnt_cifs_flags |= |
2605 | CIFS_MOUNT_POSIXACL; | ||
2602 | } | 2606 | } |
2603 | 2607 | ||
2604 | if (vol_info && vol_info->posix_paths == 0) | 2608 | if (vol_info && vol_info->posix_paths == 0) |
2605 | cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; | 2609 | cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; |
2606 | else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) { | 2610 | else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) { |
2607 | cFYI(1, "negotiate posix pathnames"); | 2611 | cFYI(1, "negotiate posix pathnames"); |
2608 | if (sb) | 2612 | if (cifs_sb) |
2609 | CIFS_SB(sb)->mnt_cifs_flags |= | 2613 | cifs_sb->mnt_cifs_flags |= |
2610 | CIFS_MOUNT_POSIX_PATHS; | 2614 | CIFS_MOUNT_POSIX_PATHS; |
2611 | } | 2615 | } |
2612 | 2616 | ||
2613 | if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) { | 2617 | if (cifs_sb && (cifs_sb->rsize > 127 * 1024)) { |
2614 | if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) { | 2618 | if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) { |
2615 | CIFS_SB(sb)->rsize = 127 * 1024; | 2619 | cifs_sb->rsize = 127 * 1024; |
2616 | cFYI(DBG2, "larger reads not supported by srv"); | 2620 | cFYI(DBG2, "larger reads not supported by srv"); |
2617 | } | 2621 | } |
2618 | } | 2622 | } |
@@ -2659,6 +2663,9 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, | |||
2659 | { | 2663 | { |
2660 | INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); | 2664 | INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); |
2661 | 2665 | ||
2666 | spin_lock_init(&cifs_sb->tlink_tree_lock); | ||
2667 | cifs_sb->tlink_tree = RB_ROOT; | ||
2668 | |||
2662 | if (pvolume_info->rsize > CIFSMaxBufSize) { | 2669 | if (pvolume_info->rsize > CIFSMaxBufSize) { |
2663 | cERROR(1, "rsize %d too large, using MaxBufSize", | 2670 | cERROR(1, "rsize %d too large, using MaxBufSize", |
2664 | pvolume_info->rsize); | 2671 | pvolume_info->rsize); |
@@ -2747,21 +2754,21 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, | |||
2747 | 2754 | ||
2748 | /* | 2755 | /* |
2749 | * When the server supports very large writes via POSIX extensions, we can | 2756 | * When the server supports very large writes via POSIX extensions, we can |
2750 | * allow up to 2^24 - PAGE_CACHE_SIZE. | 2757 | * allow up to 2^24-1, minus the size of a WRITE_AND_X header, not including |
2758 | * the RFC1001 length. | ||
2751 | * | 2759 | * |
2752 | * Note that this might make for "interesting" allocation problems during | 2760 | * Note that this might make for "interesting" allocation problems during |
2753 | * writeback however (as we have to allocate an array of pointers for the | 2761 | * writeback however as we have to allocate an array of pointers for the |
2754 | * pages). A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. | 2762 | * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096. |
2755 | */ | 2763 | */ |
2756 | #define CIFS_MAX_WSIZE ((1<<24) - PAGE_CACHE_SIZE) | 2764 | #define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4) |
2757 | 2765 | ||
2758 | /* | 2766 | /* |
2759 | * When the server doesn't allow large posix writes, default to a wsize of | 2767 | * When the server doesn't allow large posix writes, only allow a wsize of |
2760 | * 128k - PAGE_CACHE_SIZE -- one page less than the largest frame size | 2768 | * 128k minus the size of the WRITE_AND_X header. That allows for a write up |
2761 | * described in RFC1001. This allows space for the header without going over | 2769 | * to the maximum size described by RFC1002. |
2762 | * that by default. | ||
2763 | */ | 2770 | */ |
2764 | #define CIFS_MAX_RFC1001_WSIZE (128 * 1024 - PAGE_CACHE_SIZE) | 2771 | #define CIFS_MAX_RFC1002_WSIZE (128 * 1024 - sizeof(WRITE_REQ) + 4) |
2765 | 2772 | ||
2766 | /* | 2773 | /* |
2767 | * The default wsize is 1M. find_get_pages seems to return a maximum of 256 | 2774 | * The default wsize is 1M. find_get_pages seems to return a maximum of 256 |
@@ -2780,11 +2787,18 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) | |||
2780 | 2787 | ||
2781 | /* can server support 24-bit write sizes? (via UNIX extensions) */ | 2788 | /* can server support 24-bit write sizes? (via UNIX extensions) */ |
2782 | if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) | 2789 | if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) |
2783 | wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1001_WSIZE); | 2790 | wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1002_WSIZE); |
2784 | 2791 | ||
2785 | /* no CAP_LARGE_WRITE_X? Limit it to 16 bits */ | 2792 | /* |
2786 | if (!(server->capabilities & CAP_LARGE_WRITE_X)) | 2793 | * no CAP_LARGE_WRITE_X or is signing enabled without CAP_UNIX set? |
2787 | wsize = min_t(unsigned int, wsize, USHRT_MAX); | 2794 | * Limit it to max buffer offered by the server, minus the size of the |
2795 | * WRITEX header, not including the 4 byte RFC1001 length. | ||
2796 | */ | ||
2797 | if (!(server->capabilities & CAP_LARGE_WRITE_X) || | ||
2798 | (!(server->capabilities & CAP_UNIX) && | ||
2799 | (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)))) | ||
2800 | wsize = min_t(unsigned int, wsize, | ||
2801 | server->maxBuf - sizeof(WRITE_REQ) + 4); | ||
2788 | 2802 | ||
2789 | /* hard limit of CIFS_MAX_WSIZE */ | 2803 | /* hard limit of CIFS_MAX_WSIZE */ |
2790 | wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); | 2804 | wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); |
@@ -2934,7 +2948,11 @@ int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data, | |||
2934 | 2948 | ||
2935 | if (volume_info->nullauth) { | 2949 | if (volume_info->nullauth) { |
2936 | cFYI(1, "null user"); | 2950 | cFYI(1, "null user"); |
2937 | volume_info->username = ""; | 2951 | volume_info->username = kzalloc(1, GFP_KERNEL); |
2952 | if (volume_info->username == NULL) { | ||
2953 | rc = -ENOMEM; | ||
2954 | goto out; | ||
2955 | } | ||
2938 | } else if (volume_info->username) { | 2956 | } else if (volume_info->username) { |
2939 | /* BB fixme parse for domain name here */ | 2957 | /* BB fixme parse for domain name here */ |
2940 | cFYI(1, "Username: %s", volume_info->username); | 2958 | cFYI(1, "Username: %s", volume_info->username); |
@@ -2968,8 +2986,7 @@ out: | |||
2968 | } | 2986 | } |
2969 | 2987 | ||
2970 | int | 2988 | int |
2971 | cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, | 2989 | cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) |
2972 | struct smb_vol *volume_info, const char *devname) | ||
2973 | { | 2990 | { |
2974 | int rc = 0; | 2991 | int rc = 0; |
2975 | int xid; | 2992 | int xid; |
@@ -2980,6 +2997,13 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, | |||
2980 | struct tcon_link *tlink; | 2997 | struct tcon_link *tlink; |
2981 | #ifdef CONFIG_CIFS_DFS_UPCALL | 2998 | #ifdef CONFIG_CIFS_DFS_UPCALL |
2982 | int referral_walks_count = 0; | 2999 | int referral_walks_count = 0; |
3000 | |||
3001 | rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); | ||
3002 | if (rc) | ||
3003 | return rc; | ||
3004 | |||
3005 | cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages; | ||
3006 | |||
2983 | try_mount_again: | 3007 | try_mount_again: |
2984 | /* cleanup activities if we're chasing a referral */ | 3008 | /* cleanup activities if we're chasing a referral */ |
2985 | if (referral_walks_count) { | 3009 | if (referral_walks_count) { |
@@ -3004,6 +3028,7 @@ try_mount_again: | |||
3004 | srvTcp = cifs_get_tcp_session(volume_info); | 3028 | srvTcp = cifs_get_tcp_session(volume_info); |
3005 | if (IS_ERR(srvTcp)) { | 3029 | if (IS_ERR(srvTcp)) { |
3006 | rc = PTR_ERR(srvTcp); | 3030 | rc = PTR_ERR(srvTcp); |
3031 | bdi_destroy(&cifs_sb->bdi); | ||
3007 | goto out; | 3032 | goto out; |
3008 | } | 3033 | } |
3009 | 3034 | ||
@@ -3015,14 +3040,6 @@ try_mount_again: | |||
3015 | goto mount_fail_check; | 3040 | goto mount_fail_check; |
3016 | } | 3041 | } |
3017 | 3042 | ||
3018 | if (pSesInfo->capabilities & CAP_LARGE_FILES) | ||
3019 | sb->s_maxbytes = MAX_LFS_FILESIZE; | ||
3020 | else | ||
3021 | sb->s_maxbytes = MAX_NON_LFS; | ||
3022 | |||
3023 | /* BB FIXME fix time_gran to be larger for LANMAN sessions */ | ||
3024 | sb->s_time_gran = 100; | ||
3025 | |||
3026 | /* search for existing tcon to this server share */ | 3043 | /* search for existing tcon to this server share */ |
3027 | tcon = cifs_get_tcon(pSesInfo, volume_info); | 3044 | tcon = cifs_get_tcon(pSesInfo, volume_info); |
3028 | if (IS_ERR(tcon)) { | 3045 | if (IS_ERR(tcon)) { |
@@ -3035,7 +3052,7 @@ try_mount_again: | |||
3035 | if (tcon->ses->capabilities & CAP_UNIX) { | 3052 | if (tcon->ses->capabilities & CAP_UNIX) { |
3036 | /* reset of caps checks mount to see if unix extensions | 3053 | /* reset of caps checks mount to see if unix extensions |
3037 | disabled for just this mount */ | 3054 | disabled for just this mount */ |
3038 | reset_cifs_unix_caps(xid, tcon, sb, volume_info); | 3055 | reset_cifs_unix_caps(xid, tcon, cifs_sb, volume_info); |
3039 | if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) && | 3056 | if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) && |
3040 | (le64_to_cpu(tcon->fsUnixInfo.Capability) & | 3057 | (le64_to_cpu(tcon->fsUnixInfo.Capability) & |
3041 | CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) { | 3058 | CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) { |
@@ -3158,6 +3175,7 @@ mount_fail_check: | |||
3158 | cifs_put_smb_ses(pSesInfo); | 3175 | cifs_put_smb_ses(pSesInfo); |
3159 | else | 3176 | else |
3160 | cifs_put_tcp_session(srvTcp); | 3177 | cifs_put_tcp_session(srvTcp); |
3178 | bdi_destroy(&cifs_sb->bdi); | ||
3161 | goto out; | 3179 | goto out; |
3162 | } | 3180 | } |
3163 | 3181 | ||
@@ -3171,6 +3189,10 @@ out: | |||
3171 | return rc; | 3189 | return rc; |
3172 | } | 3190 | } |
3173 | 3191 | ||
3192 | /* | ||
3193 | * Issue a TREE_CONNECT request. Note that for IPC$ shares, that the tcon | ||
3194 | * pointer may be NULL. | ||
3195 | */ | ||
3174 | int | 3196 | int |
3175 | CIFSTCon(unsigned int xid, struct cifs_ses *ses, | 3197 | CIFSTCon(unsigned int xid, struct cifs_ses *ses, |
3176 | const char *tree, struct cifs_tcon *tcon, | 3198 | const char *tree, struct cifs_tcon *tcon, |
@@ -3205,7 +3227,7 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses, | |||
3205 | pSMB->AndXCommand = 0xFF; | 3227 | pSMB->AndXCommand = 0xFF; |
3206 | pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO); | 3228 | pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO); |
3207 | bcc_ptr = &pSMB->Password[0]; | 3229 | bcc_ptr = &pSMB->Password[0]; |
3208 | if ((ses->server->sec_mode) & SECMODE_USER) { | 3230 | if (!tcon || (ses->server->sec_mode & SECMODE_USER)) { |
3209 | pSMB->PasswordLength = cpu_to_le16(1); /* minimum */ | 3231 | pSMB->PasswordLength = cpu_to_le16(1); /* minimum */ |
3210 | *bcc_ptr = 0; /* password is null byte */ | 3232 | *bcc_ptr = 0; /* password is null byte */ |
3211 | bcc_ptr++; /* skip password */ | 3233 | bcc_ptr++; /* skip password */ |
@@ -3328,8 +3350,8 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses, | |||
3328 | return rc; | 3350 | return rc; |
3329 | } | 3351 | } |
3330 | 3352 | ||
3331 | int | 3353 | void |
3332 | cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) | 3354 | cifs_umount(struct cifs_sb_info *cifs_sb) |
3333 | { | 3355 | { |
3334 | struct rb_root *root = &cifs_sb->tlink_tree; | 3356 | struct rb_root *root = &cifs_sb->tlink_tree; |
3335 | struct rb_node *node; | 3357 | struct rb_node *node; |
@@ -3350,7 +3372,10 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) | |||
3350 | } | 3372 | } |
3351 | spin_unlock(&cifs_sb->tlink_tree_lock); | 3373 | spin_unlock(&cifs_sb->tlink_tree_lock); |
3352 | 3374 | ||
3353 | return 0; | 3375 | bdi_destroy(&cifs_sb->bdi); |
3376 | kfree(cifs_sb->mountdata); | ||
3377 | unload_nls(cifs_sb->local_nls); | ||
3378 | kfree(cifs_sb); | ||
3354 | } | 3379 | } |
3355 | 3380 | ||
3356 | int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses) | 3381 | int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses) |
@@ -3371,7 +3396,7 @@ int cifs_negotiate_protocol(unsigned int xid, struct cifs_ses *ses) | |||
3371 | } | 3396 | } |
3372 | if (rc == 0) { | 3397 | if (rc == 0) { |
3373 | spin_lock(&GlobalMid_Lock); | 3398 | spin_lock(&GlobalMid_Lock); |
3374 | if (server->tcpStatus != CifsExiting) | 3399 | if (server->tcpStatus == CifsNeedNegotiate) |
3375 | server->tcpStatus = CifsGood; | 3400 | server->tcpStatus = CifsGood; |
3376 | else | 3401 | else |
3377 | rc = -EHOSTDOWN; | 3402 | rc = -EHOSTDOWN; |
@@ -3484,12 +3509,6 @@ out: | |||
3484 | return tcon; | 3509 | return tcon; |
3485 | } | 3510 | } |
3486 | 3511 | ||
3487 | static inline struct tcon_link * | ||
3488 | cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb) | ||
3489 | { | ||
3490 | return cifs_sb->master_tlink; | ||
3491 | } | ||
3492 | |||
3493 | struct cifs_tcon * | 3512 | struct cifs_tcon * |
3494 | cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb) | 3513 | cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb) |
3495 | { | 3514 | { |
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index d368a47ba5eb..816696621ec9 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c | |||
@@ -28,14 +28,14 @@ void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) | |||
28 | server->fscache = | 28 | server->fscache = |
29 | fscache_acquire_cookie(cifs_fscache_netfs.primary_index, | 29 | fscache_acquire_cookie(cifs_fscache_netfs.primary_index, |
30 | &cifs_fscache_server_index_def, server); | 30 | &cifs_fscache_server_index_def, server); |
31 | cFYI(1, "CIFS: get client cookie (0x%p/0x%p)", server, | 31 | cFYI(1, "%s: (0x%p/0x%p)", __func__, server, |
32 | server->fscache); | 32 | server->fscache); |
33 | } | 33 | } |
34 | 34 | ||
35 | void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) | 35 | void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) |
36 | { | 36 | { |
37 | cFYI(1, "CIFS: release client cookie (0x%p/0x%p)", server, | 37 | cFYI(1, "%s: (0x%p/0x%p)", __func__, server, |
38 | server->fscache); | 38 | server->fscache); |
39 | fscache_relinquish_cookie(server->fscache, 0); | 39 | fscache_relinquish_cookie(server->fscache, 0); |
40 | server->fscache = NULL; | 40 | server->fscache = NULL; |
41 | } | 41 | } |
@@ -47,13 +47,13 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) | |||
47 | tcon->fscache = | 47 | tcon->fscache = |
48 | fscache_acquire_cookie(server->fscache, | 48 | fscache_acquire_cookie(server->fscache, |
49 | &cifs_fscache_super_index_def, tcon); | 49 | &cifs_fscache_super_index_def, tcon); |
50 | cFYI(1, "CIFS: get superblock cookie (0x%p/0x%p)", | 50 | cFYI(1, "%s: (0x%p/0x%p)", __func__, server->fscache, |
51 | server->fscache, tcon->fscache); | 51 | tcon->fscache); |
52 | } | 52 | } |
53 | 53 | ||
54 | void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) | 54 | void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) |
55 | { | 55 | { |
56 | cFYI(1, "CIFS: releasing superblock cookie (0x%p)", tcon->fscache); | 56 | cFYI(1, "%s: (0x%p)", __func__, tcon->fscache); |
57 | fscache_relinquish_cookie(tcon->fscache, 0); | 57 | fscache_relinquish_cookie(tcon->fscache, 0); |
58 | tcon->fscache = NULL; | 58 | tcon->fscache = NULL; |
59 | } | 59 | } |
@@ -70,8 +70,8 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode) | |||
70 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) { | 70 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) { |
71 | cifsi->fscache = fscache_acquire_cookie(tcon->fscache, | 71 | cifsi->fscache = fscache_acquire_cookie(tcon->fscache, |
72 | &cifs_fscache_inode_object_def, cifsi); | 72 | &cifs_fscache_inode_object_def, cifsi); |
73 | cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", tcon->fscache, | 73 | cFYI(1, "%s: got FH cookie (0x%p/0x%p)", __func__, |
74 | cifsi->fscache); | 74 | tcon->fscache, cifsi->fscache); |
75 | } | 75 | } |
76 | } | 76 | } |
77 | 77 | ||
@@ -80,8 +80,7 @@ void cifs_fscache_release_inode_cookie(struct inode *inode) | |||
80 | struct cifsInodeInfo *cifsi = CIFS_I(inode); | 80 | struct cifsInodeInfo *cifsi = CIFS_I(inode); |
81 | 81 | ||
82 | if (cifsi->fscache) { | 82 | if (cifsi->fscache) { |
83 | cFYI(1, "CIFS releasing inode cookie (0x%p)", | 83 | cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache); |
84 | cifsi->fscache); | ||
85 | fscache_relinquish_cookie(cifsi->fscache, 0); | 84 | fscache_relinquish_cookie(cifsi->fscache, 0); |
86 | cifsi->fscache = NULL; | 85 | cifsi->fscache = NULL; |
87 | } | 86 | } |
@@ -92,8 +91,7 @@ static void cifs_fscache_disable_inode_cookie(struct inode *inode) | |||
92 | struct cifsInodeInfo *cifsi = CIFS_I(inode); | 91 | struct cifsInodeInfo *cifsi = CIFS_I(inode); |
93 | 92 | ||
94 | if (cifsi->fscache) { | 93 | if (cifsi->fscache) { |
95 | cFYI(1, "CIFS disabling inode cookie (0x%p)", | 94 | cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache); |
96 | cifsi->fscache); | ||
97 | fscache_relinquish_cookie(cifsi->fscache, 1); | 95 | fscache_relinquish_cookie(cifsi->fscache, 1); |
98 | cifsi->fscache = NULL; | 96 | cifsi->fscache = NULL; |
99 | } | 97 | } |
@@ -121,8 +119,8 @@ void cifs_fscache_reset_inode_cookie(struct inode *inode) | |||
121 | cifs_sb_master_tcon(cifs_sb)->fscache, | 119 | cifs_sb_master_tcon(cifs_sb)->fscache, |
122 | &cifs_fscache_inode_object_def, | 120 | &cifs_fscache_inode_object_def, |
123 | cifsi); | 121 | cifsi); |
124 | cFYI(1, "CIFS: new cookie 0x%p oldcookie 0x%p", | 122 | cFYI(1, "%s: new cookie 0x%p oldcookie 0x%p", |
125 | cifsi->fscache, old); | 123 | __func__, cifsi->fscache, old); |
126 | } | 124 | } |
127 | } | 125 | } |
128 | 126 | ||
@@ -132,8 +130,8 @@ int cifs_fscache_release_page(struct page *page, gfp_t gfp) | |||
132 | struct inode *inode = page->mapping->host; | 130 | struct inode *inode = page->mapping->host; |
133 | struct cifsInodeInfo *cifsi = CIFS_I(inode); | 131 | struct cifsInodeInfo *cifsi = CIFS_I(inode); |
134 | 132 | ||
135 | cFYI(1, "CIFS: fscache release page (0x%p/0x%p)", | 133 | cFYI(1, "%s: (0x%p/0x%p)", __func__, page, |
136 | page, cifsi->fscache); | 134 | cifsi->fscache); |
137 | if (!fscache_maybe_release_page(cifsi->fscache, page, gfp)) | 135 | if (!fscache_maybe_release_page(cifsi->fscache, page, gfp)) |
138 | return 0; | 136 | return 0; |
139 | } | 137 | } |
@@ -144,8 +142,7 @@ int cifs_fscache_release_page(struct page *page, gfp_t gfp) | |||
144 | static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx, | 142 | static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx, |
145 | int error) | 143 | int error) |
146 | { | 144 | { |
147 | cFYI(1, "CFS: readpage_from_fscache_complete (0x%p/%d)", | 145 | cFYI(1, "%s: (0x%p/%d)", __func__, page, error); |
148 | page, error); | ||
149 | if (!error) | 146 | if (!error) |
150 | SetPageUptodate(page); | 147 | SetPageUptodate(page); |
151 | unlock_page(page); | 148 | unlock_page(page); |
@@ -158,7 +155,7 @@ int __cifs_readpage_from_fscache(struct inode *inode, struct page *page) | |||
158 | { | 155 | { |
159 | int ret; | 156 | int ret; |
160 | 157 | ||
161 | cFYI(1, "CIFS: readpage_from_fscache(fsc:%p, p:%p, i:0x%p", | 158 | cFYI(1, "%s: (fsc:%p, p:%p, i:0x%p", __func__, |
162 | CIFS_I(inode)->fscache, page, inode); | 159 | CIFS_I(inode)->fscache, page, inode); |
163 | ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page, | 160 | ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page, |
164 | cifs_readpage_from_fscache_complete, | 161 | cifs_readpage_from_fscache_complete, |
@@ -167,11 +164,11 @@ int __cifs_readpage_from_fscache(struct inode *inode, struct page *page) | |||
167 | switch (ret) { | 164 | switch (ret) { |
168 | 165 | ||
169 | case 0: /* page found in fscache, read submitted */ | 166 | case 0: /* page found in fscache, read submitted */ |
170 | cFYI(1, "CIFS: readpage_from_fscache: submitted"); | 167 | cFYI(1, "%s: submitted", __func__); |
171 | return ret; | 168 | return ret; |
172 | case -ENOBUFS: /* page won't be cached */ | 169 | case -ENOBUFS: /* page won't be cached */ |
173 | case -ENODATA: /* page not in cache */ | 170 | case -ENODATA: /* page not in cache */ |
174 | cFYI(1, "CIFS: readpage_from_fscache %d", ret); | 171 | cFYI(1, "%s: %d", __func__, ret); |
175 | return 1; | 172 | return 1; |
176 | 173 | ||
177 | default: | 174 | default: |
@@ -190,7 +187,7 @@ int __cifs_readpages_from_fscache(struct inode *inode, | |||
190 | { | 187 | { |
191 | int ret; | 188 | int ret; |
192 | 189 | ||
193 | cFYI(1, "CIFS: __cifs_readpages_from_fscache (0x%p/%u/0x%p)", | 190 | cFYI(1, "%s: (0x%p/%u/0x%p)", __func__, |
194 | CIFS_I(inode)->fscache, *nr_pages, inode); | 191 | CIFS_I(inode)->fscache, *nr_pages, inode); |
195 | ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping, | 192 | ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping, |
196 | pages, nr_pages, | 193 | pages, nr_pages, |
@@ -199,12 +196,12 @@ int __cifs_readpages_from_fscache(struct inode *inode, | |||
199 | mapping_gfp_mask(mapping)); | 196 | mapping_gfp_mask(mapping)); |
200 | switch (ret) { | 197 | switch (ret) { |
201 | case 0: /* read submitted to the cache for all pages */ | 198 | case 0: /* read submitted to the cache for all pages */ |
202 | cFYI(1, "CIFS: readpages_from_fscache: submitted"); | 199 | cFYI(1, "%s: submitted", __func__); |
203 | return ret; | 200 | return ret; |
204 | 201 | ||
205 | case -ENOBUFS: /* some pages are not cached and can't be */ | 202 | case -ENOBUFS: /* some pages are not cached and can't be */ |
206 | case -ENODATA: /* some pages are not cached */ | 203 | case -ENODATA: /* some pages are not cached */ |
207 | cFYI(1, "CIFS: readpages_from_fscache: no page"); | 204 | cFYI(1, "%s: no page", __func__); |
208 | return 1; | 205 | return 1; |
209 | 206 | ||
210 | default: | 207 | default: |
@@ -218,7 +215,7 @@ void __cifs_readpage_to_fscache(struct inode *inode, struct page *page) | |||
218 | { | 215 | { |
219 | int ret; | 216 | int ret; |
220 | 217 | ||
221 | cFYI(1, "CIFS: readpage_to_fscache(fsc: %p, p: %p, i: %p", | 218 | cFYI(1, "%s: (fsc: %p, p: %p, i: %p)", __func__, |
222 | CIFS_I(inode)->fscache, page, inode); | 219 | CIFS_I(inode)->fscache, page, inode); |
223 | ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL); | 220 | ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL); |
224 | if (ret != 0) | 221 | if (ret != 0) |
@@ -230,7 +227,7 @@ void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode) | |||
230 | struct cifsInodeInfo *cifsi = CIFS_I(inode); | 227 | struct cifsInodeInfo *cifsi = CIFS_I(inode); |
231 | struct fscache_cookie *cookie = cifsi->fscache; | 228 | struct fscache_cookie *cookie = cifsi->fscache; |
232 | 229 | ||
233 | cFYI(1, "CIFS: fscache invalidatepage (0x%p/0x%p)", page, cookie); | 230 | cFYI(1, "%s: (0x%p/0x%p)", __func__, page, cookie); |
234 | fscache_wait_on_page_write(cookie, page); | 231 | fscache_wait_on_page_write(cookie, page); |
235 | fscache_uncache_page(cookie, page); | 232 | fscache_uncache_page(cookie, page); |
236 | } | 233 | } |
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 1525d5e662b6..1c5b770c3141 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c | |||
@@ -90,12 +90,10 @@ smbhash(unsigned char *out, const unsigned char *in, unsigned char *key) | |||
90 | sg_init_one(&sgout, out, 8); | 90 | sg_init_one(&sgout, out, 8); |
91 | 91 | ||
92 | rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8); | 92 | rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8); |
93 | if (rc) { | 93 | if (rc) |
94 | cERROR(1, "could not encrypt crypt key rc: %d\n", rc); | 94 | cERROR(1, "could not encrypt crypt key rc: %d\n", rc); |
95 | crypto_free_blkcipher(tfm_des); | ||
96 | goto smbhash_err; | ||
97 | } | ||
98 | 95 | ||
96 | crypto_free_blkcipher(tfm_des); | ||
99 | smbhash_err: | 97 | smbhash_err: |
100 | return rc; | 98 | return rc; |
101 | } | 99 | } |
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index 6cbb3afb36dc..cb140ef293e4 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c | |||
@@ -43,8 +43,6 @@ const struct file_operations coda_ioctl_operations = { | |||
43 | /* the coda pioctl inode ops */ | 43 | /* the coda pioctl inode ops */ |
44 | static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags) | 44 | static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags) |
45 | { | 45 | { |
46 | if (flags & IPERM_FLAG_RCU) | ||
47 | return -ECHILD; | ||
48 | return (mask & MAY_EXEC) ? -EACCES : 0; | 46 | return (mask & MAY_EXEC) ? -EACCES : 0; |
49 | } | 47 | } |
50 | 48 | ||
diff --git a/fs/dcookies.c b/fs/dcookies.c index a21cabdbd87b..dda0dc702d1b 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c | |||
@@ -178,6 +178,8 @@ SYSCALL_DEFINE(lookup_dcookie)(u64 cookie64, char __user * buf, size_t len) | |||
178 | /* FIXME: (deleted) ? */ | 178 | /* FIXME: (deleted) ? */ |
179 | path = d_path(&dcs->path, kbuf, PAGE_SIZE); | 179 | path = d_path(&dcs->path, kbuf, PAGE_SIZE); |
180 | 180 | ||
181 | mutex_unlock(&dcookie_mutex); | ||
182 | |||
181 | if (IS_ERR(path)) { | 183 | if (IS_ERR(path)) { |
182 | err = PTR_ERR(path); | 184 | err = PTR_ERR(path); |
183 | goto out_free; | 185 | goto out_free; |
@@ -194,6 +196,7 @@ SYSCALL_DEFINE(lookup_dcookie)(u64 cookie64, char __user * buf, size_t len) | |||
194 | 196 | ||
195 | out_free: | 197 | out_free: |
196 | kfree(kbuf); | 198 | kfree(kbuf); |
199 | return err; | ||
197 | out: | 200 | out: |
198 | mutex_unlock(&dcookie_mutex); | 201 | mutex_unlock(&dcookie_mutex); |
199 | return err; | 202 | return err; |
@@ -1093,6 +1093,7 @@ int flush_old_exec(struct linux_binprm * bprm) | |||
1093 | 1093 | ||
1094 | bprm->mm = NULL; /* We're using it now */ | 1094 | bprm->mm = NULL; /* We're using it now */ |
1095 | 1095 | ||
1096 | set_fs(USER_DS); | ||
1096 | current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); | 1097 | current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); |
1097 | flush_thread(); | 1098 | flush_thread(); |
1098 | current->personality &= ~bprm->per_clear; | 1099 | current->personality &= ~bprm->per_clear; |
@@ -1357,10 +1358,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) | |||
1357 | if (retval) | 1358 | if (retval) |
1358 | return retval; | 1359 | return retval; |
1359 | 1360 | ||
1360 | /* kernel module loader fixup */ | ||
1361 | /* so we don't try to load run modprobe in kernel space. */ | ||
1362 | set_fs(USER_DS); | ||
1363 | |||
1364 | retval = audit_bprm(bprm); | 1361 | retval = audit_bprm(bprm); |
1365 | if (retval) | 1362 | if (retval) |
1366 | return retval; | 1363 | return retval; |
@@ -1999,7 +1996,7 @@ static void wait_for_dump_helpers(struct file *file) | |||
1999 | * is a special value that we use to trap recursive | 1996 | * is a special value that we use to trap recursive |
2000 | * core dumps | 1997 | * core dumps |
2001 | */ | 1998 | */ |
2002 | static int umh_pipe_setup(struct subprocess_info *info) | 1999 | static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) |
2003 | { | 2000 | { |
2004 | struct file *rp, *wp; | 2001 | struct file *rp, *wp; |
2005 | struct fdtable *fdt; | 2002 | struct fdtable *fdt; |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 2e29abb30f76..095c36f3b612 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -125,7 +125,7 @@ struct ext4_ext_path { | |||
125 | * positive retcode - signal for ext4_ext_walk_space(), see below | 125 | * positive retcode - signal for ext4_ext_walk_space(), see below |
126 | * callback must return valid extent (passed or newly created) | 126 | * callback must return valid extent (passed or newly created) |
127 | */ | 127 | */ |
128 | typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, | 128 | typedef int (*ext_prepare_callback)(struct inode *, ext4_lblk_t, |
129 | struct ext4_ext_cache *, | 129 | struct ext4_ext_cache *, |
130 | struct ext4_extent *, void *); | 130 | struct ext4_extent *, void *); |
131 | 131 | ||
@@ -133,8 +133,11 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, | |||
133 | #define EXT_BREAK 1 | 133 | #define EXT_BREAK 1 |
134 | #define EXT_REPEAT 2 | 134 | #define EXT_REPEAT 2 |
135 | 135 | ||
136 | /* Maximum logical block in a file; ext4_extent's ee_block is __le32 */ | 136 | /* |
137 | #define EXT_MAX_BLOCK 0xffffffff | 137 | * Maximum number of logical blocks in a file; ext4_extent's ee_block is |
138 | * __le32. | ||
139 | */ | ||
140 | #define EXT_MAX_BLOCKS 0xffffffff | ||
138 | 141 | ||
139 | /* | 142 | /* |
140 | * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an | 143 | * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 5199bac7fc62..f815cc81e7a2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -1408,7 +1408,7 @@ got_index: | |||
1408 | 1408 | ||
1409 | /* | 1409 | /* |
1410 | * ext4_ext_next_allocated_block: | 1410 | * ext4_ext_next_allocated_block: |
1411 | * returns allocated block in subsequent extent or EXT_MAX_BLOCK. | 1411 | * returns allocated block in subsequent extent or EXT_MAX_BLOCKS. |
1412 | * NOTE: it considers block number from index entry as | 1412 | * NOTE: it considers block number from index entry as |
1413 | * allocated block. Thus, index entries have to be consistent | 1413 | * allocated block. Thus, index entries have to be consistent |
1414 | * with leaves. | 1414 | * with leaves. |
@@ -1422,7 +1422,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) | |||
1422 | depth = path->p_depth; | 1422 | depth = path->p_depth; |
1423 | 1423 | ||
1424 | if (depth == 0 && path->p_ext == NULL) | 1424 | if (depth == 0 && path->p_ext == NULL) |
1425 | return EXT_MAX_BLOCK; | 1425 | return EXT_MAX_BLOCKS; |
1426 | 1426 | ||
1427 | while (depth >= 0) { | 1427 | while (depth >= 0) { |
1428 | if (depth == path->p_depth) { | 1428 | if (depth == path->p_depth) { |
@@ -1439,12 +1439,12 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) | |||
1439 | depth--; | 1439 | depth--; |
1440 | } | 1440 | } |
1441 | 1441 | ||
1442 | return EXT_MAX_BLOCK; | 1442 | return EXT_MAX_BLOCKS; |
1443 | } | 1443 | } |
1444 | 1444 | ||
1445 | /* | 1445 | /* |
1446 | * ext4_ext_next_leaf_block: | 1446 | * ext4_ext_next_leaf_block: |
1447 | * returns first allocated block from next leaf or EXT_MAX_BLOCK | 1447 | * returns first allocated block from next leaf or EXT_MAX_BLOCKS |
1448 | */ | 1448 | */ |
1449 | static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, | 1449 | static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, |
1450 | struct ext4_ext_path *path) | 1450 | struct ext4_ext_path *path) |
@@ -1456,7 +1456,7 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, | |||
1456 | 1456 | ||
1457 | /* zero-tree has no leaf blocks at all */ | 1457 | /* zero-tree has no leaf blocks at all */ |
1458 | if (depth == 0) | 1458 | if (depth == 0) |
1459 | return EXT_MAX_BLOCK; | 1459 | return EXT_MAX_BLOCKS; |
1460 | 1460 | ||
1461 | /* go to index block */ | 1461 | /* go to index block */ |
1462 | depth--; | 1462 | depth--; |
@@ -1469,7 +1469,7 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, | |||
1469 | depth--; | 1469 | depth--; |
1470 | } | 1470 | } |
1471 | 1471 | ||
1472 | return EXT_MAX_BLOCK; | 1472 | return EXT_MAX_BLOCKS; |
1473 | } | 1473 | } |
1474 | 1474 | ||
1475 | /* | 1475 | /* |
@@ -1677,13 +1677,13 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode, | |||
1677 | */ | 1677 | */ |
1678 | if (b2 < b1) { | 1678 | if (b2 < b1) { |
1679 | b2 = ext4_ext_next_allocated_block(path); | 1679 | b2 = ext4_ext_next_allocated_block(path); |
1680 | if (b2 == EXT_MAX_BLOCK) | 1680 | if (b2 == EXT_MAX_BLOCKS) |
1681 | goto out; | 1681 | goto out; |
1682 | } | 1682 | } |
1683 | 1683 | ||
1684 | /* check for wrap through zero on extent logical start block*/ | 1684 | /* check for wrap through zero on extent logical start block*/ |
1685 | if (b1 + len1 < b1) { | 1685 | if (b1 + len1 < b1) { |
1686 | len1 = EXT_MAX_BLOCK - b1; | 1686 | len1 = EXT_MAX_BLOCKS - b1; |
1687 | newext->ee_len = cpu_to_le16(len1); | 1687 | newext->ee_len = cpu_to_le16(len1); |
1688 | ret = 1; | 1688 | ret = 1; |
1689 | } | 1689 | } |
@@ -1767,7 +1767,7 @@ repeat: | |||
1767 | fex = EXT_LAST_EXTENT(eh); | 1767 | fex = EXT_LAST_EXTENT(eh); |
1768 | next = ext4_ext_next_leaf_block(inode, path); | 1768 | next = ext4_ext_next_leaf_block(inode, path); |
1769 | if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) | 1769 | if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) |
1770 | && next != EXT_MAX_BLOCK) { | 1770 | && next != EXT_MAX_BLOCKS) { |
1771 | ext_debug("next leaf block - %d\n", next); | 1771 | ext_debug("next leaf block - %d\n", next); |
1772 | BUG_ON(npath != NULL); | 1772 | BUG_ON(npath != NULL); |
1773 | npath = ext4_ext_find_extent(inode, next, NULL); | 1773 | npath = ext4_ext_find_extent(inode, next, NULL); |
@@ -1887,7 +1887,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | |||
1887 | BUG_ON(func == NULL); | 1887 | BUG_ON(func == NULL); |
1888 | BUG_ON(inode == NULL); | 1888 | BUG_ON(inode == NULL); |
1889 | 1889 | ||
1890 | while (block < last && block != EXT_MAX_BLOCK) { | 1890 | while (block < last && block != EXT_MAX_BLOCKS) { |
1891 | num = last - block; | 1891 | num = last - block; |
1892 | /* find extent for this block */ | 1892 | /* find extent for this block */ |
1893 | down_read(&EXT4_I(inode)->i_data_sem); | 1893 | down_read(&EXT4_I(inode)->i_data_sem); |
@@ -1958,7 +1958,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | |||
1958 | err = -EIO; | 1958 | err = -EIO; |
1959 | break; | 1959 | break; |
1960 | } | 1960 | } |
1961 | err = func(inode, path, &cbex, ex, cbdata); | 1961 | err = func(inode, next, &cbex, ex, cbdata); |
1962 | ext4_ext_drop_refs(path); | 1962 | ext4_ext_drop_refs(path); |
1963 | 1963 | ||
1964 | if (err < 0) | 1964 | if (err < 0) |
@@ -2020,7 +2020,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2020 | if (ex == NULL) { | 2020 | if (ex == NULL) { |
2021 | /* there is no extent yet, so gap is [0;-] */ | 2021 | /* there is no extent yet, so gap is [0;-] */ |
2022 | lblock = 0; | 2022 | lblock = 0; |
2023 | len = EXT_MAX_BLOCK; | 2023 | len = EXT_MAX_BLOCKS; |
2024 | ext_debug("cache gap(whole file):"); | 2024 | ext_debug("cache gap(whole file):"); |
2025 | } else if (block < le32_to_cpu(ex->ee_block)) { | 2025 | } else if (block < le32_to_cpu(ex->ee_block)) { |
2026 | lblock = block; | 2026 | lblock = block; |
@@ -2350,7 +2350,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2350 | * never happen because at least one of the end points | 2350 | * never happen because at least one of the end points |
2351 | * needs to be on the edge of the extent. | 2351 | * needs to be on the edge of the extent. |
2352 | */ | 2352 | */ |
2353 | if (end == EXT_MAX_BLOCK) { | 2353 | if (end == EXT_MAX_BLOCKS - 1) { |
2354 | ext_debug(" bad truncate %u:%u\n", | 2354 | ext_debug(" bad truncate %u:%u\n", |
2355 | start, end); | 2355 | start, end); |
2356 | block = 0; | 2356 | block = 0; |
@@ -2398,7 +2398,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2398 | * If this is a truncate, this condition | 2398 | * If this is a truncate, this condition |
2399 | * should never happen | 2399 | * should never happen |
2400 | */ | 2400 | */ |
2401 | if (end == EXT_MAX_BLOCK) { | 2401 | if (end == EXT_MAX_BLOCKS - 1) { |
2402 | ext_debug(" bad truncate %u:%u\n", | 2402 | ext_debug(" bad truncate %u:%u\n", |
2403 | start, end); | 2403 | start, end); |
2404 | err = -EIO; | 2404 | err = -EIO; |
@@ -2478,7 +2478,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2478 | * we need to remove it from the leaf | 2478 | * we need to remove it from the leaf |
2479 | */ | 2479 | */ |
2480 | if (num == 0) { | 2480 | if (num == 0) { |
2481 | if (end != EXT_MAX_BLOCK) { | 2481 | if (end != EXT_MAX_BLOCKS - 1) { |
2482 | /* | 2482 | /* |
2483 | * For hole punching, we need to scoot all the | 2483 | * For hole punching, we need to scoot all the |
2484 | * extents up when an extent is removed so that | 2484 | * extents up when an extent is removed so that |
@@ -3699,7 +3699,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
3699 | 3699 | ||
3700 | last_block = (inode->i_size + sb->s_blocksize - 1) | 3700 | last_block = (inode->i_size + sb->s_blocksize - 1) |
3701 | >> EXT4_BLOCK_SIZE_BITS(sb); | 3701 | >> EXT4_BLOCK_SIZE_BITS(sb); |
3702 | err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCK); | 3702 | err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); |
3703 | 3703 | ||
3704 | /* In a multi-transaction truncate, we only make the final | 3704 | /* In a multi-transaction truncate, we only make the final |
3705 | * transaction synchronous. | 3705 | * transaction synchronous. |
@@ -3914,14 +3914,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
3914 | /* | 3914 | /* |
3915 | * Callback function called for each extent to gather FIEMAP information. | 3915 | * Callback function called for each extent to gather FIEMAP information. |
3916 | */ | 3916 | */ |
3917 | static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, | 3917 | static int ext4_ext_fiemap_cb(struct inode *inode, ext4_lblk_t next, |
3918 | struct ext4_ext_cache *newex, struct ext4_extent *ex, | 3918 | struct ext4_ext_cache *newex, struct ext4_extent *ex, |
3919 | void *data) | 3919 | void *data) |
3920 | { | 3920 | { |
3921 | __u64 logical; | 3921 | __u64 logical; |
3922 | __u64 physical; | 3922 | __u64 physical; |
3923 | __u64 length; | 3923 | __u64 length; |
3924 | loff_t size; | ||
3925 | __u32 flags = 0; | 3924 | __u32 flags = 0; |
3926 | int ret = 0; | 3925 | int ret = 0; |
3927 | struct fiemap_extent_info *fieinfo = data; | 3926 | struct fiemap_extent_info *fieinfo = data; |
@@ -4103,8 +4102,7 @@ found_delayed_extent: | |||
4103 | if (ex && ext4_ext_is_uninitialized(ex)) | 4102 | if (ex && ext4_ext_is_uninitialized(ex)) |
4104 | flags |= FIEMAP_EXTENT_UNWRITTEN; | 4103 | flags |= FIEMAP_EXTENT_UNWRITTEN; |
4105 | 4104 | ||
4106 | size = i_size_read(inode); | 4105 | if (next == EXT_MAX_BLOCKS) |
4107 | if (logical + length >= size) | ||
4108 | flags |= FIEMAP_EXTENT_LAST; | 4106 | flags |= FIEMAP_EXTENT_LAST; |
4109 | 4107 | ||
4110 | ret = fiemap_fill_next_extent(fieinfo, logical, physical, | 4108 | ret = fiemap_fill_next_extent(fieinfo, logical, physical, |
@@ -4347,8 +4345,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4347 | 4345 | ||
4348 | start_blk = start >> inode->i_sb->s_blocksize_bits; | 4346 | start_blk = start >> inode->i_sb->s_blocksize_bits; |
4349 | last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; | 4347 | last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; |
4350 | if (last_blk >= EXT_MAX_BLOCK) | 4348 | if (last_blk >= EXT_MAX_BLOCKS) |
4351 | last_blk = EXT_MAX_BLOCK-1; | 4349 | last_blk = EXT_MAX_BLOCKS-1; |
4352 | len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; | 4350 | len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; |
4353 | 4351 | ||
4354 | /* | 4352 | /* |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a5763e3505ba..e3126c051006 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -2634,7 +2634,7 @@ static int ext4_writepage(struct page *page, | |||
2634 | struct buffer_head *page_bufs = NULL; | 2634 | struct buffer_head *page_bufs = NULL; |
2635 | struct inode *inode = page->mapping->host; | 2635 | struct inode *inode = page->mapping->host; |
2636 | 2636 | ||
2637 | trace_ext4_writepage(inode, page); | 2637 | trace_ext4_writepage(page); |
2638 | size = i_size_read(inode); | 2638 | size = i_size_read(inode); |
2639 | if (page->index == size >> PAGE_CACHE_SHIFT) | 2639 | if (page->index == size >> PAGE_CACHE_SHIFT) |
2640 | len = size & ~PAGE_CACHE_MASK; | 2640 | len = size & ~PAGE_CACHE_MASK; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 859f2ae8864e..6ed859d56850 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -3578,8 +3578,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3578 | free += next - bit; | 3578 | free += next - bit; |
3579 | 3579 | ||
3580 | trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); | 3580 | trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); |
3581 | trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa, | 3581 | trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit, |
3582 | grp_blk_start + bit, next - bit); | 3582 | next - bit); |
3583 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); | 3583 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); |
3584 | bit = next + 1; | 3584 | bit = next + 1; |
3585 | } | 3585 | } |
@@ -3608,7 +3608,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, | |||
3608 | ext4_group_t group; | 3608 | ext4_group_t group; |
3609 | ext4_grpblk_t bit; | 3609 | ext4_grpblk_t bit; |
3610 | 3610 | ||
3611 | trace_ext4_mb_release_group_pa(sb, pa); | 3611 | trace_ext4_mb_release_group_pa(pa); |
3612 | BUG_ON(pa->pa_deleted == 0); | 3612 | BUG_ON(pa->pa_deleted == 0); |
3613 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3613 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
3614 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3614 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
@@ -4448,7 +4448,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4448 | * @inode: inode | 4448 | * @inode: inode |
4449 | * @block: start physical block to free | 4449 | * @block: start physical block to free |
4450 | * @count: number of blocks to count | 4450 | * @count: number of blocks to count |
4451 | * @metadata: Are these metadata blocks | 4451 | * @flags: flags used by ext4_free_blocks |
4452 | */ | 4452 | */ |
4453 | void ext4_free_blocks(handle_t *handle, struct inode *inode, | 4453 | void ext4_free_blocks(handle_t *handle, struct inode *inode, |
4454 | struct buffer_head *bh, ext4_fsblk_t block, | 4454 | struct buffer_head *bh, ext4_fsblk_t block, |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 2b8304bf3c50..f57455a1b1b2 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -1002,12 +1002,12 @@ mext_check_arguments(struct inode *orig_inode, | |||
1002 | return -EINVAL; | 1002 | return -EINVAL; |
1003 | } | 1003 | } |
1004 | 1004 | ||
1005 | if ((orig_start > EXT_MAX_BLOCK) || | 1005 | if ((orig_start >= EXT_MAX_BLOCKS) || |
1006 | (donor_start > EXT_MAX_BLOCK) || | 1006 | (donor_start >= EXT_MAX_BLOCKS) || |
1007 | (*len > EXT_MAX_BLOCK) || | 1007 | (*len > EXT_MAX_BLOCKS) || |
1008 | (orig_start + *len > EXT_MAX_BLOCK)) { | 1008 | (orig_start + *len >= EXT_MAX_BLOCKS)) { |
1009 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " | 1009 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " |
1010 | "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCK, | 1010 | "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS, |
1011 | orig_inode->i_ino, donor_inode->i_ino); | 1011 | orig_inode->i_ino, donor_inode->i_ino); |
1012 | return -EINVAL; | 1012 | return -EINVAL; |
1013 | } | 1013 | } |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cc5c157aa11d..9ea71aa864b3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -2243,6 +2243,12 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2243 | * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, | 2243 | * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, |
2244 | * so that won't be a limiting factor. | 2244 | * so that won't be a limiting factor. |
2245 | * | 2245 | * |
2246 | * However there is other limiting factor. We do store extents in the form | ||
2247 | * of starting block and length, hence the resulting length of the extent | ||
2248 | * covering maximum file size must fit into on-disk format containers as | ||
2249 | * well. Given that length is always by 1 unit bigger than max unit (because | ||
2250 | * we count 0 as well) we have to lower the s_maxbytes by one fs block. | ||
2251 | * | ||
2246 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. | 2252 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. |
2247 | */ | 2253 | */ |
2248 | static loff_t ext4_max_size(int blkbits, int has_huge_files) | 2254 | static loff_t ext4_max_size(int blkbits, int has_huge_files) |
@@ -2264,10 +2270,13 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files) | |||
2264 | upper_limit <<= blkbits; | 2270 | upper_limit <<= blkbits; |
2265 | } | 2271 | } |
2266 | 2272 | ||
2267 | /* 32-bit extent-start container, ee_block */ | 2273 | /* |
2268 | res = 1LL << 32; | 2274 | * 32-bit extent-start container, ee_block. We lower the maxbytes |
2275 | * by one fs block, so ee_len can cover the extent of maximum file | ||
2276 | * size | ||
2277 | */ | ||
2278 | res = (1LL << 32) - 1; | ||
2269 | res <<= blkbits; | 2279 | res <<= blkbits; |
2270 | res -= 1; | ||
2271 | 2280 | ||
2272 | /* Sanity check against vm- & vfs- imposed limits */ | 2281 | /* Sanity check against vm- & vfs- imposed limits */ |
2273 | if (res > upper_limit) | 2282 | if (res > upper_limit) |
diff --git a/fs/fat/file.c b/fs/fat/file.c index 7257752b6d5d..7018e1d8902d 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c | |||
@@ -102,7 +102,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) | |||
102 | if (attr & ATTR_SYS) | 102 | if (attr & ATTR_SYS) |
103 | inode->i_flags |= S_IMMUTABLE; | 103 | inode->i_flags |= S_IMMUTABLE; |
104 | else | 104 | else |
105 | inode->i_flags &= S_IMMUTABLE; | 105 | inode->i_flags &= ~S_IMMUTABLE; |
106 | } | 106 | } |
107 | 107 | ||
108 | fat_save_attrs(inode, attr); | 108 | fat_save_attrs(inode, attr); |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index cc6ec4b2f0ff..38f84cd48b67 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -921,6 +921,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
921 | if (sb->s_flags & MS_MANDLOCK) | 921 | if (sb->s_flags & MS_MANDLOCK) |
922 | goto err; | 922 | goto err; |
923 | 923 | ||
924 | sb->s_flags &= ~MS_NOSEC; | ||
925 | |||
924 | if (!parse_fuse_opt((char *) data, &d, is_bdev)) | 926 | if (!parse_fuse_opt((char *) data, &d, is_bdev)) |
925 | goto err; | 927 | goto err; |
926 | 928 | ||
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 2792a790e50b..1c1336e7b3b2 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -663,14 +663,19 @@ static void glock_work_func(struct work_struct *work) | |||
663 | drop_ref = 1; | 663 | drop_ref = 1; |
664 | } | 664 | } |
665 | spin_lock(&gl->gl_spin); | 665 | spin_lock(&gl->gl_spin); |
666 | if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && | 666 | if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && |
667 | gl->gl_state != LM_ST_UNLOCKED && | 667 | gl->gl_state != LM_ST_UNLOCKED && |
668 | gl->gl_demote_state != LM_ST_EXCLUSIVE) { | 668 | gl->gl_demote_state != LM_ST_EXCLUSIVE) { |
669 | unsigned long holdtime, now = jiffies; | 669 | unsigned long holdtime, now = jiffies; |
670 | |||
670 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; | 671 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; |
671 | if (time_before(now, holdtime)) | 672 | if (time_before(now, holdtime)) |
672 | delay = holdtime - now; | 673 | delay = holdtime - now; |
673 | set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags); | 674 | |
675 | if (!delay) { | ||
676 | clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); | ||
677 | set_bit(GLF_DEMOTE, &gl->gl_flags); | ||
678 | } | ||
674 | } | 679 | } |
675 | run_queue(gl, 0); | 680 | run_queue(gl, 0); |
676 | spin_unlock(&gl->gl_spin); | 681 | spin_unlock(&gl->gl_spin); |
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 3db5ba4568fc..b3cc8586984e 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -974,7 +974,7 @@ out_no_inode: | |||
974 | out_no_read: | 974 | out_no_read: |
975 | printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n", | 975 | printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n", |
976 | __func__, s->s_id, iso_blknum, block); | 976 | __func__, s->s_id, iso_blknum, block); |
977 | goto out_freesbi; | 977 | goto out_freebh; |
978 | out_bad_zone_size: | 978 | out_bad_zone_size: |
979 | printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n", | 979 | printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n", |
980 | sbi->s_log_zone_size); | 980 | sbi->s_log_zone_size); |
@@ -989,6 +989,7 @@ out_unknown_format: | |||
989 | 989 | ||
990 | out_freebh: | 990 | out_freebh: |
991 | brelse(bh); | 991 | brelse(bh); |
992 | brelse(pri_bh); | ||
992 | out_freesbi: | 993 | out_freesbi: |
993 | kfree(opt.iocharset); | 994 | kfree(opt.iocharset); |
994 | kfree(sbi); | 995 | kfree(sbi); |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 6a79fd0a1a32..2c62c5aae82f 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -97,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
97 | 97 | ||
98 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && | 98 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && |
99 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | 99 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { |
100 | /* | ||
101 | * Get our reference so that bh cannot be freed before | ||
102 | * we unlock it | ||
103 | */ | ||
104 | get_bh(bh); | ||
100 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 105 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
101 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; | 106 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; |
102 | jbd_unlock_bh_state(bh); | 107 | jbd_unlock_bh_state(bh); |
103 | jbd2_journal_remove_journal_head(bh); | ||
104 | BUFFER_TRACE(bh, "release"); | 108 | BUFFER_TRACE(bh, "release"); |
105 | __brelse(bh); | 109 | __brelse(bh); |
106 | } else { | 110 | } else { |
@@ -223,8 +227,8 @@ restart: | |||
223 | spin_lock(&journal->j_list_lock); | 227 | spin_lock(&journal->j_list_lock); |
224 | goto restart; | 228 | goto restart; |
225 | } | 229 | } |
230 | get_bh(bh); | ||
226 | if (buffer_locked(bh)) { | 231 | if (buffer_locked(bh)) { |
227 | atomic_inc(&bh->b_count); | ||
228 | spin_unlock(&journal->j_list_lock); | 232 | spin_unlock(&journal->j_list_lock); |
229 | jbd_unlock_bh_state(bh); | 233 | jbd_unlock_bh_state(bh); |
230 | wait_on_buffer(bh); | 234 | wait_on_buffer(bh); |
@@ -243,7 +247,6 @@ restart: | |||
243 | */ | 247 | */ |
244 | released = __jbd2_journal_remove_checkpoint(jh); | 248 | released = __jbd2_journal_remove_checkpoint(jh); |
245 | jbd_unlock_bh_state(bh); | 249 | jbd_unlock_bh_state(bh); |
246 | jbd2_journal_remove_journal_head(bh); | ||
247 | __brelse(bh); | 250 | __brelse(bh); |
248 | } | 251 | } |
249 | 252 | ||
@@ -284,7 +287,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
284 | int ret = 0; | 287 | int ret = 0; |
285 | 288 | ||
286 | if (buffer_locked(bh)) { | 289 | if (buffer_locked(bh)) { |
287 | atomic_inc(&bh->b_count); | 290 | get_bh(bh); |
288 | spin_unlock(&journal->j_list_lock); | 291 | spin_unlock(&journal->j_list_lock); |
289 | jbd_unlock_bh_state(bh); | 292 | jbd_unlock_bh_state(bh); |
290 | wait_on_buffer(bh); | 293 | wait_on_buffer(bh); |
@@ -316,12 +319,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
316 | ret = 1; | 319 | ret = 1; |
317 | if (unlikely(buffer_write_io_error(bh))) | 320 | if (unlikely(buffer_write_io_error(bh))) |
318 | ret = -EIO; | 321 | ret = -EIO; |
322 | get_bh(bh); | ||
319 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); | 323 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); |
320 | BUFFER_TRACE(bh, "remove from checkpoint"); | 324 | BUFFER_TRACE(bh, "remove from checkpoint"); |
321 | __jbd2_journal_remove_checkpoint(jh); | 325 | __jbd2_journal_remove_checkpoint(jh); |
322 | spin_unlock(&journal->j_list_lock); | 326 | spin_unlock(&journal->j_list_lock); |
323 | jbd_unlock_bh_state(bh); | 327 | jbd_unlock_bh_state(bh); |
324 | jbd2_journal_remove_journal_head(bh); | ||
325 | __brelse(bh); | 328 | __brelse(bh); |
326 | } else { | 329 | } else { |
327 | /* | 330 | /* |
@@ -554,7 +557,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
554 | /* | 557 | /* |
555 | * journal_clean_one_cp_list | 558 | * journal_clean_one_cp_list |
556 | * | 559 | * |
557 | * Find all the written-back checkpoint buffers in the given list and release them. | 560 | * Find all the written-back checkpoint buffers in the given list and |
561 | * release them. | ||
558 | * | 562 | * |
559 | * Called with the journal locked. | 563 | * Called with the journal locked. |
560 | * Called with j_list_lock held. | 564 | * Called with j_list_lock held. |
@@ -663,8 +667,8 @@ out: | |||
663 | * checkpoint lists. | 667 | * checkpoint lists. |
664 | * | 668 | * |
665 | * The function returns 1 if it frees the transaction, 0 otherwise. | 669 | * The function returns 1 if it frees the transaction, 0 otherwise. |
670 | * The function can free jh and bh. | ||
666 | * | 671 | * |
667 | * This function is called with the journal locked. | ||
668 | * This function is called with j_list_lock held. | 672 | * This function is called with j_list_lock held. |
669 | * This function is called with jbd_lock_bh_state(jh2bh(jh)) | 673 | * This function is called with jbd_lock_bh_state(jh2bh(jh)) |
670 | */ | 674 | */ |
@@ -684,13 +688,14 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
684 | } | 688 | } |
685 | journal = transaction->t_journal; | 689 | journal = transaction->t_journal; |
686 | 690 | ||
691 | JBUFFER_TRACE(jh, "removing from transaction"); | ||
687 | __buffer_unlink(jh); | 692 | __buffer_unlink(jh); |
688 | jh->b_cp_transaction = NULL; | 693 | jh->b_cp_transaction = NULL; |
694 | jbd2_journal_put_journal_head(jh); | ||
689 | 695 | ||
690 | if (transaction->t_checkpoint_list != NULL || | 696 | if (transaction->t_checkpoint_list != NULL || |
691 | transaction->t_checkpoint_io_list != NULL) | 697 | transaction->t_checkpoint_io_list != NULL) |
692 | goto out; | 698 | goto out; |
693 | JBUFFER_TRACE(jh, "transaction has no more buffers"); | ||
694 | 699 | ||
695 | /* | 700 | /* |
696 | * There is one special case to worry about: if we have just pulled the | 701 | * There is one special case to worry about: if we have just pulled the |
@@ -701,10 +706,8 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
701 | * The locking here around t_state is a bit sleazy. | 706 | * The locking here around t_state is a bit sleazy. |
702 | * See the comment at the end of jbd2_journal_commit_transaction(). | 707 | * See the comment at the end of jbd2_journal_commit_transaction(). |
703 | */ | 708 | */ |
704 | if (transaction->t_state != T_FINISHED) { | 709 | if (transaction->t_state != T_FINISHED) |
705 | JBUFFER_TRACE(jh, "belongs to running/committing transaction"); | ||
706 | goto out; | 710 | goto out; |
707 | } | ||
708 | 711 | ||
709 | /* OK, that was the last buffer for the transaction: we can now | 712 | /* OK, that was the last buffer for the transaction: we can now |
710 | safely remove this transaction from the log */ | 713 | safely remove this transaction from the log */ |
@@ -723,7 +726,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
723 | wake_up(&journal->j_wait_logspace); | 726 | wake_up(&journal->j_wait_logspace); |
724 | ret = 1; | 727 | ret = 1; |
725 | out: | 728 | out: |
726 | JBUFFER_TRACE(jh, "exit"); | ||
727 | return ret; | 729 | return ret; |
728 | } | 730 | } |
729 | 731 | ||
@@ -742,6 +744,8 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *jh, | |||
742 | J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); | 744 | J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); |
743 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); | 745 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); |
744 | 746 | ||
747 | /* Get reference for checkpointing transaction */ | ||
748 | jbd2_journal_grab_journal_head(jh2bh(jh)); | ||
745 | jh->b_cp_transaction = transaction; | 749 | jh->b_cp_transaction = transaction; |
746 | 750 | ||
747 | if (!transaction->t_checkpoint_list) { | 751 | if (!transaction->t_checkpoint_list) { |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7f21cf3aaf92..eef6979821a4 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -848,10 +848,16 @@ restart_loop: | |||
848 | while (commit_transaction->t_forget) { | 848 | while (commit_transaction->t_forget) { |
849 | transaction_t *cp_transaction; | 849 | transaction_t *cp_transaction; |
850 | struct buffer_head *bh; | 850 | struct buffer_head *bh; |
851 | int try_to_free = 0; | ||
851 | 852 | ||
852 | jh = commit_transaction->t_forget; | 853 | jh = commit_transaction->t_forget; |
853 | spin_unlock(&journal->j_list_lock); | 854 | spin_unlock(&journal->j_list_lock); |
854 | bh = jh2bh(jh); | 855 | bh = jh2bh(jh); |
856 | /* | ||
857 | * Get a reference so that bh cannot be freed before we are | ||
858 | * done with it. | ||
859 | */ | ||
860 | get_bh(bh); | ||
855 | jbd_lock_bh_state(bh); | 861 | jbd_lock_bh_state(bh); |
856 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); | 862 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); |
857 | 863 | ||
@@ -914,28 +920,27 @@ restart_loop: | |||
914 | __jbd2_journal_insert_checkpoint(jh, commit_transaction); | 920 | __jbd2_journal_insert_checkpoint(jh, commit_transaction); |
915 | if (is_journal_aborted(journal)) | 921 | if (is_journal_aborted(journal)) |
916 | clear_buffer_jbddirty(bh); | 922 | clear_buffer_jbddirty(bh); |
917 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); | ||
918 | __jbd2_journal_refile_buffer(jh); | ||
919 | jbd_unlock_bh_state(bh); | ||
920 | } else { | 923 | } else { |
921 | J_ASSERT_BH(bh, !buffer_dirty(bh)); | 924 | J_ASSERT_BH(bh, !buffer_dirty(bh)); |
922 | /* The buffer on BJ_Forget list and not jbddirty means | 925 | /* |
926 | * The buffer on BJ_Forget list and not jbddirty means | ||
923 | * it has been freed by this transaction and hence it | 927 | * it has been freed by this transaction and hence it |
924 | * could not have been reallocated until this | 928 | * could not have been reallocated until this |
925 | * transaction has committed. *BUT* it could be | 929 | * transaction has committed. *BUT* it could be |
926 | * reallocated once we have written all the data to | 930 | * reallocated once we have written all the data to |
927 | * disk and before we process the buffer on BJ_Forget | 931 | * disk and before we process the buffer on BJ_Forget |
928 | * list. */ | 932 | * list. |
929 | JBUFFER_TRACE(jh, "refile or unfile freed buffer"); | 933 | */ |
930 | __jbd2_journal_refile_buffer(jh); | 934 | if (!jh->b_next_transaction) |
931 | if (!jh->b_transaction) { | 935 | try_to_free = 1; |
932 | jbd_unlock_bh_state(bh); | ||
933 | /* needs a brelse */ | ||
934 | jbd2_journal_remove_journal_head(bh); | ||
935 | release_buffer_page(bh); | ||
936 | } else | ||
937 | jbd_unlock_bh_state(bh); | ||
938 | } | 936 | } |
937 | JBUFFER_TRACE(jh, "refile or unfile buffer"); | ||
938 | __jbd2_journal_refile_buffer(jh); | ||
939 | jbd_unlock_bh_state(bh); | ||
940 | if (try_to_free) | ||
941 | release_buffer_page(bh); /* Drops bh reference */ | ||
942 | else | ||
943 | __brelse(bh); | ||
939 | cond_resched_lock(&journal->j_list_lock); | 944 | cond_resched_lock(&journal->j_list_lock); |
940 | } | 945 | } |
941 | spin_unlock(&journal->j_list_lock); | 946 | spin_unlock(&journal->j_list_lock); |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 9a7826990304..0dfa5b598e68 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -2078,10 +2078,9 @@ static void journal_free_journal_head(struct journal_head *jh) | |||
2078 | * When a buffer has its BH_JBD bit set it is immune from being released by | 2078 | * When a buffer has its BH_JBD bit set it is immune from being released by |
2079 | * core kernel code, mainly via ->b_count. | 2079 | * core kernel code, mainly via ->b_count. |
2080 | * | 2080 | * |
2081 | * A journal_head may be detached from its buffer_head when the journal_head's | 2081 | * A journal_head is detached from its buffer_head when the journal_head's |
2082 | * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. | 2082 | * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint |
2083 | * Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the | 2083 | * transaction (b_cp_transaction) hold their references to b_jcount. |
2084 | * journal_head can be dropped if needed. | ||
2085 | * | 2084 | * |
2086 | * Various places in the kernel want to attach a journal_head to a buffer_head | 2085 | * Various places in the kernel want to attach a journal_head to a buffer_head |
2087 | * _before_ attaching the journal_head to a transaction. To protect the | 2086 | * _before_ attaching the journal_head to a transaction. To protect the |
@@ -2094,17 +2093,16 @@ static void journal_free_journal_head(struct journal_head *jh) | |||
2094 | * (Attach a journal_head if needed. Increments b_jcount) | 2093 | * (Attach a journal_head if needed. Increments b_jcount) |
2095 | * struct journal_head *jh = jbd2_journal_add_journal_head(bh); | 2094 | * struct journal_head *jh = jbd2_journal_add_journal_head(bh); |
2096 | * ... | 2095 | * ... |
2096 | * (Get another reference for transaction) | ||
2097 | * jbd2_journal_grab_journal_head(bh); | ||
2097 | * jh->b_transaction = xxx; | 2098 | * jh->b_transaction = xxx; |
2099 | * (Put original reference) | ||
2098 | * jbd2_journal_put_journal_head(jh); | 2100 | * jbd2_journal_put_journal_head(jh); |
2099 | * | ||
2100 | * Now, the journal_head's b_jcount is zero, but it is safe from being released | ||
2101 | * because it has a non-zero b_transaction. | ||
2102 | */ | 2101 | */ |
2103 | 2102 | ||
2104 | /* | 2103 | /* |
2105 | * Give a buffer_head a journal_head. | 2104 | * Give a buffer_head a journal_head. |
2106 | * | 2105 | * |
2107 | * Doesn't need the journal lock. | ||
2108 | * May sleep. | 2106 | * May sleep. |
2109 | */ | 2107 | */ |
2110 | struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) | 2108 | struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) |
@@ -2168,61 +2166,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh) | |||
2168 | struct journal_head *jh = bh2jh(bh); | 2166 | struct journal_head *jh = bh2jh(bh); |
2169 | 2167 | ||
2170 | J_ASSERT_JH(jh, jh->b_jcount >= 0); | 2168 | J_ASSERT_JH(jh, jh->b_jcount >= 0); |
2171 | 2169 | J_ASSERT_JH(jh, jh->b_transaction == NULL); | |
2172 | get_bh(bh); | 2170 | J_ASSERT_JH(jh, jh->b_next_transaction == NULL); |
2173 | if (jh->b_jcount == 0) { | 2171 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); |
2174 | if (jh->b_transaction == NULL && | 2172 | J_ASSERT_JH(jh, jh->b_jlist == BJ_None); |
2175 | jh->b_next_transaction == NULL && | 2173 | J_ASSERT_BH(bh, buffer_jbd(bh)); |
2176 | jh->b_cp_transaction == NULL) { | 2174 | J_ASSERT_BH(bh, jh2bh(jh) == bh); |
2177 | J_ASSERT_JH(jh, jh->b_jlist == BJ_None); | 2175 | BUFFER_TRACE(bh, "remove journal_head"); |
2178 | J_ASSERT_BH(bh, buffer_jbd(bh)); | 2176 | if (jh->b_frozen_data) { |
2179 | J_ASSERT_BH(bh, jh2bh(jh) == bh); | 2177 | printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); |
2180 | BUFFER_TRACE(bh, "remove journal_head"); | 2178 | jbd2_free(jh->b_frozen_data, bh->b_size); |
2181 | if (jh->b_frozen_data) { | ||
2182 | printk(KERN_WARNING "%s: freeing " | ||
2183 | "b_frozen_data\n", | ||
2184 | __func__); | ||
2185 | jbd2_free(jh->b_frozen_data, bh->b_size); | ||
2186 | } | ||
2187 | if (jh->b_committed_data) { | ||
2188 | printk(KERN_WARNING "%s: freeing " | ||
2189 | "b_committed_data\n", | ||
2190 | __func__); | ||
2191 | jbd2_free(jh->b_committed_data, bh->b_size); | ||
2192 | } | ||
2193 | bh->b_private = NULL; | ||
2194 | jh->b_bh = NULL; /* debug, really */ | ||
2195 | clear_buffer_jbd(bh); | ||
2196 | __brelse(bh); | ||
2197 | journal_free_journal_head(jh); | ||
2198 | } else { | ||
2199 | BUFFER_TRACE(bh, "journal_head was locked"); | ||
2200 | } | ||
2201 | } | 2179 | } |
2180 | if (jh->b_committed_data) { | ||
2181 | printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); | ||
2182 | jbd2_free(jh->b_committed_data, bh->b_size); | ||
2183 | } | ||
2184 | bh->b_private = NULL; | ||
2185 | jh->b_bh = NULL; /* debug, really */ | ||
2186 | clear_buffer_jbd(bh); | ||
2187 | journal_free_journal_head(jh); | ||
2202 | } | 2188 | } |
2203 | 2189 | ||
2204 | /* | 2190 | /* |
2205 | * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction | 2191 | * Drop a reference on the passed journal_head. If it fell to zero then |
2206 | * and has a zero b_jcount then remove and release its journal_head. If we did | ||
2207 | * see that the buffer is not used by any transaction we also "logically" | ||
2208 | * decrement ->b_count. | ||
2209 | * | ||
2210 | * We in fact take an additional increment on ->b_count as a convenience, | ||
2211 | * because the caller usually wants to do additional things with the bh | ||
2212 | * after calling here. | ||
2213 | * The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some | ||
2214 | * time. Once the caller has run __brelse(), the buffer is eligible for | ||
2215 | * reaping by try_to_free_buffers(). | ||
2216 | */ | ||
2217 | void jbd2_journal_remove_journal_head(struct buffer_head *bh) | ||
2218 | { | ||
2219 | jbd_lock_bh_journal_head(bh); | ||
2220 | __journal_remove_journal_head(bh); | ||
2221 | jbd_unlock_bh_journal_head(bh); | ||
2222 | } | ||
2223 | |||
2224 | /* | ||
2225 | * Drop a reference on the passed journal_head. If it fell to zero then try to | ||
2226 | * release the journal_head from the buffer_head. | 2192 | * release the journal_head from the buffer_head. |
2227 | */ | 2193 | */ |
2228 | void jbd2_journal_put_journal_head(struct journal_head *jh) | 2194 | void jbd2_journal_put_journal_head(struct journal_head *jh) |
@@ -2232,11 +2198,12 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) | |||
2232 | jbd_lock_bh_journal_head(bh); | 2198 | jbd_lock_bh_journal_head(bh); |
2233 | J_ASSERT_JH(jh, jh->b_jcount > 0); | 2199 | J_ASSERT_JH(jh, jh->b_jcount > 0); |
2234 | --jh->b_jcount; | 2200 | --jh->b_jcount; |
2235 | if (!jh->b_jcount && !jh->b_transaction) { | 2201 | if (!jh->b_jcount) { |
2236 | __journal_remove_journal_head(bh); | 2202 | __journal_remove_journal_head(bh); |
2203 | jbd_unlock_bh_journal_head(bh); | ||
2237 | __brelse(bh); | 2204 | __brelse(bh); |
2238 | } | 2205 | } else |
2239 | jbd_unlock_bh_journal_head(bh); | 2206 | jbd_unlock_bh_journal_head(bh); |
2240 | } | 2207 | } |
2241 | 2208 | ||
2242 | /* | 2209 | /* |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 3eec82d32fd4..2d7109414cdd 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
31 | 31 | ||
32 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); | 32 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); |
33 | static void __jbd2_journal_unfile_buffer(struct journal_head *jh); | ||
33 | 34 | ||
34 | /* | 35 | /* |
35 | * jbd2_get_transaction: obtain a new transaction_t object. | 36 | * jbd2_get_transaction: obtain a new transaction_t object. |
@@ -764,7 +765,6 @@ repeat: | |||
764 | if (!jh->b_transaction) { | 765 | if (!jh->b_transaction) { |
765 | JBUFFER_TRACE(jh, "no transaction"); | 766 | JBUFFER_TRACE(jh, "no transaction"); |
766 | J_ASSERT_JH(jh, !jh->b_next_transaction); | 767 | J_ASSERT_JH(jh, !jh->b_next_transaction); |
767 | jh->b_transaction = transaction; | ||
768 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); | 768 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); |
769 | spin_lock(&journal->j_list_lock); | 769 | spin_lock(&journal->j_list_lock); |
770 | __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); | 770 | __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); |
@@ -814,7 +814,6 @@ out: | |||
814 | * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. | 814 | * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. |
815 | * @handle: transaction to add buffer modifications to | 815 | * @handle: transaction to add buffer modifications to |
816 | * @bh: bh to be used for metadata writes | 816 | * @bh: bh to be used for metadata writes |
817 | * @credits: variable that will receive credits for the buffer | ||
818 | * | 817 | * |
819 | * Returns an error code or 0 on success. | 818 | * Returns an error code or 0 on success. |
820 | * | 819 | * |
@@ -896,8 +895,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
896 | * committed and so it's safe to clear the dirty bit. | 895 | * committed and so it's safe to clear the dirty bit. |
897 | */ | 896 | */ |
898 | clear_buffer_dirty(jh2bh(jh)); | 897 | clear_buffer_dirty(jh2bh(jh)); |
899 | jh->b_transaction = transaction; | ||
900 | |||
901 | /* first access by this transaction */ | 898 | /* first access by this transaction */ |
902 | jh->b_modified = 0; | 899 | jh->b_modified = 0; |
903 | 900 | ||
@@ -932,7 +929,6 @@ out: | |||
932 | * non-rewindable consequences | 929 | * non-rewindable consequences |
933 | * @handle: transaction | 930 | * @handle: transaction |
934 | * @bh: buffer to undo | 931 | * @bh: buffer to undo |
935 | * @credits: store the number of taken credits here (if not NULL) | ||
936 | * | 932 | * |
937 | * Sometimes there is a need to distinguish between metadata which has | 933 | * Sometimes there is a need to distinguish between metadata which has |
938 | * been committed to disk and that which has not. The ext3fs code uses | 934 | * been committed to disk and that which has not. The ext3fs code uses |
@@ -1232,8 +1228,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1232 | __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); | 1228 | __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); |
1233 | } else { | 1229 | } else { |
1234 | __jbd2_journal_unfile_buffer(jh); | 1230 | __jbd2_journal_unfile_buffer(jh); |
1235 | jbd2_journal_remove_journal_head(bh); | ||
1236 | __brelse(bh); | ||
1237 | if (!buffer_jbd(bh)) { | 1231 | if (!buffer_jbd(bh)) { |
1238 | spin_unlock(&journal->j_list_lock); | 1232 | spin_unlock(&journal->j_list_lock); |
1239 | jbd_unlock_bh_state(bh); | 1233 | jbd_unlock_bh_state(bh); |
@@ -1556,19 +1550,32 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
1556 | mark_buffer_dirty(bh); /* Expose it to the VM */ | 1550 | mark_buffer_dirty(bh); /* Expose it to the VM */ |
1557 | } | 1551 | } |
1558 | 1552 | ||
1559 | void __jbd2_journal_unfile_buffer(struct journal_head *jh) | 1553 | /* |
1554 | * Remove buffer from all transactions. | ||
1555 | * | ||
1556 | * Called with bh_state lock and j_list_lock | ||
1557 | * | ||
1558 | * jh and bh may be already freed when this function returns. | ||
1559 | */ | ||
1560 | static void __jbd2_journal_unfile_buffer(struct journal_head *jh) | ||
1560 | { | 1561 | { |
1561 | __jbd2_journal_temp_unlink_buffer(jh); | 1562 | __jbd2_journal_temp_unlink_buffer(jh); |
1562 | jh->b_transaction = NULL; | 1563 | jh->b_transaction = NULL; |
1564 | jbd2_journal_put_journal_head(jh); | ||
1563 | } | 1565 | } |
1564 | 1566 | ||
1565 | void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) | 1567 | void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) |
1566 | { | 1568 | { |
1567 | jbd_lock_bh_state(jh2bh(jh)); | 1569 | struct buffer_head *bh = jh2bh(jh); |
1570 | |||
1571 | /* Get reference so that buffer cannot be freed before we unlock it */ | ||
1572 | get_bh(bh); | ||
1573 | jbd_lock_bh_state(bh); | ||
1568 | spin_lock(&journal->j_list_lock); | 1574 | spin_lock(&journal->j_list_lock); |
1569 | __jbd2_journal_unfile_buffer(jh); | 1575 | __jbd2_journal_unfile_buffer(jh); |
1570 | spin_unlock(&journal->j_list_lock); | 1576 | spin_unlock(&journal->j_list_lock); |
1571 | jbd_unlock_bh_state(jh2bh(jh)); | 1577 | jbd_unlock_bh_state(bh); |
1578 | __brelse(bh); | ||
1572 | } | 1579 | } |
1573 | 1580 | ||
1574 | /* | 1581 | /* |
@@ -1595,8 +1602,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
1595 | if (jh->b_jlist == BJ_None) { | 1602 | if (jh->b_jlist == BJ_None) { |
1596 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1603 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
1597 | __jbd2_journal_remove_checkpoint(jh); | 1604 | __jbd2_journal_remove_checkpoint(jh); |
1598 | jbd2_journal_remove_journal_head(bh); | ||
1599 | __brelse(bh); | ||
1600 | } | 1605 | } |
1601 | } | 1606 | } |
1602 | spin_unlock(&journal->j_list_lock); | 1607 | spin_unlock(&journal->j_list_lock); |
@@ -1659,7 +1664,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, | |||
1659 | /* | 1664 | /* |
1660 | * We take our own ref against the journal_head here to avoid | 1665 | * We take our own ref against the journal_head here to avoid |
1661 | * having to add tons of locking around each instance of | 1666 | * having to add tons of locking around each instance of |
1662 | * jbd2_journal_remove_journal_head() and | ||
1663 | * jbd2_journal_put_journal_head(). | 1667 | * jbd2_journal_put_journal_head(). |
1664 | */ | 1668 | */ |
1665 | jh = jbd2_journal_grab_journal_head(bh); | 1669 | jh = jbd2_journal_grab_journal_head(bh); |
@@ -1697,10 +1701,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1697 | int may_free = 1; | 1701 | int may_free = 1; |
1698 | struct buffer_head *bh = jh2bh(jh); | 1702 | struct buffer_head *bh = jh2bh(jh); |
1699 | 1703 | ||
1700 | __jbd2_journal_unfile_buffer(jh); | ||
1701 | |||
1702 | if (jh->b_cp_transaction) { | 1704 | if (jh->b_cp_transaction) { |
1703 | JBUFFER_TRACE(jh, "on running+cp transaction"); | 1705 | JBUFFER_TRACE(jh, "on running+cp transaction"); |
1706 | __jbd2_journal_temp_unlink_buffer(jh); | ||
1704 | /* | 1707 | /* |
1705 | * We don't want to write the buffer anymore, clear the | 1708 | * We don't want to write the buffer anymore, clear the |
1706 | * bit so that we don't confuse checks in | 1709 | * bit so that we don't confuse checks in |
@@ -1711,8 +1714,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1711 | may_free = 0; | 1714 | may_free = 0; |
1712 | } else { | 1715 | } else { |
1713 | JBUFFER_TRACE(jh, "on running transaction"); | 1716 | JBUFFER_TRACE(jh, "on running transaction"); |
1714 | jbd2_journal_remove_journal_head(bh); | 1717 | __jbd2_journal_unfile_buffer(jh); |
1715 | __brelse(bh); | ||
1716 | } | 1718 | } |
1717 | return may_free; | 1719 | return may_free; |
1718 | } | 1720 | } |
@@ -1990,6 +1992,8 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
1990 | 1992 | ||
1991 | if (jh->b_transaction) | 1993 | if (jh->b_transaction) |
1992 | __jbd2_journal_temp_unlink_buffer(jh); | 1994 | __jbd2_journal_temp_unlink_buffer(jh); |
1995 | else | ||
1996 | jbd2_journal_grab_journal_head(bh); | ||
1993 | jh->b_transaction = transaction; | 1997 | jh->b_transaction = transaction; |
1994 | 1998 | ||
1995 | switch (jlist) { | 1999 | switch (jlist) { |
@@ -2041,9 +2045,10 @@ void jbd2_journal_file_buffer(struct journal_head *jh, | |||
2041 | * already started to be used by a subsequent transaction, refile the | 2045 | * already started to be used by a subsequent transaction, refile the |
2042 | * buffer on that transaction's metadata list. | 2046 | * buffer on that transaction's metadata list. |
2043 | * | 2047 | * |
2044 | * Called under journal->j_list_lock | 2048 | * Called under j_list_lock |
2045 | * | ||
2046 | * Called under jbd_lock_bh_state(jh2bh(jh)) | 2049 | * Called under jbd_lock_bh_state(jh2bh(jh)) |
2050 | * | ||
2051 | * jh and bh may be already free when this function returns | ||
2047 | */ | 2052 | */ |
2048 | void __jbd2_journal_refile_buffer(struct journal_head *jh) | 2053 | void __jbd2_journal_refile_buffer(struct journal_head *jh) |
2049 | { | 2054 | { |
@@ -2067,6 +2072,11 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) | |||
2067 | 2072 | ||
2068 | was_dirty = test_clear_buffer_jbddirty(bh); | 2073 | was_dirty = test_clear_buffer_jbddirty(bh); |
2069 | __jbd2_journal_temp_unlink_buffer(jh); | 2074 | __jbd2_journal_temp_unlink_buffer(jh); |
2075 | /* | ||
2076 | * We set b_transaction here because b_next_transaction will inherit | ||
2077 | * our jh reference and thus __jbd2_journal_file_buffer() must not | ||
2078 | * take a new one. | ||
2079 | */ | ||
2070 | jh->b_transaction = jh->b_next_transaction; | 2080 | jh->b_transaction = jh->b_next_transaction; |
2071 | jh->b_next_transaction = NULL; | 2081 | jh->b_next_transaction = NULL; |
2072 | if (buffer_freed(bh)) | 2082 | if (buffer_freed(bh)) |
@@ -2083,30 +2093,21 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) | |||
2083 | } | 2093 | } |
2084 | 2094 | ||
2085 | /* | 2095 | /* |
2086 | * For the unlocked version of this call, also make sure that any | 2096 | * __jbd2_journal_refile_buffer() with necessary locking added. We take our |
2087 | * hanging journal_head is cleaned up if necessary. | 2097 | * bh reference so that we can safely unlock bh. |
2088 | * | 2098 | * |
2089 | * __jbd2_journal_refile_buffer is usually called as part of a single locked | 2099 | * The jh and bh may be freed by this call. |
2090 | * operation on a buffer_head, in which the caller is probably going to | ||
2091 | * be hooking the journal_head onto other lists. In that case it is up | ||
2092 | * to the caller to remove the journal_head if necessary. For the | ||
2093 | * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be | ||
2094 | * doing anything else to the buffer so we need to do the cleanup | ||
2095 | * ourselves to avoid a jh leak. | ||
2096 | * | ||
2097 | * *** The journal_head may be freed by this call! *** | ||
2098 | */ | 2100 | */ |
2099 | void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) | 2101 | void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) |
2100 | { | 2102 | { |
2101 | struct buffer_head *bh = jh2bh(jh); | 2103 | struct buffer_head *bh = jh2bh(jh); |
2102 | 2104 | ||
2105 | /* Get reference so that buffer cannot be freed before we unlock it */ | ||
2106 | get_bh(bh); | ||
2103 | jbd_lock_bh_state(bh); | 2107 | jbd_lock_bh_state(bh); |
2104 | spin_lock(&journal->j_list_lock); | 2108 | spin_lock(&journal->j_list_lock); |
2105 | |||
2106 | __jbd2_journal_refile_buffer(jh); | 2109 | __jbd2_journal_refile_buffer(jh); |
2107 | jbd_unlock_bh_state(bh); | 2110 | jbd_unlock_bh_state(bh); |
2108 | jbd2_journal_remove_journal_head(bh); | ||
2109 | |||
2110 | spin_unlock(&journal->j_list_lock); | 2111 | spin_unlock(&journal->j_list_lock); |
2111 | __brelse(bh); | 2112 | __brelse(bh); |
2112 | } | 2113 | } |
diff --git a/fs/jfs/file.c b/fs/jfs/file.c index c5ce6c1d1ff4..2f3f531f3606 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c | |||
@@ -66,9 +66,9 @@ static int jfs_open(struct inode *inode, struct file *file) | |||
66 | struct jfs_inode_info *ji = JFS_IP(inode); | 66 | struct jfs_inode_info *ji = JFS_IP(inode); |
67 | spin_lock_irq(&ji->ag_lock); | 67 | spin_lock_irq(&ji->ag_lock); |
68 | if (ji->active_ag == -1) { | 68 | if (ji->active_ag == -1) { |
69 | ji->active_ag = ji->agno; | 69 | struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb); |
70 | atomic_inc( | 70 | ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb); |
71 | &JFS_SBI(inode->i_sb)->bmap->db_active[ji->agno]); | 71 | atomic_inc( &jfs_sb->bmap->db_active[ji->active_ag]); |
72 | } | 72 | } |
73 | spin_unlock_irq(&ji->ag_lock); | 73 | spin_unlock_irq(&ji->ag_lock); |
74 | } | 74 | } |
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index ed53a4740168..b78b2f978f04 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c | |||
@@ -397,7 +397,7 @@ int diRead(struct inode *ip) | |||
397 | release_metapage(mp); | 397 | release_metapage(mp); |
398 | 398 | ||
399 | /* set the ag for the inode */ | 399 | /* set the ag for the inode */ |
400 | JFS_IP(ip)->agno = BLKTOAG(agstart, sbi); | 400 | JFS_IP(ip)->agstart = agstart; |
401 | JFS_IP(ip)->active_ag = -1; | 401 | JFS_IP(ip)->active_ag = -1; |
402 | 402 | ||
403 | return (rc); | 403 | return (rc); |
@@ -901,7 +901,7 @@ int diFree(struct inode *ip) | |||
901 | 901 | ||
902 | /* get the allocation group for this ino. | 902 | /* get the allocation group for this ino. |
903 | */ | 903 | */ |
904 | agno = JFS_IP(ip)->agno; | 904 | agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb)); |
905 | 905 | ||
906 | /* Lock the AG specific inode map information | 906 | /* Lock the AG specific inode map information |
907 | */ | 907 | */ |
@@ -1315,12 +1315,11 @@ int diFree(struct inode *ip) | |||
1315 | static inline void | 1315 | static inline void |
1316 | diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) | 1316 | diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) |
1317 | { | 1317 | { |
1318 | struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); | ||
1319 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); | 1318 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); |
1320 | 1319 | ||
1321 | ip->i_ino = (iagno << L2INOSPERIAG) + ino; | 1320 | ip->i_ino = (iagno << L2INOSPERIAG) + ino; |
1322 | jfs_ip->ixpxd = iagp->inoext[extno]; | 1321 | jfs_ip->ixpxd = iagp->inoext[extno]; |
1323 | jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); | 1322 | jfs_ip->agstart = le64_to_cpu(iagp->agstart); |
1324 | jfs_ip->active_ag = -1; | 1323 | jfs_ip->active_ag = -1; |
1325 | } | 1324 | } |
1326 | 1325 | ||
@@ -1379,7 +1378,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) | |||
1379 | */ | 1378 | */ |
1380 | 1379 | ||
1381 | /* get the ag number of this iag */ | 1380 | /* get the ag number of this iag */ |
1382 | agno = JFS_IP(pip)->agno; | 1381 | agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb)); |
1383 | 1382 | ||
1384 | if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { | 1383 | if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { |
1385 | /* | 1384 | /* |
@@ -2921,10 +2920,9 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) | |||
2921 | continue; | 2920 | continue; |
2922 | } | 2921 | } |
2923 | 2922 | ||
2924 | /* agstart that computes to the same ag is treated as same; */ | ||
2925 | agstart = le64_to_cpu(iagp->agstart); | 2923 | agstart = le64_to_cpu(iagp->agstart); |
2926 | /* iagp->agstart = agstart & ~(mp->db_agsize - 1); */ | ||
2927 | n = agstart >> mp->db_agl2size; | 2924 | n = agstart >> mp->db_agl2size; |
2925 | iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size); | ||
2928 | 2926 | ||
2929 | /* compute backed inodes */ | 2927 | /* compute backed inodes */ |
2930 | numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts)) | 2928 | numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts)) |
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 1439f119ec83..584a4a1a6e81 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h | |||
@@ -50,8 +50,9 @@ struct jfs_inode_info { | |||
50 | short btindex; /* btpage entry index*/ | 50 | short btindex; /* btpage entry index*/ |
51 | struct inode *ipimap; /* inode map */ | 51 | struct inode *ipimap; /* inode map */ |
52 | unsigned long cflag; /* commit flags */ | 52 | unsigned long cflag; /* commit flags */ |
53 | u64 agstart; /* agstart of the containing IAG */ | ||
53 | u16 bxflag; /* xflag of pseudo buffer? */ | 54 | u16 bxflag; /* xflag of pseudo buffer? */ |
54 | unchar agno; /* ag number */ | 55 | unchar pad; |
55 | signed char active_ag; /* ag currently allocating from */ | 56 | signed char active_ag; /* ag currently allocating from */ |
56 | lid_t blid; /* lid of pseudo buffer? */ | 57 | lid_t blid; /* lid of pseudo buffer? */ |
57 | lid_t atlhead; /* anonymous tlock list head */ | 58 | lid_t atlhead; /* anonymous tlock list head */ |
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 278e3fb40b71..583636f745e5 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c | |||
@@ -1123,7 +1123,7 @@ int lmLogOpen(struct super_block *sb) | |||
1123 | bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, | 1123 | bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, |
1124 | log); | 1124 | log); |
1125 | if (IS_ERR(bdev)) { | 1125 | if (IS_ERR(bdev)) { |
1126 | rc = -PTR_ERR(bdev); | 1126 | rc = PTR_ERR(bdev); |
1127 | goto free; | 1127 | goto free; |
1128 | } | 1128 | } |
1129 | 1129 | ||
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 8ea5efb5a34e..8d0c1c7c0820 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c | |||
@@ -80,7 +80,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
80 | int log_formatted = 0; | 80 | int log_formatted = 0; |
81 | struct inode *iplist[1]; | 81 | struct inode *iplist[1]; |
82 | struct jfs_superblock *j_sb, *j_sb2; | 82 | struct jfs_superblock *j_sb, *j_sb2; |
83 | uint old_agsize; | 83 | s64 old_agsize; |
84 | int agsizechanged = 0; | 84 | int agsizechanged = 0; |
85 | struct buffer_head *bh, *bh2; | 85 | struct buffer_head *bh, *bh2; |
86 | 86 | ||
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index adb45ec9038c..e374050a911c 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c | |||
@@ -708,7 +708,13 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data) | |||
708 | 708 | ||
709 | if (task->tk_status < 0) { | 709 | if (task->tk_status < 0) { |
710 | dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status); | 710 | dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status); |
711 | goto retry_rebind; | 711 | switch (task->tk_status) { |
712 | case -EACCES: | ||
713 | case -EIO: | ||
714 | goto die; | ||
715 | default: | ||
716 | goto retry_rebind; | ||
717 | } | ||
712 | } | 718 | } |
713 | if (status == NLM_LCK_DENIED_GRACE_PERIOD) { | 719 | if (status == NLM_LCK_DENIED_GRACE_PERIOD) { |
714 | rpc_delay(task, NLMCLNT_GRACE_WAIT); | 720 | rpc_delay(task, NLMCLNT_GRACE_WAIT); |
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 9ed89d1663f8..1afae26cf236 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c | |||
@@ -555,13 +555,6 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry, | |||
555 | return __logfs_create(dir, dentry, inode, target, destlen); | 555 | return __logfs_create(dir, dentry, inode, target, destlen); |
556 | } | 556 | } |
557 | 557 | ||
558 | static int logfs_permission(struct inode *inode, int mask, unsigned int flags) | ||
559 | { | ||
560 | if (flags & IPERM_FLAG_RCU) | ||
561 | return -ECHILD; | ||
562 | return generic_permission(inode, mask, flags, NULL); | ||
563 | } | ||
564 | |||
565 | static int logfs_link(struct dentry *old_dentry, struct inode *dir, | 558 | static int logfs_link(struct dentry *old_dentry, struct inode *dir, |
566 | struct dentry *dentry) | 559 | struct dentry *dentry) |
567 | { | 560 | { |
@@ -820,7 +813,6 @@ const struct inode_operations logfs_dir_iops = { | |||
820 | .mknod = logfs_mknod, | 813 | .mknod = logfs_mknod, |
821 | .rename = logfs_rename, | 814 | .rename = logfs_rename, |
822 | .rmdir = logfs_rmdir, | 815 | .rmdir = logfs_rmdir, |
823 | .permission = logfs_permission, | ||
824 | .symlink = logfs_symlink, | 816 | .symlink = logfs_symlink, |
825 | .unlink = logfs_unlink, | 817 | .unlink = logfs_unlink, |
826 | }; | 818 | }; |
diff --git a/fs/namei.c b/fs/namei.c index e2e4e8d032ee..0223c41fb114 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -238,7 +238,8 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags, | |||
238 | 238 | ||
239 | /* | 239 | /* |
240 | * Read/write DACs are always overridable. | 240 | * Read/write DACs are always overridable. |
241 | * Executable DACs are overridable if at least one exec bit is set. | 241 | * Executable DACs are overridable for all directories and |
242 | * for non-directories that have at least one exec bit set. | ||
242 | */ | 243 | */ |
243 | if (!(mask & MAY_EXEC) || execute_ok(inode)) | 244 | if (!(mask & MAY_EXEC) || execute_ok(inode)) |
244 | if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE)) | 245 | if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE)) |
@@ -812,6 +813,11 @@ static int follow_automount(struct path *path, unsigned flags, | |||
812 | if (!mnt) /* mount collision */ | 813 | if (!mnt) /* mount collision */ |
813 | return 0; | 814 | return 0; |
814 | 815 | ||
816 | if (!*need_mntput) { | ||
817 | /* lock_mount() may release path->mnt on error */ | ||
818 | mntget(path->mnt); | ||
819 | *need_mntput = true; | ||
820 | } | ||
815 | err = finish_automount(mnt, path); | 821 | err = finish_automount(mnt, path); |
816 | 822 | ||
817 | switch (err) { | 823 | switch (err) { |
@@ -819,12 +825,9 @@ static int follow_automount(struct path *path, unsigned flags, | |||
819 | /* Someone else made a mount here whilst we were busy */ | 825 | /* Someone else made a mount here whilst we were busy */ |
820 | return 0; | 826 | return 0; |
821 | case 0: | 827 | case 0: |
822 | dput(path->dentry); | 828 | path_put(path); |
823 | if (*need_mntput) | ||
824 | mntput(path->mnt); | ||
825 | path->mnt = mnt; | 829 | path->mnt = mnt; |
826 | path->dentry = dget(mnt->mnt_root); | 830 | path->dentry = dget(mnt->mnt_root); |
827 | *need_mntput = true; | ||
828 | return 0; | 831 | return 0; |
829 | default: | 832 | default: |
830 | return err; | 833 | return err; |
@@ -844,9 +847,10 @@ static int follow_automount(struct path *path, unsigned flags, | |||
844 | */ | 847 | */ |
845 | static int follow_managed(struct path *path, unsigned flags) | 848 | static int follow_managed(struct path *path, unsigned flags) |
846 | { | 849 | { |
850 | struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */ | ||
847 | unsigned managed; | 851 | unsigned managed; |
848 | bool need_mntput = false; | 852 | bool need_mntput = false; |
849 | int ret; | 853 | int ret = 0; |
850 | 854 | ||
851 | /* Given that we're not holding a lock here, we retain the value in a | 855 | /* Given that we're not holding a lock here, we retain the value in a |
852 | * local variable for each dentry as we look at it so that we don't see | 856 | * local variable for each dentry as we look at it so that we don't see |
@@ -861,7 +865,7 @@ static int follow_managed(struct path *path, unsigned flags) | |||
861 | BUG_ON(!path->dentry->d_op->d_manage); | 865 | BUG_ON(!path->dentry->d_op->d_manage); |
862 | ret = path->dentry->d_op->d_manage(path->dentry, false); | 866 | ret = path->dentry->d_op->d_manage(path->dentry, false); |
863 | if (ret < 0) | 867 | if (ret < 0) |
864 | return ret == -EISDIR ? 0 : ret; | 868 | break; |
865 | } | 869 | } |
866 | 870 | ||
867 | /* Transit to a mounted filesystem. */ | 871 | /* Transit to a mounted filesystem. */ |
@@ -887,14 +891,19 @@ static int follow_managed(struct path *path, unsigned flags) | |||
887 | if (managed & DCACHE_NEED_AUTOMOUNT) { | 891 | if (managed & DCACHE_NEED_AUTOMOUNT) { |
888 | ret = follow_automount(path, flags, &need_mntput); | 892 | ret = follow_automount(path, flags, &need_mntput); |
889 | if (ret < 0) | 893 | if (ret < 0) |
890 | return ret == -EISDIR ? 0 : ret; | 894 | break; |
891 | continue; | 895 | continue; |
892 | } | 896 | } |
893 | 897 | ||
894 | /* We didn't change the current path point */ | 898 | /* We didn't change the current path point */ |
895 | break; | 899 | break; |
896 | } | 900 | } |
897 | return 0; | 901 | |
902 | if (need_mntput && path->mnt == mnt) | ||
903 | mntput(path->mnt); | ||
904 | if (ret == -EISDIR) | ||
905 | ret = 0; | ||
906 | return ret; | ||
898 | } | 907 | } |
899 | 908 | ||
900 | int follow_down_one(struct path *path) | 909 | int follow_down_one(struct path *path) |
@@ -1003,9 +1012,6 @@ failed: | |||
1003 | * Follow down to the covering mount currently visible to userspace. At each | 1012 | * Follow down to the covering mount currently visible to userspace. At each |
1004 | * point, the filesystem owning that dentry may be queried as to whether the | 1013 | * point, the filesystem owning that dentry may be queried as to whether the |
1005 | * caller is permitted to proceed or not. | 1014 | * caller is permitted to proceed or not. |
1006 | * | ||
1007 | * Care must be taken as namespace_sem may be held (indicated by mounting_here | ||
1008 | * being true). | ||
1009 | */ | 1015 | */ |
1010 | int follow_down(struct path *path) | 1016 | int follow_down(struct path *path) |
1011 | { | 1017 | { |
@@ -2624,6 +2630,10 @@ static long do_rmdir(int dfd, const char __user *pathname) | |||
2624 | error = PTR_ERR(dentry); | 2630 | error = PTR_ERR(dentry); |
2625 | if (IS_ERR(dentry)) | 2631 | if (IS_ERR(dentry)) |
2626 | goto exit2; | 2632 | goto exit2; |
2633 | if (!dentry->d_inode) { | ||
2634 | error = -ENOENT; | ||
2635 | goto exit3; | ||
2636 | } | ||
2627 | error = mnt_want_write(nd.path.mnt); | 2637 | error = mnt_want_write(nd.path.mnt); |
2628 | if (error) | 2638 | if (error) |
2629 | goto exit3; | 2639 | goto exit3; |
@@ -2712,8 +2722,9 @@ static long do_unlinkat(int dfd, const char __user *pathname) | |||
2712 | if (nd.last.name[nd.last.len]) | 2722 | if (nd.last.name[nd.last.len]) |
2713 | goto slashes; | 2723 | goto slashes; |
2714 | inode = dentry->d_inode; | 2724 | inode = dentry->d_inode; |
2715 | if (inode) | 2725 | if (!inode) |
2716 | ihold(inode); | 2726 | goto slashes; |
2727 | ihold(inode); | ||
2717 | error = mnt_want_write(nd.path.mnt); | 2728 | error = mnt_want_write(nd.path.mnt); |
2718 | if (error) | 2729 | if (error) |
2719 | goto exit2; | 2730 | goto exit2; |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 144f2a3c7185..6f4850deb272 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -256,7 +256,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
256 | 256 | ||
257 | nfs_attr_check_mountpoint(sb, fattr); | 257 | nfs_attr_check_mountpoint(sb, fattr); |
258 | 258 | ||
259 | if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0 && (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) | 259 | if (((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) && |
260 | !nfs_attr_use_mounted_on_fileid(fattr)) | ||
260 | goto out_no_inode; | 261 | goto out_no_inode; |
261 | if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) | 262 | if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) |
262 | goto out_no_inode; | 263 | goto out_no_inode; |
@@ -1294,7 +1295,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1294 | if (new_isize != cur_isize) { | 1295 | if (new_isize != cur_isize) { |
1295 | /* Do we perhaps have any outstanding writes, or has | 1296 | /* Do we perhaps have any outstanding writes, or has |
1296 | * the file grown beyond our last write? */ | 1297 | * the file grown beyond our last write? */ |
1297 | if (nfsi->npages == 0 || new_isize > cur_isize) { | 1298 | if ((nfsi->npages == 0 && !test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) || |
1299 | new_isize > cur_isize) { | ||
1298 | i_size_write(inode, new_isize); | 1300 | i_size_write(inode, new_isize); |
1299 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; | 1301 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; |
1300 | } | 1302 | } |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b9056cbe68d6..2a55347a2daa 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -45,6 +45,17 @@ static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct | |||
45 | fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT; | 45 | fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT; |
46 | } | 46 | } |
47 | 47 | ||
48 | static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr) | ||
49 | { | ||
50 | if (((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) == 0) || | ||
51 | (((fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) && | ||
52 | ((fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) == 0))) | ||
53 | return 0; | ||
54 | |||
55 | fattr->fileid = fattr->mounted_on_fileid; | ||
56 | return 1; | ||
57 | } | ||
58 | |||
48 | struct nfs_clone_mount { | 59 | struct nfs_clone_mount { |
49 | const struct super_block *sb; | 60 | const struct super_block *sb; |
50 | const struct dentry *dentry; | 61 | const struct dentry *dentry; |
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 426908809c97..0bafcc91c27f 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c | |||
@@ -30,6 +30,7 @@ | |||
30 | */ | 30 | */ |
31 | 31 | ||
32 | #include <linux/nfs_fs.h> | 32 | #include <linux/nfs_fs.h> |
33 | #include <linux/nfs_page.h> | ||
33 | 34 | ||
34 | #include "internal.h" | 35 | #include "internal.h" |
35 | #include "nfs4filelayout.h" | 36 | #include "nfs4filelayout.h" |
@@ -552,13 +553,18 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, | |||
552 | __func__, nfl_util, fl->num_fh, fl->first_stripe_index, | 553 | __func__, nfl_util, fl->num_fh, fl->first_stripe_index, |
553 | fl->pattern_offset); | 554 | fl->pattern_offset); |
554 | 555 | ||
555 | if (!fl->num_fh) | 556 | /* Note that a zero value for num_fh is legal for STRIPE_SPARSE. |
557 | * Further checking is done in filelayout_check_layout */ | ||
558 | if (fl->num_fh < 0 || fl->num_fh > | ||
559 | max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT)) | ||
556 | goto out_err; | 560 | goto out_err; |
557 | 561 | ||
558 | fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), | 562 | if (fl->num_fh > 0) { |
559 | gfp_flags); | 563 | fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), |
560 | if (!fl->fh_array) | 564 | gfp_flags); |
561 | goto out_err; | 565 | if (!fl->fh_array) |
566 | goto out_err; | ||
567 | } | ||
562 | 568 | ||
563 | for (i = 0; i < fl->num_fh; i++) { | 569 | for (i = 0; i < fl->num_fh; i++) { |
564 | /* Do we want to use a mempool here? */ | 570 | /* Do we want to use a mempool here? */ |
@@ -661,8 +667,9 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | |||
661 | u64 p_stripe, r_stripe; | 667 | u64 p_stripe, r_stripe; |
662 | u32 stripe_unit; | 668 | u32 stripe_unit; |
663 | 669 | ||
664 | if (!pnfs_generic_pg_test(pgio, prev, req)) | 670 | if (!pnfs_generic_pg_test(pgio, prev, req) || |
665 | return 0; | 671 | !nfs_generic_pg_test(pgio, prev, req)) |
672 | return false; | ||
666 | 673 | ||
667 | if (!pgio->pg_lseg) | 674 | if (!pgio->pg_lseg) |
668 | return 1; | 675 | return 1; |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d2c4b59c896d..5879b23e0c99 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -2265,12 +2265,14 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, | |||
2265 | return nfs4_map_errors(status); | 2265 | return nfs4_map_errors(status); |
2266 | } | 2266 | } |
2267 | 2267 | ||
2268 | static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); | ||
2268 | /* | 2269 | /* |
2269 | * Get locations and (maybe) other attributes of a referral. | 2270 | * Get locations and (maybe) other attributes of a referral. |
2270 | * Note that we'll actually follow the referral later when | 2271 | * Note that we'll actually follow the referral later when |
2271 | * we detect fsid mismatch in inode revalidation | 2272 | * we detect fsid mismatch in inode revalidation |
2272 | */ | 2273 | */ |
2273 | static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct nfs_fattr *fattr, struct nfs_fh *fhandle) | 2274 | static int nfs4_get_referral(struct inode *dir, const struct qstr *name, |
2275 | struct nfs_fattr *fattr, struct nfs_fh *fhandle) | ||
2274 | { | 2276 | { |
2275 | int status = -ENOMEM; | 2277 | int status = -ENOMEM; |
2276 | struct page *page = NULL; | 2278 | struct page *page = NULL; |
@@ -2288,15 +2290,16 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct | |||
2288 | goto out; | 2290 | goto out; |
2289 | /* Make sure server returned a different fsid for the referral */ | 2291 | /* Make sure server returned a different fsid for the referral */ |
2290 | if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) { | 2292 | if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) { |
2291 | dprintk("%s: server did not return a different fsid for a referral at %s\n", __func__, name->name); | 2293 | dprintk("%s: server did not return a different fsid for" |
2294 | " a referral at %s\n", __func__, name->name); | ||
2292 | status = -EIO; | 2295 | status = -EIO; |
2293 | goto out; | 2296 | goto out; |
2294 | } | 2297 | } |
2298 | /* Fixup attributes for the nfs_lookup() call to nfs_fhget() */ | ||
2299 | nfs_fixup_referral_attributes(&locations->fattr); | ||
2295 | 2300 | ||
2301 | /* replace the lookup nfs_fattr with the locations nfs_fattr */ | ||
2296 | memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr)); | 2302 | memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr)); |
2297 | fattr->valid |= NFS_ATTR_FATTR_V4_REFERRAL; | ||
2298 | if (!fattr->mode) | ||
2299 | fattr->mode = S_IFDIR; | ||
2300 | memset(fhandle, 0, sizeof(struct nfs_fh)); | 2303 | memset(fhandle, 0, sizeof(struct nfs_fh)); |
2301 | out: | 2304 | out: |
2302 | if (page) | 2305 | if (page) |
@@ -4667,11 +4670,15 @@ static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list, | |||
4667 | return len; | 4670 | return len; |
4668 | } | 4671 | } |
4669 | 4672 | ||
4673 | /* | ||
4674 | * nfs_fhget will use either the mounted_on_fileid or the fileid | ||
4675 | */ | ||
4670 | static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr) | 4676 | static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr) |
4671 | { | 4677 | { |
4672 | if (!((fattr->valid & NFS_ATTR_FATTR_FILEID) && | 4678 | if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) || |
4673 | (fattr->valid & NFS_ATTR_FATTR_FSID) && | 4679 | (fattr->valid & NFS_ATTR_FATTR_FILEID)) && |
4674 | (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL))) | 4680 | (fattr->valid & NFS_ATTR_FATTR_FSID) && |
4681 | (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL))) | ||
4675 | return; | 4682 | return; |
4676 | 4683 | ||
4677 | fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | | 4684 | fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | |
@@ -4686,7 +4693,6 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, | |||
4686 | struct nfs_server *server = NFS_SERVER(dir); | 4693 | struct nfs_server *server = NFS_SERVER(dir); |
4687 | u32 bitmask[2] = { | 4694 | u32 bitmask[2] = { |
4688 | [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, | 4695 | [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, |
4689 | [1] = FATTR4_WORD1_MOUNTED_ON_FILEID, | ||
4690 | }; | 4696 | }; |
4691 | struct nfs4_fs_locations_arg args = { | 4697 | struct nfs4_fs_locations_arg args = { |
4692 | .dir_fh = NFS_FH(dir), | 4698 | .dir_fh = NFS_FH(dir), |
@@ -4705,11 +4711,18 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, | |||
4705 | int status; | 4711 | int status; |
4706 | 4712 | ||
4707 | dprintk("%s: start\n", __func__); | 4713 | dprintk("%s: start\n", __func__); |
4714 | |||
4715 | /* Ask for the fileid of the absent filesystem if mounted_on_fileid | ||
4716 | * is not supported */ | ||
4717 | if (NFS_SERVER(dir)->attr_bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) | ||
4718 | bitmask[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; | ||
4719 | else | ||
4720 | bitmask[0] |= FATTR4_WORD0_FILEID; | ||
4721 | |||
4708 | nfs_fattr_init(&fs_locations->fattr); | 4722 | nfs_fattr_init(&fs_locations->fattr); |
4709 | fs_locations->server = server; | 4723 | fs_locations->server = server; |
4710 | fs_locations->nlocations = 0; | 4724 | fs_locations->nlocations = 0; |
4711 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); | 4725 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
4712 | nfs_fixup_referral_attributes(&fs_locations->fattr); | ||
4713 | dprintk("%s: returned status = %d\n", __func__, status); | 4726 | dprintk("%s: returned status = %d\n", __func__, status); |
4714 | return status; | 4727 | return status; |
4715 | } | 4728 | } |
@@ -5098,7 +5111,6 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) | |||
5098 | if (mxresp_sz == 0) | 5111 | if (mxresp_sz == 0) |
5099 | mxresp_sz = NFS_MAX_FILE_IO_SIZE; | 5112 | mxresp_sz = NFS_MAX_FILE_IO_SIZE; |
5100 | /* Fore channel attributes */ | 5113 | /* Fore channel attributes */ |
5101 | args->fc_attrs.headerpadsz = 0; | ||
5102 | args->fc_attrs.max_rqst_sz = mxrqst_sz; | 5114 | args->fc_attrs.max_rqst_sz = mxrqst_sz; |
5103 | args->fc_attrs.max_resp_sz = mxresp_sz; | 5115 | args->fc_attrs.max_resp_sz = mxresp_sz; |
5104 | args->fc_attrs.max_ops = NFS4_MAX_OPS; | 5116 | args->fc_attrs.max_ops = NFS4_MAX_OPS; |
@@ -5111,7 +5123,6 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) | |||
5111 | args->fc_attrs.max_ops, args->fc_attrs.max_reqs); | 5123 | args->fc_attrs.max_ops, args->fc_attrs.max_reqs); |
5112 | 5124 | ||
5113 | /* Back channel attributes */ | 5125 | /* Back channel attributes */ |
5114 | args->bc_attrs.headerpadsz = 0; | ||
5115 | args->bc_attrs.max_rqst_sz = PAGE_SIZE; | 5126 | args->bc_attrs.max_rqst_sz = PAGE_SIZE; |
5116 | args->bc_attrs.max_resp_sz = PAGE_SIZE; | 5127 | args->bc_attrs.max_resp_sz = PAGE_SIZE; |
5117 | args->bc_attrs.max_resp_sz_cached = 0; | 5128 | args->bc_attrs.max_resp_sz_cached = 0; |
@@ -5131,8 +5142,6 @@ static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args | |||
5131 | struct nfs4_channel_attrs *sent = &args->fc_attrs; | 5142 | struct nfs4_channel_attrs *sent = &args->fc_attrs; |
5132 | struct nfs4_channel_attrs *rcvd = &session->fc_attrs; | 5143 | struct nfs4_channel_attrs *rcvd = &session->fc_attrs; |
5133 | 5144 | ||
5134 | if (rcvd->headerpadsz > sent->headerpadsz) | ||
5135 | return -EINVAL; | ||
5136 | if (rcvd->max_resp_sz > sent->max_resp_sz) | 5145 | if (rcvd->max_resp_sz > sent->max_resp_sz) |
5137 | return -EINVAL; | 5146 | return -EINVAL; |
5138 | /* | 5147 | /* |
@@ -5697,6 +5706,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) | |||
5697 | { | 5706 | { |
5698 | struct nfs4_layoutreturn *lrp = calldata; | 5707 | struct nfs4_layoutreturn *lrp = calldata; |
5699 | struct nfs_server *server; | 5708 | struct nfs_server *server; |
5709 | struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; | ||
5700 | 5710 | ||
5701 | dprintk("--> %s\n", __func__); | 5711 | dprintk("--> %s\n", __func__); |
5702 | 5712 | ||
@@ -5708,16 +5718,15 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) | |||
5708 | nfs_restart_rpc(task, lrp->clp); | 5718 | nfs_restart_rpc(task, lrp->clp); |
5709 | return; | 5719 | return; |
5710 | } | 5720 | } |
5721 | spin_lock(&lo->plh_inode->i_lock); | ||
5711 | if (task->tk_status == 0) { | 5722 | if (task->tk_status == 0) { |
5712 | struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; | ||
5713 | |||
5714 | if (lrp->res.lrs_present) { | 5723 | if (lrp->res.lrs_present) { |
5715 | spin_lock(&lo->plh_inode->i_lock); | ||
5716 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); | 5724 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); |
5717 | spin_unlock(&lo->plh_inode->i_lock); | ||
5718 | } else | 5725 | } else |
5719 | BUG_ON(!list_empty(&lo->plh_segs)); | 5726 | BUG_ON(!list_empty(&lo->plh_segs)); |
5720 | } | 5727 | } |
5728 | lo->plh_block_lgets--; | ||
5729 | spin_unlock(&lo->plh_inode->i_lock); | ||
5721 | dprintk("<-- %s\n", __func__); | 5730 | dprintk("<-- %s\n", __func__); |
5722 | } | 5731 | } |
5723 | 5732 | ||
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index d869a5e5464b..6870bc61ceec 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -255,7 +255,7 @@ static int nfs4_stat_to_errno(int); | |||
255 | #define decode_fs_locations_maxsz \ | 255 | #define decode_fs_locations_maxsz \ |
256 | (0) | 256 | (0) |
257 | #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz) | 257 | #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz) |
258 | #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 4 + (NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN))) | 258 | #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4)) |
259 | 259 | ||
260 | #if defined(CONFIG_NFS_V4_1) | 260 | #if defined(CONFIG_NFS_V4_1) |
261 | #define NFS4_MAX_MACHINE_NAME_LEN (64) | 261 | #define NFS4_MAX_MACHINE_NAME_LEN (64) |
@@ -1725,7 +1725,7 @@ static void encode_create_session(struct xdr_stream *xdr, | |||
1725 | *p++ = cpu_to_be32(args->flags); /*flags */ | 1725 | *p++ = cpu_to_be32(args->flags); /*flags */ |
1726 | 1726 | ||
1727 | /* Fore Channel */ | 1727 | /* Fore Channel */ |
1728 | *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ | 1728 | *p++ = cpu_to_be32(0); /* header padding size */ |
1729 | *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */ | 1729 | *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */ |
1730 | *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */ | 1730 | *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */ |
1731 | *p++ = cpu_to_be32(max_resp_sz_cached); /* Max resp sz cached */ | 1731 | *p++ = cpu_to_be32(max_resp_sz_cached); /* Max resp sz cached */ |
@@ -1734,7 +1734,7 @@ static void encode_create_session(struct xdr_stream *xdr, | |||
1734 | *p++ = cpu_to_be32(0); /* rdmachannel_attrs */ | 1734 | *p++ = cpu_to_be32(0); /* rdmachannel_attrs */ |
1735 | 1735 | ||
1736 | /* Back Channel */ | 1736 | /* Back Channel */ |
1737 | *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */ | 1737 | *p++ = cpu_to_be32(0); /* header padding size */ |
1738 | *p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz); /* max req size */ | 1738 | *p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz); /* max req size */ |
1739 | *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz); /* max resp size */ | 1739 | *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz); /* max resp size */ |
1740 | *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */ | 1740 | *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */ |
@@ -3098,7 +3098,7 @@ out_overflow: | |||
3098 | return -EIO; | 3098 | return -EIO; |
3099 | } | 3099 | } |
3100 | 3100 | ||
3101 | static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap) | 3101 | static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap, int32_t *res) |
3102 | { | 3102 | { |
3103 | __be32 *p; | 3103 | __be32 *p; |
3104 | 3104 | ||
@@ -3109,7 +3109,7 @@ static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap) | |||
3109 | if (unlikely(!p)) | 3109 | if (unlikely(!p)) |
3110 | goto out_overflow; | 3110 | goto out_overflow; |
3111 | bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; | 3111 | bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; |
3112 | return -be32_to_cpup(p); | 3112 | *res = -be32_to_cpup(p); |
3113 | } | 3113 | } |
3114 | return 0; | 3114 | return 0; |
3115 | out_overflow: | 3115 | out_overflow: |
@@ -4070,6 +4070,7 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, | |||
4070 | int status; | 4070 | int status; |
4071 | umode_t fmode = 0; | 4071 | umode_t fmode = 0; |
4072 | uint32_t type; | 4072 | uint32_t type; |
4073 | int32_t err; | ||
4073 | 4074 | ||
4074 | status = decode_attr_type(xdr, bitmap, &type); | 4075 | status = decode_attr_type(xdr, bitmap, &type); |
4075 | if (status < 0) | 4076 | if (status < 0) |
@@ -4095,13 +4096,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, | |||
4095 | goto xdr_error; | 4096 | goto xdr_error; |
4096 | fattr->valid |= status; | 4097 | fattr->valid |= status; |
4097 | 4098 | ||
4098 | status = decode_attr_error(xdr, bitmap); | 4099 | err = 0; |
4099 | if (status == -NFS4ERR_WRONGSEC) { | 4100 | status = decode_attr_error(xdr, bitmap, &err); |
4100 | nfs_fixup_secinfo_attributes(fattr, fh); | ||
4101 | status = 0; | ||
4102 | } | ||
4103 | if (status < 0) | 4101 | if (status < 0) |
4104 | goto xdr_error; | 4102 | goto xdr_error; |
4103 | if (err == -NFS4ERR_WRONGSEC) | ||
4104 | nfs_fixup_secinfo_attributes(fattr, fh); | ||
4105 | 4105 | ||
4106 | status = decode_attr_filehandle(xdr, bitmap, fh); | 4106 | status = decode_attr_filehandle(xdr, bitmap, fh); |
4107 | if (status < 0) | 4107 | if (status < 0) |
@@ -4997,12 +4997,14 @@ static int decode_chan_attrs(struct xdr_stream *xdr, | |||
4997 | struct nfs4_channel_attrs *attrs) | 4997 | struct nfs4_channel_attrs *attrs) |
4998 | { | 4998 | { |
4999 | __be32 *p; | 4999 | __be32 *p; |
5000 | u32 nr_attrs; | 5000 | u32 nr_attrs, val; |
5001 | 5001 | ||
5002 | p = xdr_inline_decode(xdr, 28); | 5002 | p = xdr_inline_decode(xdr, 28); |
5003 | if (unlikely(!p)) | 5003 | if (unlikely(!p)) |
5004 | goto out_overflow; | 5004 | goto out_overflow; |
5005 | attrs->headerpadsz = be32_to_cpup(p++); | 5005 | val = be32_to_cpup(p++); /* headerpadsz */ |
5006 | if (val) | ||
5007 | return -EINVAL; /* no support for header padding yet */ | ||
5006 | attrs->max_rqst_sz = be32_to_cpup(p++); | 5008 | attrs->max_rqst_sz = be32_to_cpup(p++); |
5007 | attrs->max_resp_sz = be32_to_cpup(p++); | 5009 | attrs->max_resp_sz = be32_to_cpup(p++); |
5008 | attrs->max_resp_sz_cached = be32_to_cpup(p++); | 5010 | attrs->max_resp_sz_cached = be32_to_cpup(p++); |
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 9cf208df1f25..8ff2ea3f10ef 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -108,7 +108,6 @@ _dev_list_add(const struct nfs_server *nfss, | |||
108 | de = n; | 108 | de = n; |
109 | } | 109 | } |
110 | 110 | ||
111 | atomic_inc(&de->id_node.ref); | ||
112 | return de; | 111 | return de; |
113 | } | 112 | } |
114 | 113 | ||
@@ -1001,6 +1000,9 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, | |||
1001 | if (!pnfs_generic_pg_test(pgio, prev, req)) | 1000 | if (!pnfs_generic_pg_test(pgio, prev, req)) |
1002 | return false; | 1001 | return false; |
1003 | 1002 | ||
1003 | if (pgio->pg_lseg == NULL) | ||
1004 | return true; | ||
1005 | |||
1004 | return pgio->pg_count + req->wb_bytes <= | 1006 | return pgio->pg_count + req->wb_bytes <= |
1005 | OBJIO_LSEG(pgio->pg_lseg)->max_io_size; | 1007 | OBJIO_LSEG(pgio->pg_lseg)->max_io_size; |
1006 | } | 1008 | } |
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index dc3956c0de80..1d06f8e2adea 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c | |||
@@ -291,7 +291,7 @@ objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync) | |||
291 | struct nfs_read_data *rdata; | 291 | struct nfs_read_data *rdata; |
292 | 292 | ||
293 | state->status = status; | 293 | state->status = status; |
294 | dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof); | 294 | dprintk("%s: Begin status=%zd eof=%d\n", __func__, status, eof); |
295 | rdata = state->rpcdata; | 295 | rdata = state->rpcdata; |
296 | rdata->task.tk_status = status; | 296 | rdata->task.tk_status = status; |
297 | if (status >= 0) { | 297 | if (status >= 0) { |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 7913961aff22..009855716286 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -204,7 +204,7 @@ nfs_wait_on_request(struct nfs_page *req) | |||
204 | TASK_UNINTERRUPTIBLE); | 204 | TASK_UNINTERRUPTIBLE); |
205 | } | 205 | } |
206 | 206 | ||
207 | static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) | 207 | bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) |
208 | { | 208 | { |
209 | /* | 209 | /* |
210 | * FIXME: ideally we should be able to coalesce all requests | 210 | * FIXME: ideally we should be able to coalesce all requests |
@@ -218,6 +218,7 @@ static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_p | |||
218 | 218 | ||
219 | return desc->pg_count + req->wb_bytes <= desc->pg_bsize; | 219 | return desc->pg_count + req->wb_bytes <= desc->pg_bsize; |
220 | } | 220 | } |
221 | EXPORT_SYMBOL_GPL(nfs_generic_pg_test); | ||
221 | 222 | ||
222 | /** | 223 | /** |
223 | * nfs_pageio_init - initialise a page io descriptor | 224 | * nfs_pageio_init - initialise a page io descriptor |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8c1309d852a6..29c0ca7fc347 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -634,14 +634,16 @@ _pnfs_return_layout(struct inode *ino) | |||
634 | 634 | ||
635 | spin_lock(&ino->i_lock); | 635 | spin_lock(&ino->i_lock); |
636 | lo = nfsi->layout; | 636 | lo = nfsi->layout; |
637 | if (!lo || !mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) { | 637 | if (!lo) { |
638 | spin_unlock(&ino->i_lock); | 638 | spin_unlock(&ino->i_lock); |
639 | dprintk("%s: no layout segments to return\n", __func__); | 639 | dprintk("%s: no layout to return\n", __func__); |
640 | goto out; | 640 | return status; |
641 | } | 641 | } |
642 | stateid = nfsi->layout->plh_stateid; | 642 | stateid = nfsi->layout->plh_stateid; |
643 | /* Reference matched in nfs4_layoutreturn_release */ | 643 | /* Reference matched in nfs4_layoutreturn_release */ |
644 | get_layout_hdr(lo); | 644 | get_layout_hdr(lo); |
645 | mark_matching_lsegs_invalid(lo, &tmp_list, NULL); | ||
646 | lo->plh_block_lgets++; | ||
645 | spin_unlock(&ino->i_lock); | 647 | spin_unlock(&ino->i_lock); |
646 | pnfs_free_lseg_list(&tmp_list); | 648 | pnfs_free_lseg_list(&tmp_list); |
647 | 649 | ||
@@ -650,6 +652,9 @@ _pnfs_return_layout(struct inode *ino) | |||
650 | lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); | 652 | lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); |
651 | if (unlikely(lrp == NULL)) { | 653 | if (unlikely(lrp == NULL)) { |
652 | status = -ENOMEM; | 654 | status = -ENOMEM; |
655 | set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags); | ||
656 | set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags); | ||
657 | put_layout_hdr(lo); | ||
653 | goto out; | 658 | goto out; |
654 | } | 659 | } |
655 | 660 | ||
@@ -887,7 +892,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, | |||
887 | ret = get_lseg(lseg); | 892 | ret = get_lseg(lseg); |
888 | break; | 893 | break; |
889 | } | 894 | } |
890 | if (cmp_layout(range, &lseg->pls_range) > 0) | 895 | if (lseg->pls_range.offset > range->offset) |
891 | break; | 896 | break; |
892 | } | 897 | } |
893 | 898 | ||
@@ -1059,23 +1064,36 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | |||
1059 | gfp_flags = GFP_NOFS; | 1064 | gfp_flags = GFP_NOFS; |
1060 | } | 1065 | } |
1061 | 1066 | ||
1062 | if (pgio->pg_count == prev->wb_bytes) { | 1067 | if (pgio->pg_lseg == NULL) { |
1068 | if (pgio->pg_count != prev->wb_bytes) | ||
1069 | return true; | ||
1063 | /* This is first coelesce call for a series of nfs_pages */ | 1070 | /* This is first coelesce call for a series of nfs_pages */ |
1064 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 1071 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
1065 | prev->wb_context, | 1072 | prev->wb_context, |
1066 | req_offset(req), | 1073 | req_offset(prev), |
1067 | pgio->pg_count, | 1074 | pgio->pg_count, |
1068 | access_type, | 1075 | access_type, |
1069 | gfp_flags); | 1076 | gfp_flags); |
1070 | return true; | 1077 | if (pgio->pg_lseg == NULL) |
1078 | return true; | ||
1071 | } | 1079 | } |
1072 | 1080 | ||
1073 | if (pgio->pg_lseg && | 1081 | /* |
1074 | req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, | 1082 | * Test if a nfs_page is fully contained in the pnfs_layout_range. |
1075 | pgio->pg_lseg->pls_range.length)) | 1083 | * Note that this test makes several assumptions: |
1076 | return false; | 1084 | * - that the previous nfs_page in the struct nfs_pageio_descriptor |
1077 | 1085 | * is known to lie within the range. | |
1078 | return true; | 1086 | * - that the nfs_page being tested is known to be contiguous with the |
1087 | * previous nfs_page. | ||
1088 | * - Layout ranges are page aligned, so we only have to test the | ||
1089 | * start offset of the request. | ||
1090 | * | ||
1091 | * Please also note that 'end_offset' is actually the offset of the | ||
1092 | * first byte that lies outside the pnfs_layout_range. FIXME? | ||
1093 | * | ||
1094 | */ | ||
1095 | return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, | ||
1096 | pgio->pg_lseg->pls_range.length); | ||
1079 | } | 1097 | } |
1080 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); | 1098 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); |
1081 | 1099 | ||
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 48d0a8e4d062..96bf4e6f45be 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -186,6 +186,7 @@ int pnfs_ld_read_done(struct nfs_read_data *); | |||
186 | /* pnfs_dev.c */ | 186 | /* pnfs_dev.c */ |
187 | struct nfs4_deviceid_node { | 187 | struct nfs4_deviceid_node { |
188 | struct hlist_node node; | 188 | struct hlist_node node; |
189 | struct hlist_node tmpnode; | ||
189 | const struct pnfs_layoutdriver_type *ld; | 190 | const struct pnfs_layoutdriver_type *ld; |
190 | const struct nfs_client *nfs_client; | 191 | const struct nfs_client *nfs_client; |
191 | struct nfs4_deviceid deviceid; | 192 | struct nfs4_deviceid deviceid; |
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index c65e133ce9c0..f0f8e1e22f6c 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c | |||
@@ -174,6 +174,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, | |||
174 | const struct nfs4_deviceid *id) | 174 | const struct nfs4_deviceid *id) |
175 | { | 175 | { |
176 | INIT_HLIST_NODE(&d->node); | 176 | INIT_HLIST_NODE(&d->node); |
177 | INIT_HLIST_NODE(&d->tmpnode); | ||
177 | d->ld = ld; | 178 | d->ld = ld; |
178 | d->nfs_client = nfs_client; | 179 | d->nfs_client = nfs_client; |
179 | d->deviceid = *id; | 180 | d->deviceid = *id; |
@@ -208,6 +209,7 @@ nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new) | |||
208 | 209 | ||
209 | hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); | 210 | hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); |
210 | spin_unlock(&nfs4_deviceid_lock); | 211 | spin_unlock(&nfs4_deviceid_lock); |
212 | atomic_inc(&new->ref); | ||
211 | 213 | ||
212 | return new; | 214 | return new; |
213 | } | 215 | } |
@@ -238,24 +240,29 @@ static void | |||
238 | _deviceid_purge_client(const struct nfs_client *clp, long hash) | 240 | _deviceid_purge_client(const struct nfs_client *clp, long hash) |
239 | { | 241 | { |
240 | struct nfs4_deviceid_node *d; | 242 | struct nfs4_deviceid_node *d; |
241 | struct hlist_node *n, *next; | 243 | struct hlist_node *n; |
242 | HLIST_HEAD(tmp); | 244 | HLIST_HEAD(tmp); |
243 | 245 | ||
246 | spin_lock(&nfs4_deviceid_lock); | ||
244 | rcu_read_lock(); | 247 | rcu_read_lock(); |
245 | hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) | 248 | hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) |
246 | if (d->nfs_client == clp && atomic_read(&d->ref)) { | 249 | if (d->nfs_client == clp && atomic_read(&d->ref)) { |
247 | hlist_del_init_rcu(&d->node); | 250 | hlist_del_init_rcu(&d->node); |
248 | hlist_add_head(&d->node, &tmp); | 251 | hlist_add_head(&d->tmpnode, &tmp); |
249 | } | 252 | } |
250 | rcu_read_unlock(); | 253 | rcu_read_unlock(); |
254 | spin_unlock(&nfs4_deviceid_lock); | ||
251 | 255 | ||
252 | if (hlist_empty(&tmp)) | 256 | if (hlist_empty(&tmp)) |
253 | return; | 257 | return; |
254 | 258 | ||
255 | synchronize_rcu(); | 259 | synchronize_rcu(); |
256 | hlist_for_each_entry_safe(d, n, next, &tmp, node) | 260 | while (!hlist_empty(&tmp)) { |
261 | d = hlist_entry(tmp.first, struct nfs4_deviceid_node, tmpnode); | ||
262 | hlist_del(&d->tmpnode); | ||
257 | if (atomic_dec_and_test(&d->ref)) | 263 | if (atomic_dec_and_test(&d->ref)) |
258 | d->ld->free_deviceid_node(d); | 264 | d->ld->free_deviceid_node(d); |
265 | } | ||
259 | } | 266 | } |
260 | 267 | ||
261 | void | 268 | void |
@@ -263,8 +270,8 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp) | |||
263 | { | 270 | { |
264 | long h; | 271 | long h; |
265 | 272 | ||
266 | spin_lock(&nfs4_deviceid_lock); | 273 | if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS)) |
274 | return; | ||
267 | for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) | 275 | for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) |
268 | _deviceid_purge_client(clp, h); | 276 | _deviceid_purge_client(clp, h); |
269 | spin_unlock(&nfs4_deviceid_lock); | ||
270 | } | 277 | } |
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 18b3e8975fe0..fbb2a5ef5817 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig | |||
@@ -82,6 +82,7 @@ config NFSD_V4 | |||
82 | select NFSD_V3 | 82 | select NFSD_V3 |
83 | select FS_POSIX_ACL | 83 | select FS_POSIX_ACL |
84 | select SUNRPC_GSS | 84 | select SUNRPC_GSS |
85 | select CRYPTO | ||
85 | help | 86 | help |
86 | This option enables support in your system's NFS server for | 87 | This option enables support in your system's NFS server for |
87 | version 4 of the NFS protocol (RFC 3530). | 88 | version 4 of the NFS protocol (RFC 3530). |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1f5eae40f34e..2b1449dd2f49 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/lockd/lockd.h> | 13 | #include <linux/lockd/lockd.h> |
14 | #include <linux/sunrpc/clnt.h> | 14 | #include <linux/sunrpc/clnt.h> |
15 | #include <linux/sunrpc/gss_api.h> | 15 | #include <linux/sunrpc/gss_api.h> |
16 | #include <linux/sunrpc/gss_krb5_enctypes.h> | ||
16 | 17 | ||
17 | #include "idmap.h" | 18 | #include "idmap.h" |
18 | #include "nfsd.h" | 19 | #include "nfsd.h" |
@@ -189,18 +190,10 @@ static struct file_operations export_features_operations = { | |||
189 | .release = single_release, | 190 | .release = single_release, |
190 | }; | 191 | }; |
191 | 192 | ||
192 | #ifdef CONFIG_SUNRPC_GSS | 193 | #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) |
193 | static int supported_enctypes_show(struct seq_file *m, void *v) | 194 | static int supported_enctypes_show(struct seq_file *m, void *v) |
194 | { | 195 | { |
195 | struct gss_api_mech *k5mech; | 196 | seq_printf(m, KRB5_SUPPORTED_ENCTYPES); |
196 | |||
197 | k5mech = gss_mech_get_by_name("krb5"); | ||
198 | if (k5mech == NULL) | ||
199 | goto out; | ||
200 | if (k5mech->gm_upcall_enctypes != NULL) | ||
201 | seq_printf(m, k5mech->gm_upcall_enctypes); | ||
202 | gss_mech_put(k5mech); | ||
203 | out: | ||
204 | return 0; | 197 | return 0; |
205 | } | 198 | } |
206 | 199 | ||
@@ -215,7 +208,7 @@ static struct file_operations supported_enctypes_ops = { | |||
215 | .llseek = seq_lseek, | 208 | .llseek = seq_lseek, |
216 | .release = single_release, | 209 | .release = single_release, |
217 | }; | 210 | }; |
218 | #endif /* CONFIG_SUNRPC_GSS */ | 211 | #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ |
219 | 212 | ||
220 | extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); | 213 | extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); |
221 | extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); | 214 | extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); |
@@ -1427,9 +1420,9 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) | |||
1427 | [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, | 1420 | [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, |
1428 | [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, | 1421 | [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, |
1429 | [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, | 1422 | [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, |
1430 | #ifdef CONFIG_SUNRPC_GSS | 1423 | #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) |
1431 | [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, | 1424 | [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, |
1432 | #endif /* CONFIG_SUNRPC_GSS */ | 1425 | #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ |
1433 | #ifdef CONFIG_NFSD_V4 | 1426 | #ifdef CONFIG_NFSD_V4 |
1434 | [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, | 1427 | [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, |
1435 | [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, | 1428 | [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d5718273bb32..fd0acca5370a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -696,7 +696,15 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor | |||
696 | } | 696 | } |
697 | #endif /* CONFIG_NFSD_V3 */ | 697 | #endif /* CONFIG_NFSD_V3 */ |
698 | 698 | ||
699 | static int nfsd_open_break_lease(struct inode *inode, int access) | ||
700 | { | ||
701 | unsigned int mode; | ||
699 | 702 | ||
703 | if (access & NFSD_MAY_NOT_BREAK_LEASE) | ||
704 | return 0; | ||
705 | mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY; | ||
706 | return break_lease(inode, mode | O_NONBLOCK); | ||
707 | } | ||
700 | 708 | ||
701 | /* | 709 | /* |
702 | * Open an existing file or directory. | 710 | * Open an existing file or directory. |
@@ -744,12 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, | |||
744 | if (!inode->i_fop) | 752 | if (!inode->i_fop) |
745 | goto out; | 753 | goto out; |
746 | 754 | ||
747 | /* | 755 | host_err = nfsd_open_break_lease(inode, access); |
748 | * Check to see if there are any leases on this file. | ||
749 | * This may block while leases are broken. | ||
750 | */ | ||
751 | if (!(access & NFSD_MAY_NOT_BREAK_LEASE)) | ||
752 | host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0)); | ||
753 | if (host_err) /* NOMEM or WOULDBLOCK */ | 756 | if (host_err) /* NOMEM or WOULDBLOCK */ |
754 | goto out_nfserr; | 757 | goto out_nfserr; |
755 | 758 | ||
@@ -1660,8 +1663,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, | |||
1660 | if (!dold->d_inode) | 1663 | if (!dold->d_inode) |
1661 | goto out_drop_write; | 1664 | goto out_drop_write; |
1662 | host_err = nfsd_break_lease(dold->d_inode); | 1665 | host_err = nfsd_break_lease(dold->d_inode); |
1663 | if (host_err) | 1666 | if (host_err) { |
1667 | err = nfserrno(host_err); | ||
1664 | goto out_drop_write; | 1668 | goto out_drop_write; |
1669 | } | ||
1665 | host_err = vfs_link(dold, dirp, dnew); | 1670 | host_err = vfs_link(dold, dirp, dnew); |
1666 | if (!host_err) { | 1671 | if (!host_err) { |
1667 | err = nfserrno(commit_metadata(ffhp)); | 1672 | err = nfserrno(commit_metadata(ffhp)); |
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 7eafe468a29c..b2e3ff347620 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c | |||
@@ -1346,6 +1346,11 @@ static void nilfs_btree_shrink(struct nilfs_bmap *btree, | |||
1346 | path[level].bp_bh = NULL; | 1346 | path[level].bp_bh = NULL; |
1347 | } | 1347 | } |
1348 | 1348 | ||
1349 | static void nilfs_btree_nop(struct nilfs_bmap *btree, | ||
1350 | struct nilfs_btree_path *path, | ||
1351 | int level, __u64 *keyp, __u64 *ptrp) | ||
1352 | { | ||
1353 | } | ||
1349 | 1354 | ||
1350 | static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, | 1355 | static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, |
1351 | struct nilfs_btree_path *path, | 1356 | struct nilfs_btree_path *path, |
@@ -1356,20 +1361,19 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, | |||
1356 | struct buffer_head *bh; | 1361 | struct buffer_head *bh; |
1357 | struct nilfs_btree_node *node, *parent, *sib; | 1362 | struct nilfs_btree_node *node, *parent, *sib; |
1358 | __u64 sibptr; | 1363 | __u64 sibptr; |
1359 | int pindex, level, ncmin, ncmax, ncblk, ret; | 1364 | int pindex, dindex, level, ncmin, ncmax, ncblk, ret; |
1360 | 1365 | ||
1361 | ret = 0; | 1366 | ret = 0; |
1362 | stats->bs_nblocks = 0; | 1367 | stats->bs_nblocks = 0; |
1363 | ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); | 1368 | ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); |
1364 | ncblk = nilfs_btree_nchildren_per_block(btree); | 1369 | ncblk = nilfs_btree_nchildren_per_block(btree); |
1365 | 1370 | ||
1366 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; | 1371 | for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index; |
1367 | level < nilfs_btree_height(btree) - 1; | 1372 | level < nilfs_btree_height(btree) - 1; |
1368 | level++) { | 1373 | level++) { |
1369 | node = nilfs_btree_get_nonroot_node(path, level); | 1374 | node = nilfs_btree_get_nonroot_node(path, level); |
1370 | path[level].bp_oldreq.bpr_ptr = | 1375 | path[level].bp_oldreq.bpr_ptr = |
1371 | nilfs_btree_node_get_ptr(node, path[level].bp_index, | 1376 | nilfs_btree_node_get_ptr(node, dindex, ncblk); |
1372 | ncblk); | ||
1373 | ret = nilfs_bmap_prepare_end_ptr(btree, | 1377 | ret = nilfs_bmap_prepare_end_ptr(btree, |
1374 | &path[level].bp_oldreq, dat); | 1378 | &path[level].bp_oldreq, dat); |
1375 | if (ret < 0) | 1379 | if (ret < 0) |
@@ -1383,6 +1387,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, | |||
1383 | 1387 | ||
1384 | parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); | 1388 | parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); |
1385 | pindex = path[level + 1].bp_index; | 1389 | pindex = path[level + 1].bp_index; |
1390 | dindex = pindex; | ||
1386 | 1391 | ||
1387 | if (pindex > 0) { | 1392 | if (pindex > 0) { |
1388 | /* left sibling */ | 1393 | /* left sibling */ |
@@ -1421,6 +1426,14 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, | |||
1421 | path[level].bp_sib_bh = bh; | 1426 | path[level].bp_sib_bh = bh; |
1422 | path[level].bp_op = nilfs_btree_concat_right; | 1427 | path[level].bp_op = nilfs_btree_concat_right; |
1423 | stats->bs_nblocks++; | 1428 | stats->bs_nblocks++; |
1429 | /* | ||
1430 | * When merging right sibling node | ||
1431 | * into the current node, pointer to | ||
1432 | * the right sibling node must be | ||
1433 | * terminated instead. The adjustment | ||
1434 | * below is required for that. | ||
1435 | */ | ||
1436 | dindex = pindex + 1; | ||
1424 | /* continue; */ | 1437 | /* continue; */ |
1425 | } | 1438 | } |
1426 | } else { | 1439 | } else { |
@@ -1431,29 +1444,31 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, | |||
1431 | NILFS_BTREE_ROOT_NCHILDREN_MAX) { | 1444 | NILFS_BTREE_ROOT_NCHILDREN_MAX) { |
1432 | path[level].bp_op = nilfs_btree_shrink; | 1445 | path[level].bp_op = nilfs_btree_shrink; |
1433 | stats->bs_nblocks += 2; | 1446 | stats->bs_nblocks += 2; |
1447 | level++; | ||
1448 | path[level].bp_op = nilfs_btree_nop; | ||
1449 | goto shrink_root_child; | ||
1434 | } else { | 1450 | } else { |
1435 | path[level].bp_op = nilfs_btree_do_delete; | 1451 | path[level].bp_op = nilfs_btree_do_delete; |
1436 | stats->bs_nblocks++; | 1452 | stats->bs_nblocks++; |
1453 | goto out; | ||
1437 | } | 1454 | } |
1438 | |||
1439 | goto out; | ||
1440 | |||
1441 | } | 1455 | } |
1442 | } | 1456 | } |
1443 | 1457 | ||
1458 | /* child of the root node is deleted */ | ||
1459 | path[level].bp_op = nilfs_btree_do_delete; | ||
1460 | stats->bs_nblocks++; | ||
1461 | |||
1462 | shrink_root_child: | ||
1444 | node = nilfs_btree_get_root(btree); | 1463 | node = nilfs_btree_get_root(btree); |
1445 | path[level].bp_oldreq.bpr_ptr = | 1464 | path[level].bp_oldreq.bpr_ptr = |
1446 | nilfs_btree_node_get_ptr(node, path[level].bp_index, | 1465 | nilfs_btree_node_get_ptr(node, dindex, |
1447 | NILFS_BTREE_ROOT_NCHILDREN_MAX); | 1466 | NILFS_BTREE_ROOT_NCHILDREN_MAX); |
1448 | 1467 | ||
1449 | ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); | 1468 | ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); |
1450 | if (ret < 0) | 1469 | if (ret < 0) |
1451 | goto err_out_child_node; | 1470 | goto err_out_child_node; |
1452 | 1471 | ||
1453 | /* child of the root node is deleted */ | ||
1454 | path[level].bp_op = nilfs_btree_do_delete; | ||
1455 | stats->bs_nblocks++; | ||
1456 | |||
1457 | /* success */ | 1472 | /* success */ |
1458 | out: | 1473 | out: |
1459 | *levelp = level; | 1474 | *levelp = level; |
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index b954878ad6ce..b9b45fc2903e 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
@@ -801,12 +801,7 @@ out_err: | |||
801 | 801 | ||
802 | int nilfs_permission(struct inode *inode, int mask, unsigned int flags) | 802 | int nilfs_permission(struct inode *inode, int mask, unsigned int flags) |
803 | { | 803 | { |
804 | struct nilfs_root *root; | 804 | struct nilfs_root *root = NILFS_I(inode)->i_root; |
805 | |||
806 | if (flags & IPERM_FLAG_RCU) | ||
807 | return -ECHILD; | ||
808 | |||
809 | root = NILFS_I(inode)->i_root; | ||
810 | if ((mask & MAY_WRITE) && root && | 805 | if ((mask & MAY_WRITE) && root && |
811 | root->cno != NILFS_CPTREE_CURRENT_CNO) | 806 | root->cno != NILFS_CPTREE_CURRENT_CNO) |
812 | return -EROFS; /* snapshot is not writable */ | 807 | return -EROFS; /* snapshot is not writable */ |
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 141646e88fb5..bb24ab6c282f 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c | |||
@@ -2573,7 +2573,7 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, | |||
2573 | sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; | 2573 | sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; |
2574 | 2574 | ||
2575 | if (nilfs->ns_interval) | 2575 | if (nilfs->ns_interval) |
2576 | sci->sc_interval = nilfs->ns_interval; | 2576 | sci->sc_interval = HZ * nilfs->ns_interval; |
2577 | if (nilfs->ns_watermark) | 2577 | if (nilfs->ns_watermark) |
2578 | sci->sc_watermark = nilfs->ns_watermark; | 2578 | sci->sc_watermark = nilfs->ns_watermark; |
2579 | return sci; | 2579 | return sci; |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index cdbaf5e97308..56f61027236b 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -1072,7 +1072,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
1072 | 1072 | ||
1073 | sb->s_magic = OCFS2_SUPER_MAGIC; | 1073 | sb->s_magic = OCFS2_SUPER_MAGIC; |
1074 | 1074 | ||
1075 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | 1075 | sb->s_flags = (sb->s_flags & ~(MS_POSIXACL | MS_NOSEC)) | |
1076 | ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); | 1076 | ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); |
1077 | 1077 | ||
1078 | /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, | 1078 | /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, |
diff --git a/fs/omfs/file.c b/fs/omfs/file.c index d738a7e493dd..2c6d95257a4d 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c | |||
@@ -4,7 +4,6 @@ | |||
4 | * Released under GPL v2. | 4 | * Released under GPL v2. |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/version.h> | ||
8 | #include <linux/module.h> | 7 | #include <linux/module.h> |
9 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
10 | #include <linux/buffer_head.h> | 9 | #include <linux/buffer_head.h> |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 0ead43549431..e3c63d1c5e13 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -255,13 +255,7 @@ static ssize_t part_discard_alignment_show(struct device *dev, | |||
255 | struct device_attribute *attr, char *buf) | 255 | struct device_attribute *attr, char *buf) |
256 | { | 256 | { |
257 | struct hd_struct *p = dev_to_part(dev); | 257 | struct hd_struct *p = dev_to_part(dev); |
258 | struct gendisk *disk = dev_to_disk(dev); | 258 | return sprintf(buf, "%u\n", p->discard_alignment); |
259 | unsigned int alignment = 0; | ||
260 | |||
261 | if (disk->queue) | ||
262 | alignment = queue_limit_discard_alignment(&disk->queue->limits, | ||
263 | p->start_sect); | ||
264 | return sprintf(buf, "%u\n", alignment); | ||
265 | } | 259 | } |
266 | 260 | ||
267 | ssize_t part_stat_show(struct device *dev, | 261 | ssize_t part_stat_show(struct device *dev, |
@@ -455,6 +449,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, | |||
455 | p->start_sect = start; | 449 | p->start_sect = start; |
456 | p->alignment_offset = | 450 | p->alignment_offset = |
457 | queue_limit_alignment_offset(&disk->queue->limits, start); | 451 | queue_limit_alignment_offset(&disk->queue->limits, start); |
452 | p->discard_alignment = | ||
453 | queue_limit_discard_alignment(&disk->queue->limits, start); | ||
458 | p->nr_sects = len; | 454 | p->nr_sects = len; |
459 | p->partno = partno; | 455 | p->partno = partno; |
460 | p->policy = get_disk_ro(disk); | 456 | p->policy = get_disk_ro(disk); |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 14def991d9dd..8a84210ca080 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -2169,11 +2169,7 @@ static const struct file_operations proc_fd_operations = { | |||
2169 | */ | 2169 | */ |
2170 | static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags) | 2170 | static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags) |
2171 | { | 2171 | { |
2172 | int rv; | 2172 | int rv = generic_permission(inode, mask, flags, NULL); |
2173 | |||
2174 | if (flags & IPERM_FLAG_RCU) | ||
2175 | return -ECHILD; | ||
2176 | rv = generic_permission(inode, mask, flags, NULL); | ||
2177 | if (rv == 0) | 2173 | if (rv == 0) |
2178 | return 0; | 2174 | return 0; |
2179 | if (task_pid(current) == proc_pid(inode)) | 2175 | if (task_pid(current) == proc_pid(inode)) |
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 781dec5bd682..be177f702acb 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c | |||
@@ -38,18 +38,21 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, | |||
38 | struct inode *inode; | 38 | struct inode *inode; |
39 | struct proc_inode *ei; | 39 | struct proc_inode *ei; |
40 | struct dentry *error = ERR_PTR(-ENOENT); | 40 | struct dentry *error = ERR_PTR(-ENOENT); |
41 | void *ns; | ||
41 | 42 | ||
42 | inode = proc_pid_make_inode(dir->i_sb, task); | 43 | inode = proc_pid_make_inode(dir->i_sb, task); |
43 | if (!inode) | 44 | if (!inode) |
44 | goto out; | 45 | goto out; |
45 | 46 | ||
47 | ns = ns_ops->get(task); | ||
48 | if (!ns) | ||
49 | goto out_iput; | ||
50 | |||
46 | ei = PROC_I(inode); | 51 | ei = PROC_I(inode); |
47 | inode->i_mode = S_IFREG|S_IRUSR; | 52 | inode->i_mode = S_IFREG|S_IRUSR; |
48 | inode->i_fop = &ns_file_operations; | 53 | inode->i_fop = &ns_file_operations; |
49 | ei->ns_ops = ns_ops; | 54 | ei->ns_ops = ns_ops; |
50 | ei->ns = ns_ops->get(task); | 55 | ei->ns = ns; |
51 | if (!ei->ns) | ||
52 | goto out_iput; | ||
53 | 56 | ||
54 | dentry->d_op = &pid_dentry_operations; | 57 | dentry->d_op = &pid_dentry_operations; |
55 | d_add(dentry, inode); | 58 | d_add(dentry, inode); |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index f50133c11c24..d167de365a8d 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -304,9 +304,6 @@ static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags) | |||
304 | struct ctl_table *table; | 304 | struct ctl_table *table; |
305 | int error; | 305 | int error; |
306 | 306 | ||
307 | if (flags & IPERM_FLAG_RCU) | ||
308 | return -ECHILD; | ||
309 | |||
310 | /* Executable files are not allowed under /proc/sys/ */ | 307 | /* Executable files are not allowed under /proc/sys/ */ |
311 | if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) | 308 | if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) |
312 | return -EACCES; | 309 | return -EACCES; |
diff --git a/fs/proc/root.c b/fs/proc/root.c index a9000e9cfee5..d6c3b416529b 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -28,11 +28,12 @@ static int proc_test_super(struct super_block *sb, void *data) | |||
28 | 28 | ||
29 | static int proc_set_super(struct super_block *sb, void *data) | 29 | static int proc_set_super(struct super_block *sb, void *data) |
30 | { | 30 | { |
31 | struct pid_namespace *ns; | 31 | int err = set_anon_super(sb, NULL); |
32 | 32 | if (!err) { | |
33 | ns = (struct pid_namespace *)data; | 33 | struct pid_namespace *ns = (struct pid_namespace *)data; |
34 | sb->s_fs_info = get_pid_ns(ns); | 34 | sb->s_fs_info = get_pid_ns(ns); |
35 | return set_anon_super(sb, NULL); | 35 | } |
36 | return err; | ||
36 | } | 37 | } |
37 | 38 | ||
38 | static struct dentry *proc_mount(struct file_system_type *fs_type, | 39 | static struct dentry *proc_mount(struct file_system_type *fs_type, |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index e8a62f41b458..d78089690965 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -954,8 +954,6 @@ static int xattr_mount_check(struct super_block *s) | |||
954 | 954 | ||
955 | int reiserfs_permission(struct inode *inode, int mask, unsigned int flags) | 955 | int reiserfs_permission(struct inode *inode, int mask, unsigned int flags) |
956 | { | 956 | { |
957 | if (flags & IPERM_FLAG_RCU) | ||
958 | return -ECHILD; | ||
959 | /* | 957 | /* |
960 | * We don't do permission checks on the internal objects. | 958 | * We don't do permission checks on the internal objects. |
961 | * Permissions are determined by the "owning" object. | 959 | * Permissions are determined by the "owning" object. |
diff --git a/fs/super.c b/fs/super.c index c75593953c52..ab3d672db0de 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -822,7 +822,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, | |||
822 | } else { | 822 | } else { |
823 | char b[BDEVNAME_SIZE]; | 823 | char b[BDEVNAME_SIZE]; |
824 | 824 | ||
825 | s->s_flags = flags; | 825 | s->s_flags = flags | MS_NOSEC; |
826 | s->s_mode = mode; | 826 | s->s_mode = mode; |
827 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); | 827 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); |
828 | sb_set_blocksize(s, block_size(bdev)); | 828 | sb_set_blocksize(s, block_size(bdev)); |
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 266895783b47..e34f0d99ea4e 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c | |||
@@ -95,6 +95,14 @@ static int sysfs_set_super(struct super_block *sb, void *data) | |||
95 | return error; | 95 | return error; |
96 | } | 96 | } |
97 | 97 | ||
98 | static void free_sysfs_super_info(struct sysfs_super_info *info) | ||
99 | { | ||
100 | int type; | ||
101 | for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) | ||
102 | kobj_ns_drop(type, info->ns[type]); | ||
103 | kfree(info); | ||
104 | } | ||
105 | |||
98 | static struct dentry *sysfs_mount(struct file_system_type *fs_type, | 106 | static struct dentry *sysfs_mount(struct file_system_type *fs_type, |
99 | int flags, const char *dev_name, void *data) | 107 | int flags, const char *dev_name, void *data) |
100 | { | 108 | { |
@@ -108,11 +116,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, | |||
108 | return ERR_PTR(-ENOMEM); | 116 | return ERR_PTR(-ENOMEM); |
109 | 117 | ||
110 | for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) | 118 | for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) |
111 | info->ns[type] = kobj_ns_current(type); | 119 | info->ns[type] = kobj_ns_grab_current(type); |
112 | 120 | ||
113 | sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); | 121 | sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); |
114 | if (IS_ERR(sb) || sb->s_fs_info != info) | 122 | if (IS_ERR(sb) || sb->s_fs_info != info) |
115 | kfree(info); | 123 | free_sysfs_super_info(info); |
116 | if (IS_ERR(sb)) | 124 | if (IS_ERR(sb)) |
117 | return ERR_CAST(sb); | 125 | return ERR_CAST(sb); |
118 | if (!sb->s_root) { | 126 | if (!sb->s_root) { |
@@ -131,12 +139,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, | |||
131 | static void sysfs_kill_sb(struct super_block *sb) | 139 | static void sysfs_kill_sb(struct super_block *sb) |
132 | { | 140 | { |
133 | struct sysfs_super_info *info = sysfs_info(sb); | 141 | struct sysfs_super_info *info = sysfs_info(sb); |
134 | |||
135 | /* Remove the superblock from fs_supers/s_instances | 142 | /* Remove the superblock from fs_supers/s_instances |
136 | * so we can't find it, before freeing sysfs_super_info. | 143 | * so we can't find it, before freeing sysfs_super_info. |
137 | */ | 144 | */ |
138 | kill_anon_super(sb); | 145 | kill_anon_super(sb); |
139 | kfree(info); | 146 | free_sysfs_super_info(info); |
140 | } | 147 | } |
141 | 148 | ||
142 | static struct file_system_type sysfs_fs_type = { | 149 | static struct file_system_type sysfs_fs_type = { |
@@ -145,28 +152,6 @@ static struct file_system_type sysfs_fs_type = { | |||
145 | .kill_sb = sysfs_kill_sb, | 152 | .kill_sb = sysfs_kill_sb, |
146 | }; | 153 | }; |
147 | 154 | ||
148 | void sysfs_exit_ns(enum kobj_ns_type type, const void *ns) | ||
149 | { | ||
150 | struct super_block *sb; | ||
151 | |||
152 | mutex_lock(&sysfs_mutex); | ||
153 | spin_lock(&sb_lock); | ||
154 | list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) { | ||
155 | struct sysfs_super_info *info = sysfs_info(sb); | ||
156 | /* | ||
157 | * If we see a superblock on the fs_supers/s_instances | ||
158 | * list the unmount has not completed and sb->s_fs_info | ||
159 | * points to a valid struct sysfs_super_info. | ||
160 | */ | ||
161 | /* Ignore superblocks with the wrong ns */ | ||
162 | if (info->ns[type] != ns) | ||
163 | continue; | ||
164 | info->ns[type] = NULL; | ||
165 | } | ||
166 | spin_unlock(&sb_lock); | ||
167 | mutex_unlock(&sysfs_mutex); | ||
168 | } | ||
169 | |||
170 | int __init sysfs_init(void) | 155 | int __init sysfs_init(void) |
171 | { | 156 | { |
172 | int err = -ENOMEM; | 157 | int err = -ENOMEM; |
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 3d28af31d863..2ed2404f3113 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h | |||
@@ -136,7 +136,7 @@ struct sysfs_addrm_cxt { | |||
136 | * instance). | 136 | * instance). |
137 | */ | 137 | */ |
138 | struct sysfs_super_info { | 138 | struct sysfs_super_info { |
139 | const void *ns[KOBJ_NS_TYPES]; | 139 | void *ns[KOBJ_NS_TYPES]; |
140 | }; | 140 | }; |
141 | #define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) | 141 | #define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info)) |
142 | extern struct sysfs_dirent sysfs_root; | 142 | extern struct sysfs_dirent sysfs_root; |
diff --git a/fs/timerfd.c b/fs/timerfd.c index f67acbdda5e8..dffeb3795af1 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c | |||
@@ -61,7 +61,9 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) | |||
61 | 61 | ||
62 | /* | 62 | /* |
63 | * Called when the clock was set to cancel the timers in the cancel | 63 | * Called when the clock was set to cancel the timers in the cancel |
64 | * list. | 64 | * list. This will wake up processes waiting on these timers. The |
65 | * wake-up requires ctx->ticks to be non zero, therefore we increment | ||
66 | * it before calling wake_up_locked(). | ||
65 | */ | 67 | */ |
66 | void timerfd_clock_was_set(void) | 68 | void timerfd_clock_was_set(void) |
67 | { | 69 | { |
@@ -76,6 +78,7 @@ void timerfd_clock_was_set(void) | |||
76 | spin_lock_irqsave(&ctx->wqh.lock, flags); | 78 | spin_lock_irqsave(&ctx->wqh.lock, flags); |
77 | if (ctx->moffs.tv64 != moffs.tv64) { | 79 | if (ctx->moffs.tv64 != moffs.tv64) { |
78 | ctx->moffs.tv64 = KTIME_MAX; | 80 | ctx->moffs.tv64 = KTIME_MAX; |
81 | ctx->ticks++; | ||
79 | wake_up_locked(&ctx->wqh); | 82 | wake_up_locked(&ctx->wqh); |
80 | } | 83 | } |
81 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); | 84 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); |
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 166951e0dcd3..3be645e012c9 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c | |||
@@ -581,6 +581,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
581 | ubifs_assert(wbuf->size % c->min_io_size == 0); | 581 | ubifs_assert(wbuf->size % c->min_io_size == 0); |
582 | ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); | 582 | ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); |
583 | ubifs_assert(!c->ro_media && !c->ro_mount); | 583 | ubifs_assert(!c->ro_media && !c->ro_mount); |
584 | ubifs_assert(!c->space_fixup); | ||
584 | if (c->leb_size - wbuf->offs >= c->max_write_size) | 585 | if (c->leb_size - wbuf->offs >= c->max_write_size) |
585 | ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); | 586 | ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); |
586 | 587 | ||
@@ -759,6 +760,7 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, | |||
759 | ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); | 760 | ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); |
760 | ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); | 761 | ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); |
761 | ubifs_assert(!c->ro_media && !c->ro_mount); | 762 | ubifs_assert(!c->ro_media && !c->ro_mount); |
763 | ubifs_assert(!c->space_fixup); | ||
762 | 764 | ||
763 | if (c->ro_error) | 765 | if (c->ro_error) |
764 | return -EROFS; | 766 | return -EROFS; |
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 34b1679e6e3a..cef0460f4c54 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c | |||
@@ -669,6 +669,7 @@ out_free: | |||
669 | 669 | ||
670 | out_release: | 670 | out_release: |
671 | release_head(c, BASEHD); | 671 | release_head(c, BASEHD); |
672 | kfree(dent); | ||
672 | out_ro: | 673 | out_ro: |
673 | ubifs_ro_mode(c, err); | 674 | ubifs_ro_mode(c, err); |
674 | if (last_reference) | 675 | if (last_reference) |
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index bd644bf587a8..a5422fffbd69 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c | |||
@@ -674,7 +674,7 @@ static int kill_orphans(struct ubifs_info *c) | |||
674 | if (IS_ERR(sleb)) { | 674 | if (IS_ERR(sleb)) { |
675 | if (PTR_ERR(sleb) == -EUCLEAN) | 675 | if (PTR_ERR(sleb) == -EUCLEAN) |
676 | sleb = ubifs_recover_leb(c, lnum, 0, | 676 | sleb = ubifs_recover_leb(c, lnum, 0, |
677 | c->sbuf, 0); | 677 | c->sbuf, -1); |
678 | if (IS_ERR(sleb)) { | 678 | if (IS_ERR(sleb)) { |
679 | err = PTR_ERR(sleb); | 679 | err = PTR_ERR(sleb); |
680 | break; | 680 | break; |
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 731d9e2e7b50..783d8e0beb76 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c | |||
@@ -564,19 +564,15 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, | |||
564 | } | 564 | } |
565 | 565 | ||
566 | /** | 566 | /** |
567 | * drop_last_node - drop the last node or group of nodes. | 567 | * drop_last_group - drop the last group of nodes. |
568 | * @sleb: scanned LEB information | 568 | * @sleb: scanned LEB information |
569 | * @offs: offset of dropped nodes is returned here | 569 | * @offs: offset of dropped nodes is returned here |
570 | * @grouped: non-zero if whole group of nodes have to be dropped | ||
571 | * | 570 | * |
572 | * This is a helper function for 'ubifs_recover_leb()' which drops the last | 571 | * This is a helper function for 'ubifs_recover_leb()' which drops the last |
573 | * node of the scanned LEB or the last group of nodes if @grouped is not zero. | 572 | * group of nodes of the scanned LEB. |
574 | * This function returns %1 if a node was dropped and %0 otherwise. | ||
575 | */ | 573 | */ |
576 | static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) | 574 | static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs) |
577 | { | 575 | { |
578 | int dropped = 0; | ||
579 | |||
580 | while (!list_empty(&sleb->nodes)) { | 576 | while (!list_empty(&sleb->nodes)) { |
581 | struct ubifs_scan_node *snod; | 577 | struct ubifs_scan_node *snod; |
582 | struct ubifs_ch *ch; | 578 | struct ubifs_ch *ch; |
@@ -585,17 +581,40 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) | |||
585 | list); | 581 | list); |
586 | ch = snod->node; | 582 | ch = snod->node; |
587 | if (ch->group_type != UBIFS_IN_NODE_GROUP) | 583 | if (ch->group_type != UBIFS_IN_NODE_GROUP) |
588 | return dropped; | 584 | break; |
589 | dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); | 585 | |
586 | dbg_rcvry("dropping grouped node at %d:%d", | ||
587 | sleb->lnum, snod->offs); | ||
588 | *offs = snod->offs; | ||
589 | list_del(&snod->list); | ||
590 | kfree(snod); | ||
591 | sleb->nodes_cnt -= 1; | ||
592 | } | ||
593 | } | ||
594 | |||
595 | /** | ||
596 | * drop_last_node - drop the last node. | ||
597 | * @sleb: scanned LEB information | ||
598 | * @offs: offset of dropped nodes is returned here | ||
599 | * @grouped: non-zero if whole group of nodes have to be dropped | ||
600 | * | ||
601 | * This is a helper function for 'ubifs_recover_leb()' which drops the last | ||
602 | * node of the scanned LEB. | ||
603 | */ | ||
604 | static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs) | ||
605 | { | ||
606 | struct ubifs_scan_node *snod; | ||
607 | |||
608 | if (!list_empty(&sleb->nodes)) { | ||
609 | snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, | ||
610 | list); | ||
611 | |||
612 | dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs); | ||
590 | *offs = snod->offs; | 613 | *offs = snod->offs; |
591 | list_del(&snod->list); | 614 | list_del(&snod->list); |
592 | kfree(snod); | 615 | kfree(snod); |
593 | sleb->nodes_cnt -= 1; | 616 | sleb->nodes_cnt -= 1; |
594 | dropped = 1; | ||
595 | if (!grouped) | ||
596 | break; | ||
597 | } | 617 | } |
598 | return dropped; | ||
599 | } | 618 | } |
600 | 619 | ||
601 | /** | 620 | /** |
@@ -604,7 +623,8 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) | |||
604 | * @lnum: LEB number | 623 | * @lnum: LEB number |
605 | * @offs: offset | 624 | * @offs: offset |
606 | * @sbuf: LEB-sized buffer to use | 625 | * @sbuf: LEB-sized buffer to use |
607 | * @grouped: nodes may be grouped for recovery | 626 | * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not |
627 | * belong to any journal head) | ||
608 | * | 628 | * |
609 | * This function does a scan of a LEB, but caters for errors that might have | 629 | * This function does a scan of a LEB, but caters for errors that might have |
610 | * been caused by the unclean unmount from which we are attempting to recover. | 630 | * been caused by the unclean unmount from which we are attempting to recover. |
@@ -612,13 +632,14 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) | |||
612 | * found, and a negative error code in case of failure. | 632 | * found, and a negative error code in case of failure. |
613 | */ | 633 | */ |
614 | struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | 634 | struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, |
615 | int offs, void *sbuf, int grouped) | 635 | int offs, void *sbuf, int jhead) |
616 | { | 636 | { |
617 | int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; | 637 | int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; |
638 | int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped; | ||
618 | struct ubifs_scan_leb *sleb; | 639 | struct ubifs_scan_leb *sleb; |
619 | void *buf = sbuf + offs; | 640 | void *buf = sbuf + offs; |
620 | 641 | ||
621 | dbg_rcvry("%d:%d", lnum, offs); | 642 | dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped); |
622 | 643 | ||
623 | sleb = ubifs_start_scan(c, lnum, offs, sbuf); | 644 | sleb = ubifs_start_scan(c, lnum, offs, sbuf); |
624 | if (IS_ERR(sleb)) | 645 | if (IS_ERR(sleb)) |
@@ -635,7 +656,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
635 | * Scan quietly until there is an error from which we cannot | 656 | * Scan quietly until there is an error from which we cannot |
636 | * recover | 657 | * recover |
637 | */ | 658 | */ |
638 | ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); | 659 | ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); |
639 | if (ret == SCANNED_A_NODE) { | 660 | if (ret == SCANNED_A_NODE) { |
640 | /* A valid node, and not a padding node */ | 661 | /* A valid node, and not a padding node */ |
641 | struct ubifs_ch *ch = buf; | 662 | struct ubifs_ch *ch = buf; |
@@ -695,59 +716,62 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
695 | * If nodes are grouped, always drop the incomplete group at | 716 | * If nodes are grouped, always drop the incomplete group at |
696 | * the end. | 717 | * the end. |
697 | */ | 718 | */ |
698 | drop_last_node(sleb, &offs, 1); | 719 | drop_last_group(sleb, &offs); |
699 | 720 | ||
700 | /* | 721 | if (jhead == GCHD) { |
701 | * While we are in the middle of the same min. I/O unit keep dropping | 722 | /* |
702 | * nodes. So basically, what we want is to make sure that the last min. | 723 | * If this LEB belongs to the GC head then while we are in the |
703 | * I/O unit where we saw the corruption is dropped completely with all | 724 | * middle of the same min. I/O unit keep dropping nodes. So |
704 | * the uncorrupted node which may possibly sit there. | 725 | * basically, what we want is to make sure that the last min. |
705 | * | 726 | * I/O unit where we saw the corruption is dropped completely |
706 | * In other words, let's name the min. I/O unit where the corruption | 727 | * with all the uncorrupted nodes which may possibly sit there. |
707 | * starts B, and the previous min. I/O unit A. The below code tries to | 728 | * |
708 | * deal with a situation when half of B contains valid nodes or the end | 729 | * In other words, let's name the min. I/O unit where the |
709 | * of a valid node, and the second half of B contains corrupted data or | 730 | * corruption starts B, and the previous min. I/O unit A. The |
710 | * garbage. This means that UBIFS had been writing to B just before the | 731 | * below code tries to deal with a situation when half of B |
711 | * power cut happened. I do not know how realistic is this scenario | 732 | * contains valid nodes or the end of a valid node, and the |
712 | * that half of the min. I/O unit had been written successfully and the | 733 | * second half of B contains corrupted data or garbage. This |
713 | * other half not, but this is possible in our 'failure mode emulation' | 734 | * means that UBIFS had been writing to B just before the power |
714 | * infrastructure at least. | 735 | * cut happened. I do not know how realistic is this scenario |
715 | * | 736 | * that half of the min. I/O unit had been written successfully |
716 | * So what is the problem, why we need to drop those nodes? Whey can't | 737 | * and the other half not, but this is possible in our 'failure |
717 | * we just clean-up the second half of B by putting a padding node | 738 | * mode emulation' infrastructure at least. |
718 | * there? We can, and this works fine with one exception which was | 739 | * |
719 | * reproduced with power cut emulation testing and happens extremely | 740 | * So what is the problem, why we need to drop those nodes? Why |
720 | * rarely. The description follows, but it is worth noting that that is | 741 | * can't we just clean-up the second half of B by putting a |
721 | * only about the GC head, so we could do this trick only if the bud | 742 | * padding node there? We can, and this works fine with one |
722 | * belongs to the GC head, but it does not seem to be worth an | 743 | * exception which was reproduced with power cut emulation |
723 | * additional "if" statement. | 744 | * testing and happens extremely rarely. |
724 | * | 745 | * |
725 | * So, imagine the file-system is full, we run GC which is moving valid | 746 | * Imagine the file-system is full, we run GC which starts |
726 | * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head | 747 | * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is |
727 | * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X | 748 | * the current GC head LEB). The @c->gc_lnum is -1, which means |
728 | * and will try to continue. Imagine that LEB X is currently the | 749 | * that GC will retain LEB X and will try to continue. Imagine |
729 | * dirtiest LEB, and the amount of used space in LEB Y is exactly the | 750 | * that LEB X is currently the dirtiest LEB, and the amount of |
730 | * same as amount of free space in LEB X. | 751 | * used space in LEB Y is exactly the same as amount of free |
731 | * | 752 | * space in LEB X. |
732 | * And a power cut happens when nodes are moved from LEB X to LEB Y. We | 753 | * |
733 | * are here trying to recover LEB Y which is the GC head LEB. We find | 754 | * And a power cut happens when nodes are moved from LEB X to |
734 | * the min. I/O unit B as described above. Then we clean-up LEB Y by | 755 | * LEB Y. We are here trying to recover LEB Y which is the GC |
735 | * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function | 756 | * head LEB. We find the min. I/O unit B as described above. |
736 | * fails, because it cannot find a dirty LEB which could be GC'd into | 757 | * Then we clean-up LEB Y by padding min. I/O unit. And later |
737 | * LEB Y! Even LEB X does not match because the amount of valid nodes | 758 | * 'ubifs_rcvry_gc_commit()' function fails, because it cannot |
738 | * there does not fit the free space in LEB Y any more! And this is | 759 | * find a dirty LEB which could be GC'd into LEB Y! Even LEB X |
739 | * because of the padding node which we added to LEB Y. The | 760 | * does not match because the amount of valid nodes there does |
740 | * user-visible effect of this which I once observed and analysed is | 761 | * not fit the free space in LEB Y any more! And this is |
741 | * that we cannot mount the file-system with -ENOSPC error. | 762 | * because of the padding node which we added to LEB Y. The |
742 | * | 763 | * user-visible effect of this which I once observed and |
743 | * So obviously, to make sure that situation does not happen we should | 764 | * analysed is that we cannot mount the file-system with |
744 | * free min. I/O unit B in LEB Y completely and the last used min. I/O | 765 | * -ENOSPC error. |
745 | * unit in LEB Y should be A. This is basically what the below code | 766 | * |
746 | * tries to do. | 767 | * So obviously, to make sure that situation does not happen we |
747 | */ | 768 | * should free min. I/O unit B in LEB Y completely and the last |
748 | while (min_io_unit == round_down(offs, c->min_io_size) && | 769 | * used min. I/O unit in LEB Y should be A. This is basically |
749 | min_io_unit != offs && | 770 | * what the below code tries to do. |
750 | drop_last_node(sleb, &offs, grouped)); | 771 | */ |
772 | while (offs > min_io_unit) | ||
773 | drop_last_node(sleb, &offs); | ||
774 | } | ||
751 | 775 | ||
752 | buf = sbuf + offs; | 776 | buf = sbuf + offs; |
753 | len = c->leb_size - offs; | 777 | len = c->leb_size - offs; |
@@ -881,7 +905,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, | |||
881 | } | 905 | } |
882 | ubifs_scan_destroy(sleb); | 906 | ubifs_scan_destroy(sleb); |
883 | } | 907 | } |
884 | return ubifs_recover_leb(c, lnum, offs, sbuf, 0); | 908 | return ubifs_recover_leb(c, lnum, offs, sbuf, -1); |
885 | } | 909 | } |
886 | 910 | ||
887 | /** | 911 | /** |
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 6617280d1679..5e97161ce4d3 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c | |||
@@ -557,8 +557,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b) | |||
557 | * these LEBs could possibly be written to at the power cut | 557 | * these LEBs could possibly be written to at the power cut |
558 | * time. | 558 | * time. |
559 | */ | 559 | */ |
560 | sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, | 560 | sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); |
561 | b->bud->jhead != GCHD); | ||
562 | else | 561 | else |
563 | sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); | 562 | sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); |
564 | if (IS_ERR(sleb)) | 563 | if (IS_ERR(sleb)) |
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index ca953a945029..9e1d05666fed 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c | |||
@@ -284,7 +284,11 @@ int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) | |||
284 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); | 284 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); |
285 | 285 | ||
286 | if (nr == 0) | 286 | if (nr == 0) |
287 | return clean_zn_cnt; | 287 | /* |
288 | * Due to the way UBIFS updates the clean znode counter it may | ||
289 | * temporarily be negative. | ||
290 | */ | ||
291 | return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; | ||
288 | 292 | ||
289 | if (!clean_zn_cnt) { | 293 | if (!clean_zn_cnt) { |
290 | /* | 294 | /* |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 1ab0d22e4c94..529be0582029 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -811,15 +811,18 @@ static int alloc_wbufs(struct ubifs_info *c) | |||
811 | 811 | ||
812 | c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; | 812 | c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; |
813 | c->jheads[i].wbuf.jhead = i; | 813 | c->jheads[i].wbuf.jhead = i; |
814 | c->jheads[i].grouped = 1; | ||
814 | } | 815 | } |
815 | 816 | ||
816 | c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; | 817 | c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; |
817 | /* | 818 | /* |
818 | * Garbage Collector head likely contains long-term data and | 819 | * Garbage Collector head likely contains long-term data and |
819 | * does not need to be synchronized by timer. | 820 | * does not need to be synchronized by timer. Also GC head nodes are |
821 | * not grouped. | ||
820 | */ | 822 | */ |
821 | c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; | 823 | c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; |
822 | c->jheads[GCHD].wbuf.no_timer = 1; | 824 | c->jheads[GCHD].wbuf.no_timer = 1; |
825 | c->jheads[GCHD].grouped = 0; | ||
823 | 826 | ||
824 | return 0; | 827 | return 0; |
825 | } | 828 | } |
@@ -1284,12 +1287,25 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1284 | if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { | 1287 | if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { |
1285 | ubifs_msg("recovery needed"); | 1288 | ubifs_msg("recovery needed"); |
1286 | c->need_recovery = 1; | 1289 | c->need_recovery = 1; |
1287 | if (!c->ro_mount) { | 1290 | } |
1288 | err = ubifs_recover_inl_heads(c, c->sbuf); | 1291 | |
1289 | if (err) | 1292 | if (c->need_recovery && !c->ro_mount) { |
1290 | goto out_master; | 1293 | err = ubifs_recover_inl_heads(c, c->sbuf); |
1291 | } | 1294 | if (err) |
1292 | } else if (!c->ro_mount) { | 1295 | goto out_master; |
1296 | } | ||
1297 | |||
1298 | err = ubifs_lpt_init(c, 1, !c->ro_mount); | ||
1299 | if (err) | ||
1300 | goto out_master; | ||
1301 | |||
1302 | if (!c->ro_mount && c->space_fixup) { | ||
1303 | err = ubifs_fixup_free_space(c); | ||
1304 | if (err) | ||
1305 | goto out_master; | ||
1306 | } | ||
1307 | |||
1308 | if (!c->ro_mount) { | ||
1293 | /* | 1309 | /* |
1294 | * Set the "dirty" flag so that if we reboot uncleanly we | 1310 | * Set the "dirty" flag so that if we reboot uncleanly we |
1295 | * will notice this immediately on the next mount. | 1311 | * will notice this immediately on the next mount. |
@@ -1297,13 +1313,9 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1297 | c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); | 1313 | c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); |
1298 | err = ubifs_write_master(c); | 1314 | err = ubifs_write_master(c); |
1299 | if (err) | 1315 | if (err) |
1300 | goto out_master; | 1316 | goto out_lpt; |
1301 | } | 1317 | } |
1302 | 1318 | ||
1303 | err = ubifs_lpt_init(c, 1, !c->ro_mount); | ||
1304 | if (err) | ||
1305 | goto out_lpt; | ||
1306 | |||
1307 | err = dbg_check_idx_size(c, c->bi.old_idx_sz); | 1319 | err = dbg_check_idx_size(c, c->bi.old_idx_sz); |
1308 | if (err) | 1320 | if (err) |
1309 | goto out_lpt; | 1321 | goto out_lpt; |
@@ -1396,12 +1408,6 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1396 | } else | 1408 | } else |
1397 | ubifs_assert(c->lst.taken_empty_lebs > 0); | 1409 | ubifs_assert(c->lst.taken_empty_lebs > 0); |
1398 | 1410 | ||
1399 | if (!c->ro_mount && c->space_fixup) { | ||
1400 | err = ubifs_fixup_free_space(c); | ||
1401 | if (err) | ||
1402 | goto out_infos; | ||
1403 | } | ||
1404 | |||
1405 | err = dbg_check_filesystem(c); | 1411 | err = dbg_check_filesystem(c); |
1406 | if (err) | 1412 | if (err) |
1407 | goto out_infos; | 1413 | goto out_infos; |
@@ -1842,7 +1848,6 @@ static void ubifs_put_super(struct super_block *sb) | |||
1842 | bdi_destroy(&c->bdi); | 1848 | bdi_destroy(&c->bdi); |
1843 | ubi_close_volume(c->ubi); | 1849 | ubi_close_volume(c->ubi); |
1844 | mutex_unlock(&c->umount_mutex); | 1850 | mutex_unlock(&c->umount_mutex); |
1845 | kfree(c); | ||
1846 | } | 1851 | } |
1847 | 1852 | ||
1848 | static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) | 1853 | static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) |
@@ -1965,61 +1970,65 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode) | |||
1965 | return ERR_PTR(-EINVAL); | 1970 | return ERR_PTR(-EINVAL); |
1966 | } | 1971 | } |
1967 | 1972 | ||
1968 | static int ubifs_fill_super(struct super_block *sb, void *data, int silent) | 1973 | static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi) |
1969 | { | 1974 | { |
1970 | struct ubi_volume_desc *ubi = sb->s_fs_info; | ||
1971 | struct ubifs_info *c; | 1975 | struct ubifs_info *c; |
1972 | struct inode *root; | ||
1973 | int err; | ||
1974 | 1976 | ||
1975 | c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL); | 1977 | c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL); |
1976 | if (!c) | 1978 | if (c) { |
1977 | return -ENOMEM; | 1979 | spin_lock_init(&c->cnt_lock); |
1980 | spin_lock_init(&c->cs_lock); | ||
1981 | spin_lock_init(&c->buds_lock); | ||
1982 | spin_lock_init(&c->space_lock); | ||
1983 | spin_lock_init(&c->orphan_lock); | ||
1984 | init_rwsem(&c->commit_sem); | ||
1985 | mutex_init(&c->lp_mutex); | ||
1986 | mutex_init(&c->tnc_mutex); | ||
1987 | mutex_init(&c->log_mutex); | ||
1988 | mutex_init(&c->mst_mutex); | ||
1989 | mutex_init(&c->umount_mutex); | ||
1990 | mutex_init(&c->bu_mutex); | ||
1991 | mutex_init(&c->write_reserve_mutex); | ||
1992 | init_waitqueue_head(&c->cmt_wq); | ||
1993 | c->buds = RB_ROOT; | ||
1994 | c->old_idx = RB_ROOT; | ||
1995 | c->size_tree = RB_ROOT; | ||
1996 | c->orph_tree = RB_ROOT; | ||
1997 | INIT_LIST_HEAD(&c->infos_list); | ||
1998 | INIT_LIST_HEAD(&c->idx_gc); | ||
1999 | INIT_LIST_HEAD(&c->replay_list); | ||
2000 | INIT_LIST_HEAD(&c->replay_buds); | ||
2001 | INIT_LIST_HEAD(&c->uncat_list); | ||
2002 | INIT_LIST_HEAD(&c->empty_list); | ||
2003 | INIT_LIST_HEAD(&c->freeable_list); | ||
2004 | INIT_LIST_HEAD(&c->frdi_idx_list); | ||
2005 | INIT_LIST_HEAD(&c->unclean_leb_list); | ||
2006 | INIT_LIST_HEAD(&c->old_buds); | ||
2007 | INIT_LIST_HEAD(&c->orph_list); | ||
2008 | INIT_LIST_HEAD(&c->orph_new); | ||
2009 | c->no_chk_data_crc = 1; | ||
2010 | |||
2011 | c->highest_inum = UBIFS_FIRST_INO; | ||
2012 | c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; | ||
2013 | |||
2014 | ubi_get_volume_info(ubi, &c->vi); | ||
2015 | ubi_get_device_info(c->vi.ubi_num, &c->di); | ||
2016 | } | ||
2017 | return c; | ||
2018 | } | ||
1978 | 2019 | ||
1979 | spin_lock_init(&c->cnt_lock); | 2020 | static int ubifs_fill_super(struct super_block *sb, void *data, int silent) |
1980 | spin_lock_init(&c->cs_lock); | 2021 | { |
1981 | spin_lock_init(&c->buds_lock); | 2022 | struct ubifs_info *c = sb->s_fs_info; |
1982 | spin_lock_init(&c->space_lock); | 2023 | struct inode *root; |
1983 | spin_lock_init(&c->orphan_lock); | 2024 | int err; |
1984 | init_rwsem(&c->commit_sem); | ||
1985 | mutex_init(&c->lp_mutex); | ||
1986 | mutex_init(&c->tnc_mutex); | ||
1987 | mutex_init(&c->log_mutex); | ||
1988 | mutex_init(&c->mst_mutex); | ||
1989 | mutex_init(&c->umount_mutex); | ||
1990 | mutex_init(&c->bu_mutex); | ||
1991 | mutex_init(&c->write_reserve_mutex); | ||
1992 | init_waitqueue_head(&c->cmt_wq); | ||
1993 | c->buds = RB_ROOT; | ||
1994 | c->old_idx = RB_ROOT; | ||
1995 | c->size_tree = RB_ROOT; | ||
1996 | c->orph_tree = RB_ROOT; | ||
1997 | INIT_LIST_HEAD(&c->infos_list); | ||
1998 | INIT_LIST_HEAD(&c->idx_gc); | ||
1999 | INIT_LIST_HEAD(&c->replay_list); | ||
2000 | INIT_LIST_HEAD(&c->replay_buds); | ||
2001 | INIT_LIST_HEAD(&c->uncat_list); | ||
2002 | INIT_LIST_HEAD(&c->empty_list); | ||
2003 | INIT_LIST_HEAD(&c->freeable_list); | ||
2004 | INIT_LIST_HEAD(&c->frdi_idx_list); | ||
2005 | INIT_LIST_HEAD(&c->unclean_leb_list); | ||
2006 | INIT_LIST_HEAD(&c->old_buds); | ||
2007 | INIT_LIST_HEAD(&c->orph_list); | ||
2008 | INIT_LIST_HEAD(&c->orph_new); | ||
2009 | c->no_chk_data_crc = 1; | ||
2010 | 2025 | ||
2011 | c->vfs_sb = sb; | 2026 | c->vfs_sb = sb; |
2012 | c->highest_inum = UBIFS_FIRST_INO; | ||
2013 | c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; | ||
2014 | |||
2015 | ubi_get_volume_info(ubi, &c->vi); | ||
2016 | ubi_get_device_info(c->vi.ubi_num, &c->di); | ||
2017 | |||
2018 | /* Re-open the UBI device in read-write mode */ | 2027 | /* Re-open the UBI device in read-write mode */ |
2019 | c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE); | 2028 | c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE); |
2020 | if (IS_ERR(c->ubi)) { | 2029 | if (IS_ERR(c->ubi)) { |
2021 | err = PTR_ERR(c->ubi); | 2030 | err = PTR_ERR(c->ubi); |
2022 | goto out_free; | 2031 | goto out; |
2023 | } | 2032 | } |
2024 | 2033 | ||
2025 | /* | 2034 | /* |
@@ -2085,24 +2094,29 @@ out_bdi: | |||
2085 | bdi_destroy(&c->bdi); | 2094 | bdi_destroy(&c->bdi); |
2086 | out_close: | 2095 | out_close: |
2087 | ubi_close_volume(c->ubi); | 2096 | ubi_close_volume(c->ubi); |
2088 | out_free: | 2097 | out: |
2089 | kfree(c); | ||
2090 | return err; | 2098 | return err; |
2091 | } | 2099 | } |
2092 | 2100 | ||
2093 | static int sb_test(struct super_block *sb, void *data) | 2101 | static int sb_test(struct super_block *sb, void *data) |
2094 | { | 2102 | { |
2095 | dev_t *dev = data; | 2103 | struct ubifs_info *c1 = data; |
2096 | struct ubifs_info *c = sb->s_fs_info; | 2104 | struct ubifs_info *c = sb->s_fs_info; |
2097 | 2105 | ||
2098 | return c->vi.cdev == *dev; | 2106 | return c->vi.cdev == c1->vi.cdev; |
2107 | } | ||
2108 | |||
2109 | static int sb_set(struct super_block *sb, void *data) | ||
2110 | { | ||
2111 | sb->s_fs_info = data; | ||
2112 | return set_anon_super(sb, NULL); | ||
2099 | } | 2113 | } |
2100 | 2114 | ||
2101 | static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, | 2115 | static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, |
2102 | const char *name, void *data) | 2116 | const char *name, void *data) |
2103 | { | 2117 | { |
2104 | struct ubi_volume_desc *ubi; | 2118 | struct ubi_volume_desc *ubi; |
2105 | struct ubi_volume_info vi; | 2119 | struct ubifs_info *c; |
2106 | struct super_block *sb; | 2120 | struct super_block *sb; |
2107 | int err; | 2121 | int err; |
2108 | 2122 | ||
@@ -2119,19 +2133,25 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, | |||
2119 | name, (int)PTR_ERR(ubi)); | 2133 | name, (int)PTR_ERR(ubi)); |
2120 | return ERR_CAST(ubi); | 2134 | return ERR_CAST(ubi); |
2121 | } | 2135 | } |
2122 | ubi_get_volume_info(ubi, &vi); | ||
2123 | 2136 | ||
2124 | dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); | 2137 | c = alloc_ubifs_info(ubi); |
2138 | if (!c) { | ||
2139 | err = -ENOMEM; | ||
2140 | goto out_close; | ||
2141 | } | ||
2142 | |||
2143 | dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); | ||
2125 | 2144 | ||
2126 | sb = sget(fs_type, &sb_test, &set_anon_super, &vi.cdev); | 2145 | sb = sget(fs_type, sb_test, sb_set, c); |
2127 | if (IS_ERR(sb)) { | 2146 | if (IS_ERR(sb)) { |
2128 | err = PTR_ERR(sb); | 2147 | err = PTR_ERR(sb); |
2148 | kfree(c); | ||
2129 | goto out_close; | 2149 | goto out_close; |
2130 | } | 2150 | } |
2131 | 2151 | ||
2132 | if (sb->s_root) { | 2152 | if (sb->s_root) { |
2133 | struct ubifs_info *c1 = sb->s_fs_info; | 2153 | struct ubifs_info *c1 = sb->s_fs_info; |
2134 | 2154 | kfree(c); | |
2135 | /* A new mount point for already mounted UBIFS */ | 2155 | /* A new mount point for already mounted UBIFS */ |
2136 | dbg_gen("this ubi volume is already mounted"); | 2156 | dbg_gen("this ubi volume is already mounted"); |
2137 | if (!!(flags & MS_RDONLY) != c1->ro_mount) { | 2157 | if (!!(flags & MS_RDONLY) != c1->ro_mount) { |
@@ -2140,11 +2160,6 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, | |||
2140 | } | 2160 | } |
2141 | } else { | 2161 | } else { |
2142 | sb->s_flags = flags; | 2162 | sb->s_flags = flags; |
2143 | /* | ||
2144 | * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is | ||
2145 | * replaced by 'c'. | ||
2146 | */ | ||
2147 | sb->s_fs_info = ubi; | ||
2148 | err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); | 2163 | err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); |
2149 | if (err) | 2164 | if (err) |
2150 | goto out_deact; | 2165 | goto out_deact; |
@@ -2164,11 +2179,18 @@ out_close: | |||
2164 | return ERR_PTR(err); | 2179 | return ERR_PTR(err); |
2165 | } | 2180 | } |
2166 | 2181 | ||
2182 | static void kill_ubifs_super(struct super_block *s) | ||
2183 | { | ||
2184 | struct ubifs_info *c = s->s_fs_info; | ||
2185 | kill_anon_super(s); | ||
2186 | kfree(c); | ||
2187 | } | ||
2188 | |||
2167 | static struct file_system_type ubifs_fs_type = { | 2189 | static struct file_system_type ubifs_fs_type = { |
2168 | .name = "ubifs", | 2190 | .name = "ubifs", |
2169 | .owner = THIS_MODULE, | 2191 | .owner = THIS_MODULE, |
2170 | .mount = ubifs_mount, | 2192 | .mount = ubifs_mount, |
2171 | .kill_sb = kill_anon_super, | 2193 | .kill_sb = kill_ubifs_super, |
2172 | }; | 2194 | }; |
2173 | 2195 | ||
2174 | /* | 2196 | /* |
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 8119b1fd8d94..91b4213dde84 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
@@ -2876,12 +2876,13 @@ static void tnc_destroy_cnext(struct ubifs_info *c) | |||
2876 | */ | 2876 | */ |
2877 | void ubifs_tnc_close(struct ubifs_info *c) | 2877 | void ubifs_tnc_close(struct ubifs_info *c) |
2878 | { | 2878 | { |
2879 | long clean_freed; | ||
2880 | |||
2881 | tnc_destroy_cnext(c); | 2879 | tnc_destroy_cnext(c); |
2882 | if (c->zroot.znode) { | 2880 | if (c->zroot.znode) { |
2883 | clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); | 2881 | long n; |
2884 | atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); | 2882 | |
2883 | ubifs_destroy_tnc_subtree(c->zroot.znode); | ||
2884 | n = atomic_long_read(&c->clean_zn_cnt); | ||
2885 | atomic_long_sub(n, &ubifs_clean_zn_cnt); | ||
2885 | } | 2886 | } |
2886 | kfree(c->gap_lebs); | 2887 | kfree(c->gap_lebs); |
2887 | kfree(c->ilebs); | 2888 | kfree(c->ilebs); |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index a70d7b4ffb25..f79983d6f860 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -722,12 +722,14 @@ struct ubifs_bud { | |||
722 | * struct ubifs_jhead - journal head. | 722 | * struct ubifs_jhead - journal head. |
723 | * @wbuf: head's write-buffer | 723 | * @wbuf: head's write-buffer |
724 | * @buds_list: list of bud LEBs belonging to this journal head | 724 | * @buds_list: list of bud LEBs belonging to this journal head |
725 | * @grouped: non-zero if UBIFS groups nodes when writing to this journal head | ||
725 | * | 726 | * |
726 | * Note, the @buds list is protected by the @c->buds_lock. | 727 | * Note, the @buds list is protected by the @c->buds_lock. |
727 | */ | 728 | */ |
728 | struct ubifs_jhead { | 729 | struct ubifs_jhead { |
729 | struct ubifs_wbuf wbuf; | 730 | struct ubifs_wbuf wbuf; |
730 | struct list_head buds_list; | 731 | struct list_head buds_list; |
732 | unsigned int grouped:1; | ||
731 | }; | 733 | }; |
732 | 734 | ||
733 | /** | 735 | /** |
@@ -1742,7 +1744,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); | |||
1742 | int ubifs_recover_master_node(struct ubifs_info *c); | 1744 | int ubifs_recover_master_node(struct ubifs_info *c); |
1743 | int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); | 1745 | int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); |
1744 | struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | 1746 | struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, |
1745 | int offs, void *sbuf, int grouped); | 1747 | int offs, void *sbuf, int jhead); |
1746 | struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, | 1748 | struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, |
1747 | int offs, void *sbuf); | 1749 | int offs, void *sbuf); |
1748 | int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); | 1750 | int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); |
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index f4213ba1ff85..7f782af286bf 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -131,19 +131,34 @@ xfs_file_fsync( | |||
131 | { | 131 | { |
132 | struct inode *inode = file->f_mapping->host; | 132 | struct inode *inode = file->f_mapping->host; |
133 | struct xfs_inode *ip = XFS_I(inode); | 133 | struct xfs_inode *ip = XFS_I(inode); |
134 | struct xfs_mount *mp = ip->i_mount; | ||
134 | struct xfs_trans *tp; | 135 | struct xfs_trans *tp; |
135 | int error = 0; | 136 | int error = 0; |
136 | int log_flushed = 0; | 137 | int log_flushed = 0; |
137 | 138 | ||
138 | trace_xfs_file_fsync(ip); | 139 | trace_xfs_file_fsync(ip); |
139 | 140 | ||
140 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 141 | if (XFS_FORCED_SHUTDOWN(mp)) |
141 | return -XFS_ERROR(EIO); | 142 | return -XFS_ERROR(EIO); |
142 | 143 | ||
143 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | 144 | xfs_iflags_clear(ip, XFS_ITRUNCATED); |
144 | 145 | ||
145 | xfs_ioend_wait(ip); | 146 | xfs_ioend_wait(ip); |
146 | 147 | ||
148 | if (mp->m_flags & XFS_MOUNT_BARRIER) { | ||
149 | /* | ||
150 | * If we have an RT and/or log subvolume we need to make sure | ||
151 | * to flush the write cache the device used for file data | ||
152 | * first. This is to ensure newly written file data make | ||
153 | * it to disk before logging the new inode size in case of | ||
154 | * an extending write. | ||
155 | */ | ||
156 | if (XFS_IS_REALTIME_INODE(ip)) | ||
157 | xfs_blkdev_issue_flush(mp->m_rtdev_targp); | ||
158 | else if (mp->m_logdev_targp != mp->m_ddev_targp) | ||
159 | xfs_blkdev_issue_flush(mp->m_ddev_targp); | ||
160 | } | ||
161 | |||
147 | /* | 162 | /* |
148 | * We always need to make sure that the required inode state is safe on | 163 | * We always need to make sure that the required inode state is safe on |
149 | * disk. The inode might be clean but we still might need to force the | 164 | * disk. The inode might be clean but we still might need to force the |
@@ -175,9 +190,9 @@ xfs_file_fsync( | |||
175 | * updates. The sync transaction will also force the log. | 190 | * updates. The sync transaction will also force the log. |
176 | */ | 191 | */ |
177 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 192 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
178 | tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); | 193 | tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); |
179 | error = xfs_trans_reserve(tp, 0, | 194 | error = xfs_trans_reserve(tp, 0, |
180 | XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); | 195 | XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); |
181 | if (error) { | 196 | if (error) { |
182 | xfs_trans_cancel(tp, 0); | 197 | xfs_trans_cancel(tp, 0); |
183 | return -error; | 198 | return -error; |
@@ -209,28 +224,25 @@ xfs_file_fsync( | |||
209 | * force the log. | 224 | * force the log. |
210 | */ | 225 | */ |
211 | if (xfs_ipincount(ip)) { | 226 | if (xfs_ipincount(ip)) { |
212 | error = _xfs_log_force_lsn(ip->i_mount, | 227 | error = _xfs_log_force_lsn(mp, |
213 | ip->i_itemp->ili_last_lsn, | 228 | ip->i_itemp->ili_last_lsn, |
214 | XFS_LOG_SYNC, &log_flushed); | 229 | XFS_LOG_SYNC, &log_flushed); |
215 | } | 230 | } |
216 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 231 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
217 | } | 232 | } |
218 | 233 | ||
219 | if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) { | 234 | /* |
220 | /* | 235 | * If we only have a single device, and the log force about was |
221 | * If the log write didn't issue an ordered tag we need | 236 | * a no-op we might have to flush the data device cache here. |
222 | * to flush the disk cache for the data device now. | 237 | * This can only happen for fdatasync/O_DSYNC if we were overwriting |
223 | */ | 238 | * an already allocated file and thus do not have any metadata to |
224 | if (!log_flushed) | 239 | * commit. |
225 | xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); | 240 | */ |
226 | 241 | if ((mp->m_flags & XFS_MOUNT_BARRIER) && | |
227 | /* | 242 | mp->m_logdev_targp == mp->m_ddev_targp && |
228 | * If this inode is on the RT dev we need to flush that | 243 | !XFS_IS_REALTIME_INODE(ip) && |
229 | * cache as well. | 244 | !log_flushed) |
230 | */ | 245 | xfs_blkdev_issue_flush(mp->m_ddev_targp); |
231 | if (XFS_IS_REALTIME_INODE(ip)) | ||
232 | xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); | ||
233 | } | ||
234 | 246 | ||
235 | return -error; | 247 | return -error; |
236 | } | 248 | } |
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index dd21784525a8..d44d92cd12b1 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -182,7 +182,7 @@ xfs_vn_mknod( | |||
182 | if (IS_POSIXACL(dir)) { | 182 | if (IS_POSIXACL(dir)) { |
183 | default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); | 183 | default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); |
184 | if (IS_ERR(default_acl)) | 184 | if (IS_ERR(default_acl)) |
185 | return -PTR_ERR(default_acl); | 185 | return PTR_ERR(default_acl); |
186 | 186 | ||
187 | if (!default_acl) | 187 | if (!default_acl) |
188 | mode &= ~current_umask(); | 188 | mode &= ~current_umask(); |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 1e3a7ce804dc..a1a881e68a9a 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -627,68 +627,6 @@ xfs_blkdev_put( | |||
627 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); | 627 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); |
628 | } | 628 | } |
629 | 629 | ||
630 | /* | ||
631 | * Try to write out the superblock using barriers. | ||
632 | */ | ||
633 | STATIC int | ||
634 | xfs_barrier_test( | ||
635 | xfs_mount_t *mp) | ||
636 | { | ||
637 | xfs_buf_t *sbp = xfs_getsb(mp, 0); | ||
638 | int error; | ||
639 | |||
640 | XFS_BUF_UNDONE(sbp); | ||
641 | XFS_BUF_UNREAD(sbp); | ||
642 | XFS_BUF_UNDELAYWRITE(sbp); | ||
643 | XFS_BUF_WRITE(sbp); | ||
644 | XFS_BUF_UNASYNC(sbp); | ||
645 | XFS_BUF_ORDERED(sbp); | ||
646 | |||
647 | xfsbdstrat(mp, sbp); | ||
648 | error = xfs_buf_iowait(sbp); | ||
649 | |||
650 | /* | ||
651 | * Clear all the flags we set and possible error state in the | ||
652 | * buffer. We only did the write to try out whether barriers | ||
653 | * worked and shouldn't leave any traces in the superblock | ||
654 | * buffer. | ||
655 | */ | ||
656 | XFS_BUF_DONE(sbp); | ||
657 | XFS_BUF_ERROR(sbp, 0); | ||
658 | XFS_BUF_UNORDERED(sbp); | ||
659 | |||
660 | xfs_buf_relse(sbp); | ||
661 | return error; | ||
662 | } | ||
663 | |||
664 | STATIC void | ||
665 | xfs_mountfs_check_barriers(xfs_mount_t *mp) | ||
666 | { | ||
667 | int error; | ||
668 | |||
669 | if (mp->m_logdev_targp != mp->m_ddev_targp) { | ||
670 | xfs_notice(mp, | ||
671 | "Disabling barriers, not supported with external log device"); | ||
672 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | ||
673 | return; | ||
674 | } | ||
675 | |||
676 | if (xfs_readonly_buftarg(mp->m_ddev_targp)) { | ||
677 | xfs_notice(mp, | ||
678 | "Disabling barriers, underlying device is readonly"); | ||
679 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | ||
680 | return; | ||
681 | } | ||
682 | |||
683 | error = xfs_barrier_test(mp); | ||
684 | if (error) { | ||
685 | xfs_notice(mp, | ||
686 | "Disabling barriers, trial barrier write failed"); | ||
687 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | ||
688 | return; | ||
689 | } | ||
690 | } | ||
691 | |||
692 | void | 630 | void |
693 | xfs_blkdev_issue_flush( | 631 | xfs_blkdev_issue_flush( |
694 | xfs_buftarg_t *buftarg) | 632 | xfs_buftarg_t *buftarg) |
@@ -1240,14 +1178,6 @@ xfs_fs_remount( | |||
1240 | switch (token) { | 1178 | switch (token) { |
1241 | case Opt_barrier: | 1179 | case Opt_barrier: |
1242 | mp->m_flags |= XFS_MOUNT_BARRIER; | 1180 | mp->m_flags |= XFS_MOUNT_BARRIER; |
1243 | |||
1244 | /* | ||
1245 | * Test if barriers are actually working if we can, | ||
1246 | * else delay this check until the filesystem is | ||
1247 | * marked writeable. | ||
1248 | */ | ||
1249 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) | ||
1250 | xfs_mountfs_check_barriers(mp); | ||
1251 | break; | 1181 | break; |
1252 | case Opt_nobarrier: | 1182 | case Opt_nobarrier: |
1253 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | 1183 | mp->m_flags &= ~XFS_MOUNT_BARRIER; |
@@ -1282,8 +1212,6 @@ xfs_fs_remount( | |||
1282 | /* ro -> rw */ | 1212 | /* ro -> rw */ |
1283 | if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { | 1213 | if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { |
1284 | mp->m_flags &= ~XFS_MOUNT_RDONLY; | 1214 | mp->m_flags &= ~XFS_MOUNT_RDONLY; |
1285 | if (mp->m_flags & XFS_MOUNT_BARRIER) | ||
1286 | xfs_mountfs_check_barriers(mp); | ||
1287 | 1215 | ||
1288 | /* | 1216 | /* |
1289 | * If this is the first remount to writeable state we | 1217 | * If this is the first remount to writeable state we |
@@ -1465,9 +1393,6 @@ xfs_fs_fill_super( | |||
1465 | if (error) | 1393 | if (error) |
1466 | goto out_free_sb; | 1394 | goto out_free_sb; |
1467 | 1395 | ||
1468 | if (mp->m_flags & XFS_MOUNT_BARRIER) | ||
1469 | xfs_mountfs_check_barriers(mp); | ||
1470 | |||
1471 | error = xfs_filestream_mount(mp); | 1396 | error = xfs_filestream_mount(mp); |
1472 | if (error) | 1397 | if (error) |
1473 | goto out_free_sb; | 1398 | goto out_free_sb; |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 211930246f20..41d5b8f2bf92 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -1372,8 +1372,17 @@ xlog_sync(xlog_t *log, | |||
1372 | XFS_BUF_ASYNC(bp); | 1372 | XFS_BUF_ASYNC(bp); |
1373 | bp->b_flags |= XBF_LOG_BUFFER; | 1373 | bp->b_flags |= XBF_LOG_BUFFER; |
1374 | 1374 | ||
1375 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) | 1375 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { |
1376 | /* | ||
1377 | * If we have an external log device, flush the data device | ||
1378 | * before flushing the log to make sure all meta data | ||
1379 | * written back from the AIL actually made it to disk | ||
1380 | * before writing out the new log tail LSN in the log buffer. | ||
1381 | */ | ||
1382 | if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) | ||
1383 | xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); | ||
1376 | XFS_BUF_ORDERED(bp); | 1384 | XFS_BUF_ORDERED(bp); |
1385 | } | ||
1377 | 1386 | ||
1378 | ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); | 1387 | ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); |
1379 | ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); | 1388 | ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); |