aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/binfmt_flat.c17
-rw-r--r--fs/btrfs/extent-tree.c21
-rw-r--r--fs/btrfs/free-space-cache.c73
-rw-r--r--fs/btrfs/inode.c3
-rw-r--r--fs/btrfs/relocation.c9
-rw-r--r--fs/btrfs/zlib.c6
-rw-r--r--fs/cifs/CHANGES7
-rw-r--r--fs/cifs/README25
-rw-r--r--fs/cifs/cifs_dfs_ref.c12
-rw-r--r--fs/cifs/cifs_unicode.c2
-rw-r--r--fs/cifs/cifsfs.c4
-rw-r--r--fs/cifs/connect.c55
-rw-r--r--fs/compat_ioctl.c1
-rw-r--r--fs/gfs2/sys.c20
-rw-r--r--fs/inode.c40
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/namespace.c3
-rw-r--r--fs/nfs/direct.c20
-rw-r--r--fs/nfs/read.c6
-rw-r--r--fs/nfs/write.c6
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/segment.c16
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c13
-rw-r--r--fs/notify/inotify/inotify_user.c9
-rw-r--r--fs/notify/notification.c11
-rw-r--r--fs/ocfs2/alloc.c47
-rw-r--r--fs/ocfs2/aops.c69
-rw-r--r--fs/ocfs2/dcache.c35
-rw-r--r--fs/ocfs2/dcache.h3
-rw-r--r--fs/ocfs2/dlm/dlmast.c1
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/ocfs2/file.c5
-rw-r--r--fs/ocfs2/journal.c8
-rw-r--r--fs/ocfs2/journal.h19
-rw-r--r--fs/ocfs2/ocfs2.h22
-rw-r--r--fs/ocfs2/quota.h1
-rw-r--r--fs/ocfs2/quota_global.c134
-rw-r--r--fs/ocfs2/quota_local.c110
-rw-r--r--fs/ocfs2/stack_o2cb.c3
-rw-r--r--fs/ocfs2/super.c30
-rw-r--r--fs/ocfs2/xattr.c3
-rw-r--r--fs/proc/base.c27
-rw-r--r--fs/proc/task_mmu.c1
-rw-r--r--fs/proc/task_nommu.c1
-rw-r--r--fs/select.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c13
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h1
-rw-r--r--fs/xfs/xfs_attr.c8
-rw-r--r--fs/xfs/xfs_bmap.c2
-rw-r--r--fs/xfs/xfs_btree.c4
-rw-r--r--fs/xfs/xfs_da_btree.c6
-rw-r--r--fs/xfs/xfs_dir2.c2
-rw-r--r--fs/xfs/xfs_fsops.c20
-rw-r--r--fs/xfs/xfs_iget.c253
-rw-r--r--fs/xfs/xfs_inode.c10
-rw-r--r--fs/xfs/xfs_inode.h17
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c4
59 files changed, 820 insertions, 431 deletions
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 697f6b5f1313..e92f229e3c6e 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -828,15 +828,22 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
828 if (IS_ERR(bprm.file)) 828 if (IS_ERR(bprm.file))
829 return res; 829 return res;
830 830
831 bprm.cred = prepare_exec_creds();
832 res = -ENOMEM;
833 if (!bprm.cred)
834 goto out;
835
831 res = prepare_binprm(&bprm); 836 res = prepare_binprm(&bprm);
832 837
833 if (res <= (unsigned long)-4096) 838 if (res <= (unsigned long)-4096)
834 res = load_flat_file(&bprm, libs, id, NULL); 839 res = load_flat_file(&bprm, libs, id, NULL);
835 if (bprm.file) { 840
836 allow_write_access(bprm.file); 841 abort_creds(bprm.cred);
837 fput(bprm.file); 842
838 bprm.file = NULL; 843out:
839 } 844 allow_write_access(bprm.file);
845 fput(bprm.file);
846
840 return(res); 847 return(res);
841} 848}
842 849
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index dc84daee6bc4..72a2b9c28e9f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -265,10 +265,6 @@ static int caching_kthread(void *data)
265 265
266 atomic_inc(&block_group->space_info->caching_threads); 266 atomic_inc(&block_group->space_info->caching_threads);
267 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 267 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
268again:
269 /* need to make sure the commit_root doesn't disappear */
270 down_read(&fs_info->extent_commit_sem);
271
272 /* 268 /*
273 * We don't want to deadlock with somebody trying to allocate a new 269 * We don't want to deadlock with somebody trying to allocate a new
274 * extent for the extent root while also trying to search the extent 270 * extent for the extent root while also trying to search the extent
@@ -282,6 +278,10 @@ again:
282 key.objectid = last; 278 key.objectid = last;
283 key.offset = 0; 279 key.offset = 0;
284 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 280 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
281again:
282 /* need to make sure the commit_root doesn't disappear */
283 down_read(&fs_info->extent_commit_sem);
284
285 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); 285 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
286 if (ret < 0) 286 if (ret < 0)
287 goto err; 287 goto err;
@@ -304,6 +304,19 @@ again:
304 304
305 if (need_resched() || 305 if (need_resched() ||
306 btrfs_transaction_in_commit(fs_info)) { 306 btrfs_transaction_in_commit(fs_info)) {
307 leaf = path->nodes[0];
308
309 /* this shouldn't happen, but if the
310 * leaf is empty just move on.
311 */
312 if (btrfs_header_nritems(leaf) == 0)
313 break;
314 /*
315 * we need to copy the key out so that
316 * we are sure the next search advances
317 * us forward in the btree.
318 */
319 btrfs_item_key_to_cpu(leaf, &key, 0);
307 btrfs_release_path(fs_info->extent_root, path); 320 btrfs_release_path(fs_info->extent_root, path);
308 up_read(&fs_info->extent_commit_sem); 321 up_read(&fs_info->extent_commit_sem);
309 schedule_timeout(1); 322 schedule_timeout(1);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index af99b78b288e..5edcee3a617f 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -414,11 +414,29 @@ static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_gro
414 u64 *offset, u64 *bytes) 414 u64 *offset, u64 *bytes)
415{ 415{
416 u64 end; 416 u64 end;
417 u64 search_start, search_bytes;
418 int ret;
417 419
418again: 420again:
419 end = bitmap_info->offset + 421 end = bitmap_info->offset +
420 (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1; 422 (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;
421 423
424 /*
425 * XXX - this can go away after a few releases.
426 *
427 * since the only user of btrfs_remove_free_space is the tree logging
428 * stuff, and the only way to test that is under crash conditions, we
429 * want to have this debug stuff here just in case somethings not
430 * working. Search the bitmap for the space we are trying to use to
431 * make sure its actually there. If its not there then we need to stop
432 * because something has gone wrong.
433 */
434 search_start = *offset;
435 search_bytes = *bytes;
436 ret = search_bitmap(block_group, bitmap_info, &search_start,
437 &search_bytes);
438 BUG_ON(ret < 0 || search_start != *offset);
439
422 if (*offset > bitmap_info->offset && *offset + *bytes > end) { 440 if (*offset > bitmap_info->offset && *offset + *bytes > end) {
423 bitmap_clear_bits(block_group, bitmap_info, *offset, 441 bitmap_clear_bits(block_group, bitmap_info, *offset,
424 end - *offset + 1); 442 end - *offset + 1);
@@ -430,6 +448,7 @@ again:
430 } 448 }
431 449
432 if (*bytes) { 450 if (*bytes) {
451 struct rb_node *next = rb_next(&bitmap_info->offset_index);
433 if (!bitmap_info->bytes) { 452 if (!bitmap_info->bytes) {
434 unlink_free_space(block_group, bitmap_info); 453 unlink_free_space(block_group, bitmap_info);
435 kfree(bitmap_info->bitmap); 454 kfree(bitmap_info->bitmap);
@@ -438,16 +457,36 @@ again:
438 recalculate_thresholds(block_group); 457 recalculate_thresholds(block_group);
439 } 458 }
440 459
441 bitmap_info = tree_search_offset(block_group, 460 /*
442 offset_to_bitmap(block_group, 461 * no entry after this bitmap, but we still have bytes to
443 *offset), 462 * remove, so something has gone wrong.
444 1, 0); 463 */
445 if (!bitmap_info) 464 if (!next)
446 return -EINVAL; 465 return -EINVAL;
447 466
467 bitmap_info = rb_entry(next, struct btrfs_free_space,
468 offset_index);
469
470 /*
471 * if the next entry isn't a bitmap we need to return to let the
472 * extent stuff do its work.
473 */
448 if (!bitmap_info->bitmap) 474 if (!bitmap_info->bitmap)
449 return -EAGAIN; 475 return -EAGAIN;
450 476
477 /*
478 * Ok the next item is a bitmap, but it may not actually hold
479 * the information for the rest of this free space stuff, so
480 * look for it, and if we don't find it return so we can try
481 * everything over again.
482 */
483 search_start = *offset;
484 search_bytes = *bytes;
485 ret = search_bitmap(block_group, bitmap_info, &search_start,
486 &search_bytes);
487 if (ret < 0 || search_start != *offset)
488 return -EAGAIN;
489
451 goto again; 490 goto again;
452 } else if (!bitmap_info->bytes) { 491 } else if (!bitmap_info->bytes) {
453 unlink_free_space(block_group, bitmap_info); 492 unlink_free_space(block_group, bitmap_info);
@@ -644,8 +683,17 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
644again: 683again:
645 info = tree_search_offset(block_group, offset, 0, 0); 684 info = tree_search_offset(block_group, offset, 0, 0);
646 if (!info) { 685 if (!info) {
647 WARN_ON(1); 686 /*
648 goto out_lock; 687 * oops didn't find an extent that matched the space we wanted
688 * to remove, look for a bitmap instead
689 */
690 info = tree_search_offset(block_group,
691 offset_to_bitmap(block_group, offset),
692 1, 0);
693 if (!info) {
694 WARN_ON(1);
695 goto out_lock;
696 }
649 } 697 }
650 698
651 if (info->bytes < bytes && rb_next(&info->offset_index)) { 699 if (info->bytes < bytes && rb_next(&info->offset_index)) {
@@ -957,8 +1005,15 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
957 if (cluster->block_group != block_group) 1005 if (cluster->block_group != block_group)
958 goto out; 1006 goto out;
959 1007
960 entry = tree_search_offset(block_group, search_start, 0, 0); 1008 /*
961 1009 * search_start is the beginning of the bitmap, but at some point it may
1010 * be a good idea to point to the actual start of the free area in the
1011 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
1012 * to 1 to make sure we get the bitmap entry
1013 */
1014 entry = tree_search_offset(block_group,
1015 offset_to_bitmap(block_group, search_start),
1016 1, 0);
962 if (!entry || !entry->bitmap) 1017 if (!entry || !entry->bitmap)
963 goto out; 1018 goto out;
964 1019
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 56fe83fa60c4..272b9b2bea86 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4785,8 +4785,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4785 * and the replacement file is large. Start IO on it now so 4785 * and the replacement file is large. Start IO on it now so
4786 * we don't add too much work to the end of the transaction 4786 * we don't add too much work to the end of the transaction
4787 */ 4787 */
4788 if (new_inode && old_inode && S_ISREG(old_inode->i_mode) && 4788 if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size &&
4789 new_inode->i_size &&
4790 old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) 4789 old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
4791 filemap_flush(old_inode->i_mapping); 4790 filemap_flush(old_inode->i_mapping);
4792 4791
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index e71264d1c2c9..c04f7f212602 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2553,8 +2553,13 @@ int relocate_inode_pages(struct inode *inode, u64 start, u64 len)
2553 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; 2553 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2554 2554
2555 /* make sure the dirty trick played by the caller work */ 2555 /* make sure the dirty trick played by the caller work */
2556 ret = invalidate_inode_pages2_range(inode->i_mapping, 2556 while (1) {
2557 first_index, last_index); 2557 ret = invalidate_inode_pages2_range(inode->i_mapping,
2558 first_index, last_index);
2559 if (ret != -EBUSY)
2560 break;
2561 schedule_timeout(HZ/10);
2562 }
2558 if (ret) 2563 if (ret)
2559 goto out_unlock; 2564 goto out_unlock;
2560 2565
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index ecfbce836d32..3e2b90eaa239 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -208,7 +208,7 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
208 *total_in = 0; 208 *total_in = 0;
209 209
210 workspace = find_zlib_workspace(); 210 workspace = find_zlib_workspace();
211 if (!workspace) 211 if (IS_ERR(workspace))
212 return -1; 212 return -1;
213 213
214 if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { 214 if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
@@ -366,7 +366,7 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in,
366 char *kaddr; 366 char *kaddr;
367 367
368 workspace = find_zlib_workspace(); 368 workspace = find_zlib_workspace();
369 if (!workspace) 369 if (IS_ERR(workspace))
370 return -ENOMEM; 370 return -ENOMEM;
371 371
372 data_in = kmap(pages_in[page_in_index]); 372 data_in = kmap(pages_in[page_in_index]);
@@ -547,7 +547,7 @@ int btrfs_zlib_decompress(unsigned char *data_in,
547 return -ENOMEM; 547 return -ENOMEM;
548 548
549 workspace = find_zlib_workspace(); 549 workspace = find_zlib_workspace();
550 if (!workspace) 550 if (IS_ERR(workspace))
551 return -ENOMEM; 551 return -ENOMEM;
552 552
553 workspace->inf_strm.next_in = data_in; 553 workspace->inf_strm.next_in = data_in;
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 92888aa90749..e85b1e4389e0 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,10 @@
1Version 1.60
2-------------
3Fix memory leak in reconnect. Fix oops in DFS mount error path.
4Set s_maxbytes to smaller (the max that vfs can handle) so that
5sendfile will now work over cifs mounts again. Add noforcegid
6and noforceuid mount parameters.
7
1Version 1.59 8Version 1.59
2------------ 9------------
3Client uses server inode numbers (which are persistent) rather than 10Client uses server inode numbers (which are persistent) rather than
diff --git a/fs/cifs/README b/fs/cifs/README
index ad92921dbde4..79c1a93400be 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -262,11 +262,11 @@ A partial list of the supported mount options follows:
262 mount. 262 mount.
263 domain Set the SMB/CIFS workgroup name prepended to the 263 domain Set the SMB/CIFS workgroup name prepended to the
264 username during CIFS session establishment 264 username during CIFS session establishment
265 forceuid Set the default uid for inodes based on the uid 265 forceuid Set the default uid for inodes to the uid
266 passed in. For mounts to servers 266 passed in on mount. For mounts to servers
267 which do support the CIFS Unix extensions, such as a 267 which do support the CIFS Unix extensions, such as a
268 properly configured Samba server, the server provides 268 properly configured Samba server, the server provides
269 the uid, gid and mode so this parameter should not be 269 the uid, gid and mode so this parameter should not be
270 specified unless the server and clients uid and gid 270 specified unless the server and clients uid and gid
271 numbering differ. If the server and client are in the 271 numbering differ. If the server and client are in the
272 same domain (e.g. running winbind or nss_ldap) and 272 same domain (e.g. running winbind or nss_ldap) and
@@ -278,11 +278,7 @@ A partial list of the supported mount options follows:
278 of existing files will be the uid (gid) of the person 278 of existing files will be the uid (gid) of the person
279 who executed the mount (root, except when mount.cifs 279 who executed the mount (root, except when mount.cifs
280 is configured setuid for user mounts) unless the "uid=" 280 is configured setuid for user mounts) unless the "uid="
281 (gid) mount option is specified. For the uid (gid) of newly 281 (gid) mount option is specified. Also note that permission
282 created files and directories, ie files created since
283 the last mount of the server share, the expected uid
284 (gid) is cached as long as the inode remains in
285 memory on the client. Also note that permission
286 checks (authorization checks) on accesses to a file occur 282 checks (authorization checks) on accesses to a file occur
287 at the server, but there are cases in which an administrator 283 at the server, but there are cases in which an administrator
288 may want to restrict at the client as well. For those 284 may want to restrict at the client as well. For those
@@ -290,12 +286,15 @@ A partial list of the supported mount options follows:
290 (such as Windows), permissions can also be checked at the 286 (such as Windows), permissions can also be checked at the
291 client, and a crude form of client side permission checking 287 client, and a crude form of client side permission checking
292 can be enabled by specifying file_mode and dir_mode on 288 can be enabled by specifying file_mode and dir_mode on
293 the client. Note that the mount.cifs helper must be 289 the client. (default)
294 at version 1.10 or higher to support specifying the uid 290 forcegid (similar to above but for the groupid instead of uid) (default)
295 (or gid) in non-numeric form. 291 noforceuid Fill in file owner information (uid) by requesting it from
296 forcegid (similar to above but for the groupid instead of uid) 292 the server if possible. With this option, the value given in
293 the uid= option (on mount) will only be used if the server
294 can not support returning uids on inodes.
295 noforcegid (similar to above but for the group owner, gid, instead of uid)
297 uid Set the default uid for inodes, and indicate to the 296 uid Set the default uid for inodes, and indicate to the
298 cifs kernel driver which local user mounted . If the server 297 cifs kernel driver which local user mounted. If the server
299 supports the unix extensions the default uid is 298 supports the unix extensions the default uid is
300 not used to fill in the owner fields of inodes (files) 299 not used to fill in the owner fields of inodes (files)
301 unless the "forceuid" parameter is specified. 300 unless the "forceuid" parameter is specified.
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 3bb11be8b6a8..606912d8f2a8 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -55,7 +55,7 @@ void cifs_dfs_release_automount_timer(void)
55 * i.e. strips from UNC trailing path that is not part of share 55 * i.e. strips from UNC trailing path that is not part of share
56 * name and fixup missing '\' in the begining of DFS node refferal 56 * name and fixup missing '\' in the begining of DFS node refferal
57 * if neccessary. 57 * if neccessary.
58 * Returns pointer to share name on success or NULL on error. 58 * Returns pointer to share name on success or ERR_PTR on error.
59 * Caller is responsible for freeing returned string. 59 * Caller is responsible for freeing returned string.
60 */ 60 */
61static char *cifs_get_share_name(const char *node_name) 61static char *cifs_get_share_name(const char *node_name)
@@ -68,7 +68,7 @@ static char *cifs_get_share_name(const char *node_name)
68 UNC = kmalloc(len+2 /*for term null and additional \ if it's missed */, 68 UNC = kmalloc(len+2 /*for term null and additional \ if it's missed */,
69 GFP_KERNEL); 69 GFP_KERNEL);
70 if (!UNC) 70 if (!UNC)
71 return NULL; 71 return ERR_PTR(-ENOMEM);
72 72
73 /* get share name and server name */ 73 /* get share name and server name */
74 if (node_name[1] != '\\') { 74 if (node_name[1] != '\\') {
@@ -87,7 +87,7 @@ static char *cifs_get_share_name(const char *node_name)
87 cERROR(1, ("%s: no server name end in node name: %s", 87 cERROR(1, ("%s: no server name end in node name: %s",
88 __func__, node_name)); 88 __func__, node_name));
89 kfree(UNC); 89 kfree(UNC);
90 return NULL; 90 return ERR_PTR(-EINVAL);
91 } 91 }
92 92
93 /* find sharename end */ 93 /* find sharename end */
@@ -133,6 +133,12 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
133 return ERR_PTR(-EINVAL); 133 return ERR_PTR(-EINVAL);
134 134
135 *devname = cifs_get_share_name(ref->node_name); 135 *devname = cifs_get_share_name(ref->node_name);
136 if (IS_ERR(*devname)) {
137 rc = PTR_ERR(*devname);
138 *devname = NULL;
139 goto compose_mount_options_err;
140 }
141
136 rc = dns_resolve_server_name_to_ip(*devname, &srvIP); 142 rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
137 if (rc != 0) { 143 if (rc != 0) {
138 cERROR(1, ("%s: Failed to resolve server part of %s to IP: %d", 144 cERROR(1, ("%s: Failed to resolve server part of %s to IP: %d",
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 60e3c4253de0..714a542cbafc 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -44,7 +44,7 @@ cifs_ucs2_bytes(const __le16 *from, int maxbytes,
44 int maxwords = maxbytes / 2; 44 int maxwords = maxbytes / 2;
45 char tmp[NLS_MAX_CHARSET_SIZE]; 45 char tmp[NLS_MAX_CHARSET_SIZE];
46 46
47 for (i = 0; from[i] && i < maxwords; i++) { 47 for (i = 0; i < maxwords && from[i]; i++) {
48 charlen = codepage->uni2char(le16_to_cpu(from[i]), tmp, 48 charlen = codepage->uni2char(le16_to_cpu(from[i]), tmp,
49 NLS_MAX_CHARSET_SIZE); 49 NLS_MAX_CHARSET_SIZE);
50 if (charlen > 0) 50 if (charlen > 0)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 44f30504b82d..84b75253b05a 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -376,10 +376,14 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
376 seq_printf(s, ",uid=%d", cifs_sb->mnt_uid); 376 seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
377 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) 377 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
378 seq_printf(s, ",forceuid"); 378 seq_printf(s, ",forceuid");
379 else
380 seq_printf(s, ",noforceuid");
379 381
380 seq_printf(s, ",gid=%d", cifs_sb->mnt_gid); 382 seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
381 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) 383 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
382 seq_printf(s, ",forcegid"); 384 seq_printf(s, ",forcegid");
385 else
386 seq_printf(s, ",noforcegid");
383 387
384 cifs_show_address(s, tcon->ses->server); 388 cifs_show_address(s, tcon->ses->server);
385 389
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index fc44d316d0bb..1f3345d7fa79 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -803,6 +803,10 @@ cifs_parse_mount_options(char *options, const char *devname,
803 char *data; 803 char *data;
804 unsigned int temp_len, i, j; 804 unsigned int temp_len, i, j;
805 char separator[2]; 805 char separator[2];
806 short int override_uid = -1;
807 short int override_gid = -1;
808 bool uid_specified = false;
809 bool gid_specified = false;
806 810
807 separator[0] = ','; 811 separator[0] = ',';
808 separator[1] = 0; 812 separator[1] = 0;
@@ -1093,18 +1097,20 @@ cifs_parse_mount_options(char *options, const char *devname,
1093 "too long.\n"); 1097 "too long.\n");
1094 return 1; 1098 return 1;
1095 } 1099 }
1096 } else if (strnicmp(data, "uid", 3) == 0) { 1100 } else if (!strnicmp(data, "uid", 3) && value && *value) {
1097 if (value && *value) 1101 vol->linux_uid = simple_strtoul(value, &value, 0);
1098 vol->linux_uid = 1102 uid_specified = true;
1099 simple_strtoul(value, &value, 0); 1103 } else if (!strnicmp(data, "forceuid", 8)) {
1100 } else if (strnicmp(data, "forceuid", 8) == 0) { 1104 override_uid = 1;
1101 vol->override_uid = 1; 1105 } else if (!strnicmp(data, "noforceuid", 10)) {
1102 } else if (strnicmp(data, "gid", 3) == 0) { 1106 override_uid = 0;
1103 if (value && *value) 1107 } else if (!strnicmp(data, "gid", 3) && value && *value) {
1104 vol->linux_gid = 1108 vol->linux_gid = simple_strtoul(value, &value, 0);
1105 simple_strtoul(value, &value, 0); 1109 gid_specified = true;
1106 } else if (strnicmp(data, "forcegid", 8) == 0) { 1110 } else if (!strnicmp(data, "forcegid", 8)) {
1107 vol->override_gid = 1; 1111 override_gid = 1;
1112 } else if (!strnicmp(data, "noforcegid", 10)) {
1113 override_gid = 0;
1108 } else if (strnicmp(data, "file_mode", 4) == 0) { 1114 } else if (strnicmp(data, "file_mode", 4) == 0) {
1109 if (value && *value) { 1115 if (value && *value) {
1110 vol->file_mode = 1116 vol->file_mode =
@@ -1355,6 +1361,18 @@ cifs_parse_mount_options(char *options, const char *devname,
1355 if (vol->UNCip == NULL) 1361 if (vol->UNCip == NULL)
1356 vol->UNCip = &vol->UNC[2]; 1362 vol->UNCip = &vol->UNC[2];
1357 1363
1364 if (uid_specified)
1365 vol->override_uid = override_uid;
1366 else if (override_uid == 1)
1367 printk(KERN_NOTICE "CIFS: ignoring forceuid mount option "
1368 "specified with no uid= option.\n");
1369
1370 if (gid_specified)
1371 vol->override_gid = override_gid;
1372 else if (override_gid == 1)
1373 printk(KERN_NOTICE "CIFS: ignoring forcegid mount option "
1374 "specified with no gid= option.\n");
1375
1358 return 0; 1376 return 0;
1359} 1377}
1360 1378
@@ -2544,11 +2562,20 @@ remote_path_check:
2544 2562
2545 if (mount_data != mount_data_global) 2563 if (mount_data != mount_data_global)
2546 kfree(mount_data); 2564 kfree(mount_data);
2565
2547 mount_data = cifs_compose_mount_options( 2566 mount_data = cifs_compose_mount_options(
2548 cifs_sb->mountdata, full_path + 1, 2567 cifs_sb->mountdata, full_path + 1,
2549 referrals, &fake_devname); 2568 referrals, &fake_devname);
2550 kfree(fake_devname); 2569
2551 free_dfs_info_array(referrals, num_referrals); 2570 free_dfs_info_array(referrals, num_referrals);
2571 kfree(fake_devname);
2572 kfree(full_path);
2573
2574 if (IS_ERR(mount_data)) {
2575 rc = PTR_ERR(mount_data);
2576 mount_data = NULL;
2577 goto mount_fail_check;
2578 }
2552 2579
2553 if (tcon) 2580 if (tcon)
2554 cifs_put_tcon(tcon); 2581 cifs_put_tcon(tcon);
@@ -2556,8 +2583,6 @@ remote_path_check:
2556 cifs_put_smb_ses(pSesInfo); 2583 cifs_put_smb_ses(pSesInfo);
2557 2584
2558 cleanup_volume_info(&volume_info); 2585 cleanup_volume_info(&volume_info);
2559 FreeXid(xid);
2560 kfree(full_path);
2561 referral_walks_count++; 2586 referral_walks_count++;
2562 goto try_mount_again; 2587 goto try_mount_again;
2563 } 2588 }
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index f28f070a60fc..f91fd51b32e3 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1905,6 +1905,7 @@ COMPATIBLE_IOCTL(FIONCLEX)
1905COMPATIBLE_IOCTL(FIOASYNC) 1905COMPATIBLE_IOCTL(FIOASYNC)
1906COMPATIBLE_IOCTL(FIONBIO) 1906COMPATIBLE_IOCTL(FIONBIO)
1907COMPATIBLE_IOCTL(FIONREAD) /* This is also TIOCINQ */ 1907COMPATIBLE_IOCTL(FIONREAD) /* This is also TIOCINQ */
1908COMPATIBLE_IOCTL(FS_IOC_FIEMAP)
1908/* 0x00 */ 1909/* 0x00 */
1909COMPATIBLE_IOCTL(FIBMAP) 1910COMPATIBLE_IOCTL(FIBMAP)
1910COMPATIBLE_IOCTL(FIGETBSZ) 1911COMPATIBLE_IOCTL(FIGETBSZ)
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 23419dc3027b..a7cbfbd340c7 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -386,16 +386,16 @@ static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf)
386#define GDLM_ATTR(_name,_mode,_show,_store) \ 386#define GDLM_ATTR(_name,_mode,_show,_store) \
387static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) 387static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
388 388
389GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); 389GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
390GDLM_ATTR(block, 0644, block_show, block_store); 390GDLM_ATTR(block, 0644, block_show, block_store);
391GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); 391GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
392GDLM_ATTR(id, 0444, lkid_show, NULL); 392GDLM_ATTR(id, 0444, lkid_show, NULL);
393GDLM_ATTR(jid, 0444, jid_show, NULL); 393GDLM_ATTR(jid, 0444, jid_show, NULL);
394GDLM_ATTR(first, 0444, lkfirst_show, NULL); 394GDLM_ATTR(first, 0444, lkfirst_show, NULL);
395GDLM_ATTR(first_done, 0444, first_done_show, NULL); 395GDLM_ATTR(first_done, 0444, first_done_show, NULL);
396GDLM_ATTR(recover, 0200, NULL, recover_store); 396GDLM_ATTR(recover, 0600, NULL, recover_store);
397GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); 397GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
398GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); 398GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
399 399
400static struct attribute *lock_module_attrs[] = { 400static struct attribute *lock_module_attrs[] = {
401 &gdlm_attr_proto_name.attr, 401 &gdlm_attr_proto_name.attr,
diff --git a/fs/inode.c b/fs/inode.c
index 901bad1e5f12..ae7b67e48661 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -120,12 +120,11 @@ static void wake_up_inode(struct inode *inode)
120 * These are initializations that need to be done on every inode 120 * These are initializations that need to be done on every inode
121 * allocation as the fields are not initialised by slab allocation. 121 * allocation as the fields are not initialised by slab allocation.
122 */ 122 */
123struct inode *inode_init_always(struct super_block *sb, struct inode *inode) 123int inode_init_always(struct super_block *sb, struct inode *inode)
124{ 124{
125 static const struct address_space_operations empty_aops; 125 static const struct address_space_operations empty_aops;
126 static struct inode_operations empty_iops; 126 static struct inode_operations empty_iops;
127 static const struct file_operations empty_fops; 127 static const struct file_operations empty_fops;
128
129 struct address_space *const mapping = &inode->i_data; 128 struct address_space *const mapping = &inode->i_data;
130 129
131 inode->i_sb = sb; 130 inode->i_sb = sb;
@@ -152,7 +151,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
152 inode->dirtied_when = 0; 151 inode->dirtied_when = 0;
153 152
154 if (security_inode_alloc(inode)) 153 if (security_inode_alloc(inode))
155 goto out_free_inode; 154 goto out;
156 155
157 /* allocate and initialize an i_integrity */ 156 /* allocate and initialize an i_integrity */
158 if (ima_inode_alloc(inode)) 157 if (ima_inode_alloc(inode))
@@ -198,16 +197,12 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
198 inode->i_fsnotify_mask = 0; 197 inode->i_fsnotify_mask = 0;
199#endif 198#endif
200 199
201 return inode; 200 return 0;
202 201
203out_free_security: 202out_free_security:
204 security_inode_free(inode); 203 security_inode_free(inode);
205out_free_inode: 204out:
206 if (inode->i_sb->s_op->destroy_inode) 205 return -ENOMEM;
207 inode->i_sb->s_op->destroy_inode(inode);
208 else
209 kmem_cache_free(inode_cachep, (inode));
210 return NULL;
211} 206}
212EXPORT_SYMBOL(inode_init_always); 207EXPORT_SYMBOL(inode_init_always);
213 208
@@ -220,12 +215,21 @@ static struct inode *alloc_inode(struct super_block *sb)
220 else 215 else
221 inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL); 216 inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
222 217
223 if (inode) 218 if (!inode)
224 return inode_init_always(sb, inode); 219 return NULL;
225 return NULL; 220
221 if (unlikely(inode_init_always(sb, inode))) {
222 if (inode->i_sb->s_op->destroy_inode)
223 inode->i_sb->s_op->destroy_inode(inode);
224 else
225 kmem_cache_free(inode_cachep, inode);
226 return NULL;
227 }
228
229 return inode;
226} 230}
227 231
228void destroy_inode(struct inode *inode) 232void __destroy_inode(struct inode *inode)
229{ 233{
230 BUG_ON(inode_has_buffers(inode)); 234 BUG_ON(inode_has_buffers(inode));
231 ima_inode_free(inode); 235 ima_inode_free(inode);
@@ -237,13 +241,17 @@ void destroy_inode(struct inode *inode)
237 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) 241 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
238 posix_acl_release(inode->i_default_acl); 242 posix_acl_release(inode->i_default_acl);
239#endif 243#endif
244}
245EXPORT_SYMBOL(__destroy_inode);
246
247void destroy_inode(struct inode *inode)
248{
249 __destroy_inode(inode);
240 if (inode->i_sb->s_op->destroy_inode) 250 if (inode->i_sb->s_op->destroy_inode)
241 inode->i_sb->s_op->destroy_inode(inode); 251 inode->i_sb->s_op->destroy_inode(inode);
242 else 252 else
243 kmem_cache_free(inode_cachep, (inode)); 253 kmem_cache_free(inode_cachep, (inode));
244} 254}
245EXPORT_SYMBOL(destroy_inode);
246
247 255
248/* 256/*
249 * These are initializations that only need to be done 257 * These are initializations that only need to be done
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 5edc2bf20581..23c947539864 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -99,7 +99,7 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
99 kunmap(pg); 99 kunmap(pg);
100 100
101 D2(printk(KERN_DEBUG "readpage finished\n")); 101 D2(printk(KERN_DEBUG "readpage finished\n"));
102 return 0; 102 return ret;
103} 103}
104 104
105int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg) 105int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg)
diff --git a/fs/namespace.c b/fs/namespace.c
index 277c28a63ead..7230787d18b0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -316,7 +316,8 @@ EXPORT_SYMBOL_GPL(mnt_clone_write);
316 */ 316 */
317int mnt_want_write_file(struct file *file) 317int mnt_want_write_file(struct file *file)
318{ 318{
319 if (!(file->f_mode & FMODE_WRITE)) 319 struct inode *inode = file->f_dentry->d_inode;
320 if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
320 return mnt_want_write(file->f_path.mnt); 321 return mnt_want_write(file->f_path.mnt);
321 else 322 else
322 return mnt_clone_write(file->f_path.mnt); 323 return mnt_clone_write(file->f_path.mnt);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 489fc01a3204..e4e089a8f294 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -255,7 +255,7 @@ static void nfs_direct_read_release(void *calldata)
255 255
256 if (put_dreq(dreq)) 256 if (put_dreq(dreq))
257 nfs_direct_complete(dreq); 257 nfs_direct_complete(dreq);
258 nfs_readdata_release(calldata); 258 nfs_readdata_free(data);
259} 259}
260 260
261static const struct rpc_call_ops nfs_read_direct_ops = { 261static const struct rpc_call_ops nfs_read_direct_ops = {
@@ -314,14 +314,14 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
314 data->npages, 1, 0, data->pagevec, NULL); 314 data->npages, 1, 0, data->pagevec, NULL);
315 up_read(&current->mm->mmap_sem); 315 up_read(&current->mm->mmap_sem);
316 if (result < 0) { 316 if (result < 0) {
317 nfs_readdata_release(data); 317 nfs_readdata_free(data);
318 break; 318 break;
319 } 319 }
320 if ((unsigned)result < data->npages) { 320 if ((unsigned)result < data->npages) {
321 bytes = result * PAGE_SIZE; 321 bytes = result * PAGE_SIZE;
322 if (bytes <= pgbase) { 322 if (bytes <= pgbase) {
323 nfs_direct_release_pages(data->pagevec, result); 323 nfs_direct_release_pages(data->pagevec, result);
324 nfs_readdata_release(data); 324 nfs_readdata_free(data);
325 break; 325 break;
326 } 326 }
327 bytes -= pgbase; 327 bytes -= pgbase;
@@ -334,7 +334,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
334 data->inode = inode; 334 data->inode = inode;
335 data->cred = msg.rpc_cred; 335 data->cred = msg.rpc_cred;
336 data->args.fh = NFS_FH(inode); 336 data->args.fh = NFS_FH(inode);
337 data->args.context = get_nfs_open_context(ctx); 337 data->args.context = ctx;
338 data->args.offset = pos; 338 data->args.offset = pos;
339 data->args.pgbase = pgbase; 339 data->args.pgbase = pgbase;
340 data->args.pages = data->pagevec; 340 data->args.pages = data->pagevec;
@@ -441,7 +441,7 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
441 struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); 441 struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
442 list_del(&data->pages); 442 list_del(&data->pages);
443 nfs_direct_release_pages(data->pagevec, data->npages); 443 nfs_direct_release_pages(data->pagevec, data->npages);
444 nfs_writedata_release(data); 444 nfs_writedata_free(data);
445 } 445 }
446} 446}
447 447
@@ -534,7 +534,7 @@ static void nfs_direct_commit_release(void *calldata)
534 534
535 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); 535 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
536 nfs_direct_write_complete(dreq, data->inode); 536 nfs_direct_write_complete(dreq, data->inode);
537 nfs_commitdata_release(calldata); 537 nfs_commit_free(data);
538} 538}
539 539
540static const struct rpc_call_ops nfs_commit_direct_ops = { 540static const struct rpc_call_ops nfs_commit_direct_ops = {
@@ -570,7 +570,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
570 data->args.fh = NFS_FH(data->inode); 570 data->args.fh = NFS_FH(data->inode);
571 data->args.offset = 0; 571 data->args.offset = 0;
572 data->args.count = 0; 572 data->args.count = 0;
573 data->args.context = get_nfs_open_context(dreq->ctx); 573 data->args.context = dreq->ctx;
574 data->res.count = 0; 574 data->res.count = 0;
575 data->res.fattr = &data->fattr; 575 data->res.fattr = &data->fattr;
576 data->res.verf = &data->verf; 576 data->res.verf = &data->verf;
@@ -734,14 +734,14 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
734 data->npages, 0, 0, data->pagevec, NULL); 734 data->npages, 0, 0, data->pagevec, NULL);
735 up_read(&current->mm->mmap_sem); 735 up_read(&current->mm->mmap_sem);
736 if (result < 0) { 736 if (result < 0) {
737 nfs_writedata_release(data); 737 nfs_writedata_free(data);
738 break; 738 break;
739 } 739 }
740 if ((unsigned)result < data->npages) { 740 if ((unsigned)result < data->npages) {
741 bytes = result * PAGE_SIZE; 741 bytes = result * PAGE_SIZE;
742 if (bytes <= pgbase) { 742 if (bytes <= pgbase) {
743 nfs_direct_release_pages(data->pagevec, result); 743 nfs_direct_release_pages(data->pagevec, result);
744 nfs_writedata_release(data); 744 nfs_writedata_free(data);
745 break; 745 break;
746 } 746 }
747 bytes -= pgbase; 747 bytes -= pgbase;
@@ -756,7 +756,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
756 data->inode = inode; 756 data->inode = inode;
757 data->cred = msg.rpc_cred; 757 data->cred = msg.rpc_cred;
758 data->args.fh = NFS_FH(inode); 758 data->args.fh = NFS_FH(inode);
759 data->args.context = get_nfs_open_context(ctx); 759 data->args.context = ctx;
760 data->args.offset = pos; 760 data->args.offset = pos;
761 data->args.pgbase = pgbase; 761 data->args.pgbase = pgbase;
762 data->args.pages = data->pagevec; 762 data->args.pages = data->pagevec;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 73ea5e8d66ce..12c9e66d3f1d 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -60,17 +60,15 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
60 return p; 60 return p;
61} 61}
62 62
63static void nfs_readdata_free(struct nfs_read_data *p) 63void nfs_readdata_free(struct nfs_read_data *p)
64{ 64{
65 if (p && (p->pagevec != &p->page_array[0])) 65 if (p && (p->pagevec != &p->page_array[0]))
66 kfree(p->pagevec); 66 kfree(p->pagevec);
67 mempool_free(p, nfs_rdata_mempool); 67 mempool_free(p, nfs_rdata_mempool);
68} 68}
69 69
70void nfs_readdata_release(void *data) 70static void nfs_readdata_release(struct nfs_read_data *rdata)
71{ 71{
72 struct nfs_read_data *rdata = data;
73
74 put_nfs_open_context(rdata->args.context); 72 put_nfs_open_context(rdata->args.context);
75 nfs_readdata_free(rdata); 73 nfs_readdata_free(rdata);
76} 74}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0a0a2ff767c3..a34fae21fe10 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -87,17 +87,15 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
87 return p; 87 return p;
88} 88}
89 89
90static void nfs_writedata_free(struct nfs_write_data *p) 90void nfs_writedata_free(struct nfs_write_data *p)
91{ 91{
92 if (p && (p->pagevec != &p->page_array[0])) 92 if (p && (p->pagevec != &p->page_array[0]))
93 kfree(p->pagevec); 93 kfree(p->pagevec);
94 mempool_free(p, nfs_wdata_mempool); 94 mempool_free(p, nfs_wdata_mempool);
95} 95}
96 96
97void nfs_writedata_release(void *data) 97static void nfs_writedata_release(struct nfs_write_data *wdata)
98{ 98{
99 struct nfs_write_data *wdata = data;
100
101 put_nfs_open_context(wdata->args.context); 99 put_nfs_open_context(wdata->args.context);
102 nfs_writedata_free(wdata); 100 nfs_writedata_free(wdata);
103} 101}
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 3d3ddb3f5177..2dfd47714ae5 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -412,8 +412,10 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
412 return 0; /* Do not request flush for shadow page cache */ 412 return 0; /* Do not request flush for shadow page cache */
413 if (!sb) { 413 if (!sb) {
414 writer = nilfs_get_writer(NILFS_MDT(inode)->mi_nilfs); 414 writer = nilfs_get_writer(NILFS_MDT(inode)->mi_nilfs);
415 if (!writer) 415 if (!writer) {
416 nilfs_put_writer(NILFS_MDT(inode)->mi_nilfs);
416 return -EROFS; 417 return -EROFS;
418 }
417 sb = writer->s_super; 419 sb = writer->s_super;
418 } 420 }
419 421
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 8b5e4778cf28..51ff3d0a4ee2 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1859,12 +1859,26 @@ static void nilfs_end_page_io(struct page *page, int err)
1859 if (!page) 1859 if (!page)
1860 return; 1860 return;
1861 1861
1862 if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) 1862 if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) {
1863 /* 1863 /*
1864 * For b-tree node pages, this function may be called twice 1864 * For b-tree node pages, this function may be called twice
1865 * or more because they might be split in a segment. 1865 * or more because they might be split in a segment.
1866 */ 1866 */
1867 if (PageDirty(page)) {
1868 /*
1869 * For pages holding split b-tree node buffers, dirty
1870 * flag on the buffers may be cleared discretely.
1871 * In that case, the page is once redirtied for
1872 * remaining buffers, and it must be cancelled if
1873 * all the buffers get cleaned later.
1874 */
1875 lock_page(page);
1876 if (nilfs_page_buffers_clean(page))
1877 __nilfs_clear_page_dirty(page);
1878 unlock_page(page);
1879 }
1867 return; 1880 return;
1881 }
1868 1882
1869 __nilfs_end_page_io(page, err); 1883 __nilfs_end_page_io(page, err);
1870} 1884}
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 47cd258fd24d..5dcbafe72d71 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -62,13 +62,14 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
62 event_priv->wd = wd; 62 event_priv->wd = wd;
63 63
64 ret = fsnotify_add_notify_event(group, event, fsn_event_priv); 64 ret = fsnotify_add_notify_event(group, event, fsn_event_priv);
65 /* EEXIST is not an error */ 65 if (ret) {
66 if (ret == -EEXIST)
67 ret = 0;
68
69 /* did event_priv get attached? */
70 if (list_empty(&fsn_event_priv->event_list))
71 inotify_free_event_priv(fsn_event_priv); 66 inotify_free_event_priv(fsn_event_priv);
67 /* EEXIST says we tail matched, EOVERFLOW isn't something
68 * to report up the stack. */
69 if ((ret == -EEXIST) ||
70 (ret == -EOVERFLOW))
71 ret = 0;
72 }
72 73
73 /* 74 /*
74 * If we hold the entry until after the event is on the queue 75 * If we hold the entry until after the event is on the queue
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index f30d9bbc2e1b..dc32ed8323ba 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -386,6 +386,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
386 struct fsnotify_event *ignored_event; 386 struct fsnotify_event *ignored_event;
387 struct inotify_event_private_data *event_priv; 387 struct inotify_event_private_data *event_priv;
388 struct fsnotify_event_private_data *fsn_event_priv; 388 struct fsnotify_event_private_data *fsn_event_priv;
389 int ret;
389 390
390 ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, 391 ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
391 FSNOTIFY_EVENT_NONE, NULL, 0, 392 FSNOTIFY_EVENT_NONE, NULL, 0,
@@ -404,10 +405,8 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
404 fsn_event_priv->group = group; 405 fsn_event_priv->group = group;
405 event_priv->wd = ientry->wd; 406 event_priv->wd = ientry->wd;
406 407
407 fsnotify_add_notify_event(group, ignored_event, fsn_event_priv); 408 ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv);
408 409 if (ret)
409 /* did the private data get added? */
410 if (list_empty(&fsn_event_priv->event_list))
411 inotify_free_event_priv(fsn_event_priv); 410 inotify_free_event_priv(fsn_event_priv);
412 411
413skip_send_ignore: 412skip_send_ignore:
@@ -568,7 +567,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
568 567
569 spin_lock_init(&group->inotify_data.idr_lock); 568 spin_lock_init(&group->inotify_data.idr_lock);
570 idr_init(&group->inotify_data.idr); 569 idr_init(&group->inotify_data.idr);
571 group->inotify_data.last_wd = 0; 570 group->inotify_data.last_wd = 1;
572 group->inotify_data.user = user; 571 group->inotify_data.user = user;
573 group->inotify_data.fa = NULL; 572 group->inotify_data.fa = NULL;
574 573
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 521368574e97..3816d5750dd5 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -153,6 +153,10 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new
153 return true; 153 return true;
154 break; 154 break;
155 case (FSNOTIFY_EVENT_NONE): 155 case (FSNOTIFY_EVENT_NONE):
156 if (old->mask & FS_Q_OVERFLOW)
157 return true;
158 else if (old->mask & FS_IN_IGNORED)
159 return false;
156 return false; 160 return false;
157 }; 161 };
158 } 162 }
@@ -171,9 +175,7 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even
171 struct list_head *list = &group->notification_list; 175 struct list_head *list = &group->notification_list;
172 struct fsnotify_event_holder *last_holder; 176 struct fsnotify_event_holder *last_holder;
173 struct fsnotify_event *last_event; 177 struct fsnotify_event *last_event;
174 178 int ret = 0;
175 /* easy to tell if priv was attached to the event */
176 INIT_LIST_HEAD(&priv->event_list);
177 179
178 /* 180 /*
179 * There is one fsnotify_event_holder embedded inside each fsnotify_event. 181 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
@@ -194,6 +196,7 @@ alloc_holder:
194 196
195 if (group->q_len >= group->max_events) { 197 if (group->q_len >= group->max_events) {
196 event = &q_overflow_event; 198 event = &q_overflow_event;
199 ret = -EOVERFLOW;
197 /* sorry, no private data on the overflow event */ 200 /* sorry, no private data on the overflow event */
198 priv = NULL; 201 priv = NULL;
199 } 202 }
@@ -235,7 +238,7 @@ alloc_holder:
235 mutex_unlock(&group->notification_mutex); 238 mutex_unlock(&group->notification_mutex);
236 239
237 wake_up(&group->notification_waitq); 240 wake_up(&group->notification_waitq);
238 return 0; 241 return ret;
239} 242}
240 243
241/* 244/*
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9edcde4974aa..f9a3e8942669 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1914,7 +1914,8 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
1914 * immediately to their right. 1914 * immediately to their right.
1915 */ 1915 */
1916 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); 1916 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
1917 if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) { 1917 if (!ocfs2_rec_clusters(right_child_el, &right_child_el->l_recs[0])) {
1918 BUG_ON(right_child_el->l_tree_depth);
1918 BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1); 1919 BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
1919 left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos); 1920 left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
1920 } 1921 }
@@ -2476,15 +2477,37 @@ out_ret_path:
2476 return ret; 2477 return ret;
2477} 2478}
2478 2479
2479static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, 2480static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
2480 struct ocfs2_path *path) 2481 int subtree_index, struct ocfs2_path *path)
2481{ 2482{
2482 int i, idx; 2483 int i, idx, ret;
2483 struct ocfs2_extent_rec *rec; 2484 struct ocfs2_extent_rec *rec;
2484 struct ocfs2_extent_list *el; 2485 struct ocfs2_extent_list *el;
2485 struct ocfs2_extent_block *eb; 2486 struct ocfs2_extent_block *eb;
2486 u32 range; 2487 u32 range;
2487 2488
2489 /*
2490 * In normal tree rotation process, we will never touch the
2491 * tree branch above subtree_index and ocfs2_extend_rotate_transaction
2492 * doesn't reserve the credits for them either.
2493 *
2494 * But we do have a special case here which will update the rightmost
2495 * records for all the bh in the path.
2496 * So we have to allocate extra credits and access them.
2497 */
2498 ret = ocfs2_extend_trans(handle,
2499 handle->h_buffer_credits + subtree_index);
2500 if (ret) {
2501 mlog_errno(ret);
2502 goto out;
2503 }
2504
2505 ret = ocfs2_journal_access_path(inode, handle, path);
2506 if (ret) {
2507 mlog_errno(ret);
2508 goto out;
2509 }
2510
2488 /* Path should always be rightmost. */ 2511 /* Path should always be rightmost. */
2489 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; 2512 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
2490 BUG_ON(eb->h_next_leaf_blk != 0ULL); 2513 BUG_ON(eb->h_next_leaf_blk != 0ULL);
@@ -2505,6 +2528,8 @@ static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
2505 2528
2506 ocfs2_journal_dirty(handle, path->p_node[i].bh); 2529 ocfs2_journal_dirty(handle, path->p_node[i].bh);
2507 } 2530 }
2531out:
2532 return ret;
2508} 2533}
2509 2534
2510static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, 2535static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
@@ -2717,7 +2742,12 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2717 if (del_right_subtree) { 2742 if (del_right_subtree) {
2718 ocfs2_unlink_subtree(inode, handle, left_path, right_path, 2743 ocfs2_unlink_subtree(inode, handle, left_path, right_path,
2719 subtree_index, dealloc); 2744 subtree_index, dealloc);
2720 ocfs2_update_edge_lengths(inode, handle, left_path); 2745 ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
2746 left_path);
2747 if (ret) {
2748 mlog_errno(ret);
2749 goto out;
2750 }
2721 2751
2722 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 2752 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
2723 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 2753 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
@@ -3034,7 +3064,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
3034 3064
3035 ocfs2_unlink_subtree(inode, handle, left_path, path, 3065 ocfs2_unlink_subtree(inode, handle, left_path, path,
3036 subtree_index, dealloc); 3066 subtree_index, dealloc);
3037 ocfs2_update_edge_lengths(inode, handle, left_path); 3067 ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
3068 left_path);
3069 if (ret) {
3070 mlog_errno(ret);
3071 goto out;
3072 }
3038 3073
3039 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 3074 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
3040 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 3075 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index b2c52b3a1484..b401654011a2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -193,6 +193,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
193 (unsigned long long)OCFS2_I(inode)->ip_blkno); 193 (unsigned long long)OCFS2_I(inode)->ip_blkno);
194 mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters); 194 mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters);
195 dump_stack(); 195 dump_stack();
196 goto bail;
196 } 197 }
197 198
198 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); 199 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
@@ -894,18 +895,17 @@ struct ocfs2_write_cluster_desc {
894 */ 895 */
895 unsigned c_new; 896 unsigned c_new;
896 unsigned c_unwritten; 897 unsigned c_unwritten;
898 unsigned c_needs_zero;
897}; 899};
898 900
899static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
900{
901 return d->c_new || d->c_unwritten;
902}
903
904struct ocfs2_write_ctxt { 901struct ocfs2_write_ctxt {
905 /* Logical cluster position / len of write */ 902 /* Logical cluster position / len of write */
906 u32 w_cpos; 903 u32 w_cpos;
907 u32 w_clen; 904 u32 w_clen;
908 905
906 /* First cluster allocated in a nonsparse extend */
907 u32 w_first_new_cpos;
908
909 struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE]; 909 struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
910 910
911 /* 911 /*
@@ -983,6 +983,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
983 return -ENOMEM; 983 return -ENOMEM;
984 984
985 wc->w_cpos = pos >> osb->s_clustersize_bits; 985 wc->w_cpos = pos >> osb->s_clustersize_bits;
986 wc->w_first_new_cpos = UINT_MAX;
986 cend = (pos + len - 1) >> osb->s_clustersize_bits; 987 cend = (pos + len - 1) >> osb->s_clustersize_bits;
987 wc->w_clen = cend - wc->w_cpos + 1; 988 wc->w_clen = cend - wc->w_cpos + 1;
988 get_bh(di_bh); 989 get_bh(di_bh);
@@ -1217,20 +1218,18 @@ out:
1217 */ 1218 */
1218static int ocfs2_write_cluster(struct address_space *mapping, 1219static int ocfs2_write_cluster(struct address_space *mapping,
1219 u32 phys, unsigned int unwritten, 1220 u32 phys, unsigned int unwritten,
1221 unsigned int should_zero,
1220 struct ocfs2_alloc_context *data_ac, 1222 struct ocfs2_alloc_context *data_ac,
1221 struct ocfs2_alloc_context *meta_ac, 1223 struct ocfs2_alloc_context *meta_ac,
1222 struct ocfs2_write_ctxt *wc, u32 cpos, 1224 struct ocfs2_write_ctxt *wc, u32 cpos,
1223 loff_t user_pos, unsigned user_len) 1225 loff_t user_pos, unsigned user_len)
1224{ 1226{
1225 int ret, i, new, should_zero = 0; 1227 int ret, i, new;
1226 u64 v_blkno, p_blkno; 1228 u64 v_blkno, p_blkno;
1227 struct inode *inode = mapping->host; 1229 struct inode *inode = mapping->host;
1228 struct ocfs2_extent_tree et; 1230 struct ocfs2_extent_tree et;
1229 1231
1230 new = phys == 0 ? 1 : 0; 1232 new = phys == 0 ? 1 : 0;
1231 if (new || unwritten)
1232 should_zero = 1;
1233
1234 if (new) { 1233 if (new) {
1235 u32 tmp_pos; 1234 u32 tmp_pos;
1236 1235
@@ -1301,7 +1300,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1301 if (tmpret) { 1300 if (tmpret) {
1302 mlog_errno(tmpret); 1301 mlog_errno(tmpret);
1303 if (ret == 0) 1302 if (ret == 0)
1304 tmpret = ret; 1303 ret = tmpret;
1305 } 1304 }
1306 } 1305 }
1307 1306
@@ -1341,7 +1340,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
1341 local_len = osb->s_clustersize - cluster_off; 1340 local_len = osb->s_clustersize - cluster_off;
1342 1341
1343 ret = ocfs2_write_cluster(mapping, desc->c_phys, 1342 ret = ocfs2_write_cluster(mapping, desc->c_phys,
1344 desc->c_unwritten, data_ac, meta_ac, 1343 desc->c_unwritten,
1344 desc->c_needs_zero,
1345 data_ac, meta_ac,
1345 wc, desc->c_cpos, pos, local_len); 1346 wc, desc->c_cpos, pos, local_len);
1346 if (ret) { 1347 if (ret) {
1347 mlog_errno(ret); 1348 mlog_errno(ret);
@@ -1391,14 +1392,14 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
1391 * newly allocated cluster. 1392 * newly allocated cluster.
1392 */ 1393 */
1393 desc = &wc->w_desc[0]; 1394 desc = &wc->w_desc[0];
1394 if (ocfs2_should_zero_cluster(desc)) 1395 if (desc->c_needs_zero)
1395 ocfs2_figure_cluster_boundaries(osb, 1396 ocfs2_figure_cluster_boundaries(osb,
1396 desc->c_cpos, 1397 desc->c_cpos,
1397 &wc->w_target_from, 1398 &wc->w_target_from,
1398 NULL); 1399 NULL);
1399 1400
1400 desc = &wc->w_desc[wc->w_clen - 1]; 1401 desc = &wc->w_desc[wc->w_clen - 1];
1401 if (ocfs2_should_zero_cluster(desc)) 1402 if (desc->c_needs_zero)
1402 ocfs2_figure_cluster_boundaries(osb, 1403 ocfs2_figure_cluster_boundaries(osb,
1403 desc->c_cpos, 1404 desc->c_cpos,
1404 NULL, 1405 NULL,
@@ -1466,13 +1467,28 @@ static int ocfs2_populate_write_desc(struct inode *inode,
1466 phys++; 1467 phys++;
1467 } 1468 }
1468 1469
1470 /*
1471 * If w_first_new_cpos is < UINT_MAX, we have a non-sparse
1472 * file that got extended. w_first_new_cpos tells us
1473 * where the newly allocated clusters are so we can
1474 * zero them.
1475 */
1476 if (desc->c_cpos >= wc->w_first_new_cpos) {
1477 BUG_ON(phys == 0);
1478 desc->c_needs_zero = 1;
1479 }
1480
1469 desc->c_phys = phys; 1481 desc->c_phys = phys;
1470 if (phys == 0) { 1482 if (phys == 0) {
1471 desc->c_new = 1; 1483 desc->c_new = 1;
1484 desc->c_needs_zero = 1;
1472 *clusters_to_alloc = *clusters_to_alloc + 1; 1485 *clusters_to_alloc = *clusters_to_alloc + 1;
1473 } 1486 }
1474 if (ext_flags & OCFS2_EXT_UNWRITTEN) 1487
1488 if (ext_flags & OCFS2_EXT_UNWRITTEN) {
1475 desc->c_unwritten = 1; 1489 desc->c_unwritten = 1;
1490 desc->c_needs_zero = 1;
1491 }
1476 1492
1477 num_clusters--; 1493 num_clusters--;
1478 } 1494 }
@@ -1632,10 +1648,13 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
1632 if (newsize <= i_size_read(inode)) 1648 if (newsize <= i_size_read(inode))
1633 return 0; 1649 return 0;
1634 1650
1635 ret = ocfs2_extend_no_holes(inode, newsize, newsize - len); 1651 ret = ocfs2_extend_no_holes(inode, newsize, pos);
1636 if (ret) 1652 if (ret)
1637 mlog_errno(ret); 1653 mlog_errno(ret);
1638 1654
1655 wc->w_first_new_cpos =
1656 ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
1657
1639 return ret; 1658 return ret;
1640} 1659}
1641 1660
@@ -1644,7 +1663,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1644 struct page **pagep, void **fsdata, 1663 struct page **pagep, void **fsdata,
1645 struct buffer_head *di_bh, struct page *mmap_page) 1664 struct buffer_head *di_bh, struct page *mmap_page)
1646{ 1665{
1647 int ret, credits = OCFS2_INODE_UPDATE_CREDITS; 1666 int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
1648 unsigned int clusters_to_alloc, extents_to_split; 1667 unsigned int clusters_to_alloc, extents_to_split;
1649 struct ocfs2_write_ctxt *wc; 1668 struct ocfs2_write_ctxt *wc;
1650 struct inode *inode = mapping->host; 1669 struct inode *inode = mapping->host;
@@ -1722,8 +1741,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1722 1741
1723 } 1742 }
1724 1743
1725 ocfs2_set_target_boundaries(osb, wc, pos, len, 1744 /*
1726 clusters_to_alloc + extents_to_split); 1745 * We have to zero sparse allocated clusters, unwritten extent clusters,
1746 * and non-sparse clusters we just extended. For non-sparse writes,
1747 * we know zeros will only be needed in the first and/or last cluster.
1748 */
1749 if (clusters_to_alloc || extents_to_split ||
1750 wc->w_desc[0].c_needs_zero ||
1751 wc->w_desc[wc->w_clen - 1].c_needs_zero)
1752 cluster_of_pages = 1;
1753 else
1754 cluster_of_pages = 0;
1755
1756 ocfs2_set_target_boundaries(osb, wc, pos, len, cluster_of_pages);
1727 1757
1728 handle = ocfs2_start_trans(osb, credits); 1758 handle = ocfs2_start_trans(osb, credits);
1729 if (IS_ERR(handle)) { 1759 if (IS_ERR(handle)) {
@@ -1756,8 +1786,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1756 * extent. 1786 * extent.
1757 */ 1787 */
1758 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, 1788 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
1759 clusters_to_alloc + extents_to_split, 1789 cluster_of_pages, mmap_page);
1760 mmap_page);
1761 if (ret) { 1790 if (ret) {
1762 mlog_errno(ret); 1791 mlog_errno(ret);
1763 goto out_quota; 1792 goto out_quota;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b574431a031d..2f28b7de2c8d 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -310,22 +310,19 @@ out_attach:
310 return ret; 310 return ret;
311} 311}
312 312
313static DEFINE_SPINLOCK(dentry_list_lock); 313DEFINE_SPINLOCK(dentry_list_lock);
314 314
315/* We limit the number of dentry locks to drop in one go. We have 315/* We limit the number of dentry locks to drop in one go. We have
316 * this limit so that we don't starve other users of ocfs2_wq. */ 316 * this limit so that we don't starve other users of ocfs2_wq. */
317#define DL_INODE_DROP_COUNT 64 317#define DL_INODE_DROP_COUNT 64
318 318
319/* Drop inode references from dentry locks */ 319/* Drop inode references from dentry locks */
320void ocfs2_drop_dl_inodes(struct work_struct *work) 320static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count)
321{ 321{
322 struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
323 dentry_lock_work);
324 struct ocfs2_dentry_lock *dl; 322 struct ocfs2_dentry_lock *dl;
325 int drop_count = DL_INODE_DROP_COUNT;
326 323
327 spin_lock(&dentry_list_lock); 324 spin_lock(&dentry_list_lock);
328 while (osb->dentry_lock_list && drop_count--) { 325 while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) {
329 dl = osb->dentry_lock_list; 326 dl = osb->dentry_lock_list;
330 osb->dentry_lock_list = dl->dl_next; 327 osb->dentry_lock_list = dl->dl_next;
331 spin_unlock(&dentry_list_lock); 328 spin_unlock(&dentry_list_lock);
@@ -333,11 +330,32 @@ void ocfs2_drop_dl_inodes(struct work_struct *work)
333 kfree(dl); 330 kfree(dl);
334 spin_lock(&dentry_list_lock); 331 spin_lock(&dentry_list_lock);
335 } 332 }
336 if (osb->dentry_lock_list) 333 spin_unlock(&dentry_list_lock);
334}
335
336void ocfs2_drop_dl_inodes(struct work_struct *work)
337{
338 struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
339 dentry_lock_work);
340
341 __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT);
342 /*
343 * Don't queue dropping if umount is in progress. We flush the
344 * list in ocfs2_dismount_volume
345 */
346 spin_lock(&dentry_list_lock);
347 if (osb->dentry_lock_list &&
348 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
337 queue_work(ocfs2_wq, &osb->dentry_lock_work); 349 queue_work(ocfs2_wq, &osb->dentry_lock_work);
338 spin_unlock(&dentry_list_lock); 350 spin_unlock(&dentry_list_lock);
339} 351}
340 352
353/* Flush the whole work queue */
354void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
355{
356 __ocfs2_drop_dl_inodes(osb, -1);
357}
358
341/* 359/*
342 * ocfs2_dentry_iput() and friends. 360 * ocfs2_dentry_iput() and friends.
343 * 361 *
@@ -368,7 +386,8 @@ static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
368 /* We leave dropping of inode reference to ocfs2_wq as that can 386 /* We leave dropping of inode reference to ocfs2_wq as that can
369 * possibly lead to inode deletion which gets tricky */ 387 * possibly lead to inode deletion which gets tricky */
370 spin_lock(&dentry_list_lock); 388 spin_lock(&dentry_list_lock);
371 if (!osb->dentry_lock_list) 389 if (!osb->dentry_lock_list &&
390 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
372 queue_work(ocfs2_wq, &osb->dentry_lock_work); 391 queue_work(ocfs2_wq, &osb->dentry_lock_work);
373 dl->dl_next = osb->dentry_lock_list; 392 dl->dl_next = osb->dentry_lock_list;
374 osb->dentry_lock_list = dl; 393 osb->dentry_lock_list = dl;
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index faa12e75f98d..f5dd1789acf1 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -49,10 +49,13 @@ struct ocfs2_dentry_lock {
49int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, 49int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
50 u64 parent_blkno); 50 u64 parent_blkno);
51 51
52extern spinlock_t dentry_list_lock;
53
52void ocfs2_dentry_lock_put(struct ocfs2_super *osb, 54void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
53 struct ocfs2_dentry_lock *dl); 55 struct ocfs2_dentry_lock *dl);
54 56
55void ocfs2_drop_dl_inodes(struct work_struct *work); 57void ocfs2_drop_dl_inodes(struct work_struct *work);
58void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb);
56 59
57struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, 60struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
58 int skip_unhashed); 61 int skip_unhashed);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index d07ddbe4b283..81eff8e58322 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -103,7 +103,6 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
103 lock->ast_pending, lock->ml.type); 103 lock->ast_pending, lock->ml.type);
104 BUG(); 104 BUG();
105 } 105 }
106 BUG_ON(!list_empty(&lock->ast_list));
107 if (lock->ast_pending) 106 if (lock->ast_pending)
108 mlog(0, "lock has an ast getting flushed right now\n"); 107 mlog(0, "lock has an ast getting flushed right now\n");
109 108
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index bcb9260c3735..43e6e3280569 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1118,7 +1118,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
1118 1118
1119 mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n", 1119 mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n",
1120 dlm->name, res->lockname.len, res->lockname.name, 1120 dlm->name, res->lockname.len, res->lockname.name,
1121 orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery", 1121 orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery",
1122 send_to); 1122 send_to);
1123 1123
1124 /* send it */ 1124 /* send it */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 62442e413a00..aa501d3f93f1 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1851,6 +1851,7 @@ relock:
1851 if (ret) 1851 if (ret)
1852 goto out_dio; 1852 goto out_dio;
1853 1853
1854 count = ocount;
1854 ret = generic_write_checks(file, ppos, &count, 1855 ret = generic_write_checks(file, ppos, &count,
1855 S_ISBLK(inode->i_mode)); 1856 S_ISBLK(inode->i_mode));
1856 if (ret) 1857 if (ret)
@@ -1918,8 +1919,10 @@ out_sems:
1918 1919
1919 mutex_unlock(&inode->i_mutex); 1920 mutex_unlock(&inode->i_mutex);
1920 1921
1922 if (written)
1923 ret = written;
1921 mlog_exit(ret); 1924 mlog_exit(ret);
1922 return written ? written : ret; 1925 return ret;
1923} 1926}
1924 1927
1925static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, 1928static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f033760ecbea..c48b93ac6b65 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1954,10 +1954,16 @@ void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
1954 os->os_osb = osb; 1954 os->os_osb = osb;
1955 os->os_count = 0; 1955 os->os_count = 0;
1956 os->os_seqno = 0; 1956 os->os_seqno = 0;
1957 os->os_scantime = CURRENT_TIME;
1958 mutex_init(&os->os_lock); 1957 mutex_init(&os->os_lock);
1959 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work); 1958 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
1959}
1960 1960
1961void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
1962{
1963 struct ocfs2_orphan_scan *os;
1964
1965 os = &osb->osb_orphan_scan;
1966 os->os_scantime = CURRENT_TIME;
1961 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) 1967 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
1962 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); 1968 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1963 else { 1969 else {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 5432c7f79cc6..2c3222aec622 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -145,6 +145,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
145 145
146/* Exported only for the journal struct init code in super.c. Do not call. */ 146/* Exported only for the journal struct init code in super.c. Do not call. */
147void ocfs2_orphan_scan_init(struct ocfs2_super *osb); 147void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
148void ocfs2_orphan_scan_start(struct ocfs2_super *osb);
148void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); 149void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
149void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); 150void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
150 151
@@ -329,20 +330,27 @@ int ocfs2_journal_dirty(handle_t *handle,
329/* extended attribute block update */ 330/* extended attribute block update */
330#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1 331#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
331 332
333/* Update of a single quota block */
334#define OCFS2_QUOTA_BLOCK_UPDATE_CREDITS 1
335
332/* global quotafile inode update, data block */ 336/* global quotafile inode update, data block */
333#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 337#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \
338 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
334 339
340#define OCFS2_LOCAL_QINFO_WRITE_CREDITS OCFS2_QUOTA_BLOCK_UPDATE_CREDITS
335/* 341/*
336 * The two writes below can accidentally see global info dirty due 342 * The two writes below can accidentally see global info dirty due
337 * to set_info() quotactl so make them prepared for the writes. 343 * to set_info() quotactl so make them prepared for the writes.
338 */ 344 */
339/* quota data block, global info */ 345/* quota data block, global info */
340/* Write to local quota file */ 346/* Write to local quota file */
341#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1) 347#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
348 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
342 349
343/* global quota data block, local quota data block, global quota inode, 350/* global quota data block, local quota data block, global quota inode,
344 * global quota info */ 351 * global quota info */
345#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3) 352#define OCFS2_QSYNC_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
353 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
346 354
347static inline int ocfs2_quota_trans_credits(struct super_block *sb) 355static inline int ocfs2_quota_trans_credits(struct super_block *sb)
348{ 356{
@@ -355,11 +363,6 @@ static inline int ocfs2_quota_trans_credits(struct super_block *sb)
355 return credits; 363 return credits;
356} 364}
357 365
358/* Number of credits needed for removing quota structure from file */
359int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
360/* Number of credits needed for initialization of new quota structure */
361int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
362
363/* group extend. inode update and last group update. */ 366/* group extend. inode update and last group update. */
364#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 367#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
365 368
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index c9345ebb8493..39e1d5a39505 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -224,10 +224,12 @@ enum ocfs2_mount_options
224 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */ 224 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
225}; 225};
226 226
227#define OCFS2_OSB_SOFT_RO 0x0001 227#define OCFS2_OSB_SOFT_RO 0x0001
228#define OCFS2_OSB_HARD_RO 0x0002 228#define OCFS2_OSB_HARD_RO 0x0002
229#define OCFS2_OSB_ERROR_FS 0x0004 229#define OCFS2_OSB_ERROR_FS 0x0004
230#define OCFS2_DEFAULT_ATIME_QUANTUM 60 230#define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008
231
232#define OCFS2_DEFAULT_ATIME_QUANTUM 60
231 233
232struct ocfs2_journal; 234struct ocfs2_journal;
233struct ocfs2_slot_info; 235struct ocfs2_slot_info;
@@ -490,6 +492,18 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb,
490 spin_unlock(&osb->osb_lock); 492 spin_unlock(&osb->osb_lock);
491} 493}
492 494
495
496static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb,
497 unsigned long flag)
498{
499 unsigned long ret;
500
501 spin_lock(&osb->osb_lock);
502 ret = osb->osb_flags & flag;
503 spin_unlock(&osb->osb_lock);
504 return ret;
505}
506
493static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, 507static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb,
494 int hard) 508 int hard)
495{ 509{
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 7365e2e08706..3fb96fcd4c81 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -50,7 +50,6 @@ struct ocfs2_mem_dqinfo {
50 unsigned int dqi_chunks; /* Number of chunks in local quota file */ 50 unsigned int dqi_chunks; /* Number of chunks in local quota file */
51 unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */ 51 unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */
52 unsigned int dqi_syncms; /* How often should we sync with other nodes */ 52 unsigned int dqi_syncms; /* How often should we sync with other nodes */
53 unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */
54 struct list_head dqi_chunk; /* List of chunks */ 53 struct list_head dqi_chunk; /* List of chunks */
55 struct inode *dqi_gqinode; /* Global quota file inode */ 54 struct inode *dqi_gqinode; /* Global quota file inode */
56 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ 55 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index edfa60cd155c..bf7742d0ee3b 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -69,6 +69,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
69 d->dqb_curspace = cpu_to_le64(m->dqb_curspace); 69 d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
70 d->dqb_btime = cpu_to_le64(m->dqb_btime); 70 d->dqb_btime = cpu_to_le64(m->dqb_btime);
71 d->dqb_itime = cpu_to_le64(m->dqb_itime); 71 d->dqb_itime = cpu_to_le64(m->dqb_itime);
72 d->dqb_pad1 = d->dqb_pad2 = 0;
72} 73}
73 74
74static int ocfs2_global_is_id(void *dp, struct dquot *dquot) 75static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
@@ -211,14 +212,13 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
211 212
212 mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA); 213 mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
213 if (gqinode->i_size < off + len) { 214 if (gqinode->i_size < off + len) {
214 down_write(&OCFS2_I(gqinode)->ip_alloc_sem); 215 loff_t rounded_end =
215 err = ocfs2_extend_no_holes(gqinode, off + len, off); 216 ocfs2_align_bytes_to_blocks(sb, off + len);
216 up_write(&OCFS2_I(gqinode)->ip_alloc_sem); 217
217 if (err < 0) 218 /* Space is already allocated in ocfs2_global_read_dquot() */
218 goto out;
219 err = ocfs2_simple_size_update(gqinode, 219 err = ocfs2_simple_size_update(gqinode,
220 oinfo->dqi_gqi_bh, 220 oinfo->dqi_gqi_bh,
221 off + len); 221 rounded_end);
222 if (err < 0) 222 if (err < 0)
223 goto out; 223 goto out;
224 new = 1; 224 new = 1;
@@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
234 } 234 }
235 if (err) { 235 if (err) {
236 mlog_errno(err); 236 mlog_errno(err);
237 return err; 237 goto out;
238 } 238 }
239 lock_buffer(bh); 239 lock_buffer(bh);
240 if (new) 240 if (new)
@@ -342,7 +342,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
342 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); 342 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
343 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); 343 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
344 oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms); 344 oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
345 oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms);
346 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); 345 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
347 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); 346 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
348 oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); 347 oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
@@ -352,7 +351,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
352 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); 351 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
353 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); 352 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
354 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 353 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
355 oinfo->dqi_syncjiff); 354 msecs_to_jiffies(oinfo->dqi_syncms));
356 355
357out_err: 356out_err:
358 mlog_exit(status); 357 mlog_exit(status);
@@ -402,13 +401,36 @@ int ocfs2_global_write_info(struct super_block *sb, int type)
402 return err; 401 return err;
403} 402}
404 403
404static int ocfs2_global_qinit_alloc(struct super_block *sb, int type)
405{
406 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
407
408 /*
409 * We may need to allocate tree blocks and a leaf block but not the
410 * root block
411 */
412 return oinfo->dqi_gi.dqi_qtree_depth;
413}
414
415static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type)
416{
417 /* We modify all the allocated blocks, tree root, and info block */
418 return (ocfs2_global_qinit_alloc(sb, type) + 2) *
419 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS;
420}
421
405/* Read in information from global quota file and acquire a reference to it. 422/* Read in information from global quota file and acquire a reference to it.
406 * dquot_acquire() has already started the transaction and locked quota file */ 423 * dquot_acquire() has already started the transaction and locked quota file */
407int ocfs2_global_read_dquot(struct dquot *dquot) 424int ocfs2_global_read_dquot(struct dquot *dquot)
408{ 425{
409 int err, err2, ex = 0; 426 int err, err2, ex = 0;
410 struct ocfs2_mem_dqinfo *info = 427 struct super_block *sb = dquot->dq_sb;
411 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 428 int type = dquot->dq_type;
429 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
430 struct ocfs2_super *osb = OCFS2_SB(sb);
431 struct inode *gqinode = info->dqi_gqinode;
432 int need_alloc = ocfs2_global_qinit_alloc(sb, type);
433 handle_t *handle = NULL;
412 434
413 err = ocfs2_qinfo_lock(info, 0); 435 err = ocfs2_qinfo_lock(info, 0);
414 if (err < 0) 436 if (err < 0)
@@ -419,14 +441,33 @@ int ocfs2_global_read_dquot(struct dquot *dquot)
419 OCFS2_DQUOT(dquot)->dq_use_count++; 441 OCFS2_DQUOT(dquot)->dq_use_count++;
420 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; 442 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
421 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; 443 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
444 ocfs2_qinfo_unlock(info, 0);
445
422 if (!dquot->dq_off) { /* No real quota entry? */ 446 if (!dquot->dq_off) { /* No real quota entry? */
423 /* Upgrade to exclusive lock for allocation */
424 ocfs2_qinfo_unlock(info, 0);
425 err = ocfs2_qinfo_lock(info, 1);
426 if (err < 0)
427 goto out_qlock;
428 ex = 1; 447 ex = 1;
448 /*
449 * Add blocks to quota file before we start a transaction since
450 * locking allocators ranks above a transaction start
451 */
452 WARN_ON(journal_current_handle());
453 down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
454 err = ocfs2_extend_no_holes(gqinode,
455 gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
456 gqinode->i_size);
457 up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
458 if (err < 0)
459 goto out;
429 } 460 }
461
462 handle = ocfs2_start_trans(osb,
463 ocfs2_calc_global_qinit_credits(sb, type));
464 if (IS_ERR(handle)) {
465 err = PTR_ERR(handle);
466 goto out;
467 }
468 err = ocfs2_qinfo_lock(info, ex);
469 if (err < 0)
470 goto out_trans;
430 err = qtree_write_dquot(&info->dqi_gi, dquot); 471 err = qtree_write_dquot(&info->dqi_gi, dquot);
431 if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) { 472 if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
432 err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type); 473 err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
@@ -438,6 +479,9 @@ out_qlock:
438 ocfs2_qinfo_unlock(info, 1); 479 ocfs2_qinfo_unlock(info, 1);
439 else 480 else
440 ocfs2_qinfo_unlock(info, 0); 481 ocfs2_qinfo_unlock(info, 0);
482out_trans:
483 if (handle)
484 ocfs2_commit_trans(osb, handle);
441out: 485out:
442 if (err < 0) 486 if (err < 0)
443 mlog_errno(err); 487 mlog_errno(err);
@@ -607,7 +651,7 @@ static void qsync_work_fn(struct work_struct *work)
607 651
608 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); 652 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
609 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 653 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
610 oinfo->dqi_syncjiff); 654 msecs_to_jiffies(oinfo->dqi_syncms));
611} 655}
612 656
613/* 657/*
@@ -635,20 +679,18 @@ out:
635 return status; 679 return status;
636} 680}
637 681
638int ocfs2_calc_qdel_credits(struct super_block *sb, int type) 682static int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
639{ 683{
640 struct ocfs2_mem_dqinfo *oinfo; 684 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
641 int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, 685 /*
642 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA }; 686 * We modify tree, leaf block, global info, local chunk header,
643 687 * global and local inode; OCFS2_QINFO_WRITE_CREDITS already
644 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type])) 688 * accounts for inode update
645 return 0; 689 */
646 690 return (oinfo->dqi_gi.dqi_qtree_depth + 2) *
647 oinfo = sb_dqinfo(sb, type)->dqi_priv; 691 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS +
648 /* We modify tree, leaf block, global info, local chunk header, 692 OCFS2_QINFO_WRITE_CREDITS +
649 * global and local inode */ 693 OCFS2_INODE_UPDATE_CREDITS;
650 return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
651 2 * OCFS2_INODE_UPDATE_CREDITS;
652} 694}
653 695
654static int ocfs2_release_dquot(struct dquot *dquot) 696static int ocfs2_release_dquot(struct dquot *dquot)
@@ -680,33 +722,10 @@ out:
680 return status; 722 return status;
681} 723}
682 724
683int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
684{
685 struct ocfs2_mem_dqinfo *oinfo;
686 int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
687 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
688 struct ocfs2_dinode *lfe, *gfe;
689
690 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
691 return 0;
692
693 oinfo = sb_dqinfo(sb, type)->dqi_priv;
694 gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
695 lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
696 /* We can extend local file + global file. In local file we
697 * can modify info, chunk header block and dquot block. In
698 * global file we can modify info, tree and leaf block */
699 return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
700 ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
701 3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
702}
703
704static int ocfs2_acquire_dquot(struct dquot *dquot) 725static int ocfs2_acquire_dquot(struct dquot *dquot)
705{ 726{
706 handle_t *handle;
707 struct ocfs2_mem_dqinfo *oinfo = 727 struct ocfs2_mem_dqinfo *oinfo =
708 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 728 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
709 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
710 int status = 0; 729 int status = 0;
711 730
712 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); 731 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
@@ -715,16 +734,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
715 status = ocfs2_lock_global_qf(oinfo, 1); 734 status = ocfs2_lock_global_qf(oinfo, 1);
716 if (status < 0) 735 if (status < 0)
717 goto out; 736 goto out;
718 handle = ocfs2_start_trans(osb,
719 ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
720 if (IS_ERR(handle)) {
721 status = PTR_ERR(handle);
722 mlog_errno(status);
723 goto out_ilock;
724 }
725 status = dquot_acquire(dquot); 737 status = dquot_acquire(dquot);
726 ocfs2_commit_trans(osb, handle);
727out_ilock:
728 ocfs2_unlock_global_qf(oinfo, 1); 738 ocfs2_unlock_global_qf(oinfo, 1);
729out: 739out:
730 mlog_exit(status); 740 mlog_exit(status);
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 5a460fa82553..bdb09cb6e1fe 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -20,6 +20,7 @@
20#include "sysfile.h" 20#include "sysfile.h"
21#include "dlmglue.h" 21#include "dlmglue.h"
22#include "quota.h" 22#include "quota.h"
23#include "uptodate.h"
23 24
24/* Number of local quota structures per block */ 25/* Number of local quota structures per block */
25static inline unsigned int ol_quota_entries_per_block(struct super_block *sb) 26static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
@@ -100,7 +101,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
100 handle_t *handle; 101 handle_t *handle;
101 int status; 102 int status;
102 103
103 handle = ocfs2_start_trans(OCFS2_SB(sb), 1); 104 handle = ocfs2_start_trans(OCFS2_SB(sb),
105 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
104 if (IS_ERR(handle)) { 106 if (IS_ERR(handle)) {
105 status = PTR_ERR(handle); 107 status = PTR_ERR(handle);
106 mlog_errno(status); 108 mlog_errno(status);
@@ -610,7 +612,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
610 goto out_bh; 612 goto out_bh;
611 /* Mark quota file as clean if we are recovering quota file of 613 /* Mark quota file as clean if we are recovering quota file of
612 * some other node. */ 614 * some other node. */
613 handle = ocfs2_start_trans(osb, 1); 615 handle = ocfs2_start_trans(osb,
616 OCFS2_LOCAL_QINFO_WRITE_CREDITS);
614 if (IS_ERR(handle)) { 617 if (IS_ERR(handle)) {
615 status = PTR_ERR(handle); 618 status = PTR_ERR(handle);
616 mlog_errno(status); 619 mlog_errno(status);
@@ -940,7 +943,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
940 struct ocfs2_local_disk_chunk *dchunk; 943 struct ocfs2_local_disk_chunk *dchunk;
941 int status; 944 int status;
942 handle_t *handle; 945 handle_t *handle;
943 struct buffer_head *bh = NULL; 946 struct buffer_head *bh = NULL, *dbh = NULL;
944 u64 p_blkno; 947 u64 p_blkno;
945 948
946 /* We are protected by dqio_sem so no locking needed */ 949 /* We are protected by dqio_sem so no locking needed */
@@ -964,32 +967,35 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
964 mlog_errno(status); 967 mlog_errno(status);
965 goto out; 968 goto out;
966 } 969 }
970 /* Local quota info and two new blocks we initialize */
971 handle = ocfs2_start_trans(OCFS2_SB(sb),
972 OCFS2_LOCAL_QINFO_WRITE_CREDITS +
973 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
974 if (IS_ERR(handle)) {
975 status = PTR_ERR(handle);
976 mlog_errno(status);
977 goto out;
978 }
967 979
980 /* Initialize chunk header */
968 down_read(&OCFS2_I(lqinode)->ip_alloc_sem); 981 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
969 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, 982 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
970 &p_blkno, NULL, NULL); 983 &p_blkno, NULL, NULL);
971 up_read(&OCFS2_I(lqinode)->ip_alloc_sem); 984 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
972 if (status < 0) { 985 if (status < 0) {
973 mlog_errno(status); 986 mlog_errno(status);
974 goto out; 987 goto out_trans;
975 } 988 }
976 bh = sb_getblk(sb, p_blkno); 989 bh = sb_getblk(sb, p_blkno);
977 if (!bh) { 990 if (!bh) {
978 status = -ENOMEM; 991 status = -ENOMEM;
979 mlog_errno(status); 992 mlog_errno(status);
980 goto out; 993 goto out_trans;
981 } 994 }
982 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; 995 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
983 996 ocfs2_set_new_buffer_uptodate(lqinode, bh);
984 handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
985 if (IS_ERR(handle)) {
986 status = PTR_ERR(handle);
987 mlog_errno(status);
988 goto out;
989 }
990
991 status = ocfs2_journal_access_dq(handle, lqinode, bh, 997 status = ocfs2_journal_access_dq(handle, lqinode, bh,
992 OCFS2_JOURNAL_ACCESS_WRITE); 998 OCFS2_JOURNAL_ACCESS_CREATE);
993 if (status < 0) { 999 if (status < 0) {
994 mlog_errno(status); 1000 mlog_errno(status);
995 goto out_trans; 1001 goto out_trans;
@@ -999,7 +1005,6 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
999 memset(dchunk->dqc_bitmap, 0, 1005 memset(dchunk->dqc_bitmap, 0,
1000 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - 1006 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
1001 OCFS2_QBLK_RESERVED_SPACE); 1007 OCFS2_QBLK_RESERVED_SPACE);
1002 set_buffer_uptodate(bh);
1003 unlock_buffer(bh); 1008 unlock_buffer(bh);
1004 status = ocfs2_journal_dirty(handle, bh); 1009 status = ocfs2_journal_dirty(handle, bh);
1005 if (status < 0) { 1010 if (status < 0) {
@@ -1007,6 +1012,38 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
1007 goto out_trans; 1012 goto out_trans;
1008 } 1013 }
1009 1014
1015 /* Initialize new block with structures */
1016 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1017 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1,
1018 &p_blkno, NULL, NULL);
1019 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1020 if (status < 0) {
1021 mlog_errno(status);
1022 goto out_trans;
1023 }
1024 dbh = sb_getblk(sb, p_blkno);
1025 if (!dbh) {
1026 status = -ENOMEM;
1027 mlog_errno(status);
1028 goto out_trans;
1029 }
1030 ocfs2_set_new_buffer_uptodate(lqinode, dbh);
1031 status = ocfs2_journal_access_dq(handle, lqinode, dbh,
1032 OCFS2_JOURNAL_ACCESS_CREATE);
1033 if (status < 0) {
1034 mlog_errno(status);
1035 goto out_trans;
1036 }
1037 lock_buffer(dbh);
1038 memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE);
1039 unlock_buffer(dbh);
1040 status = ocfs2_journal_dirty(handle, dbh);
1041 if (status < 0) {
1042 mlog_errno(status);
1043 goto out_trans;
1044 }
1045
1046 /* Update local quotafile info */
1010 oinfo->dqi_blocks += 2; 1047 oinfo->dqi_blocks += 2;
1011 oinfo->dqi_chunks++; 1048 oinfo->dqi_chunks++;
1012 status = ocfs2_local_write_info(sb, type); 1049 status = ocfs2_local_write_info(sb, type);
@@ -1031,6 +1068,7 @@ out_trans:
1031 ocfs2_commit_trans(OCFS2_SB(sb), handle); 1068 ocfs2_commit_trans(OCFS2_SB(sb), handle);
1032out: 1069out:
1033 brelse(bh); 1070 brelse(bh);
1071 brelse(dbh);
1034 kmem_cache_free(ocfs2_qf_chunk_cachep, chunk); 1072 kmem_cache_free(ocfs2_qf_chunk_cachep, chunk);
1035 return ERR_PTR(status); 1073 return ERR_PTR(status);
1036} 1074}
@@ -1048,6 +1086,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1048 struct ocfs2_local_disk_chunk *dchunk; 1086 struct ocfs2_local_disk_chunk *dchunk;
1049 int epb = ol_quota_entries_per_block(sb); 1087 int epb = ol_quota_entries_per_block(sb);
1050 unsigned int chunk_blocks; 1088 unsigned int chunk_blocks;
1089 struct buffer_head *bh;
1090 u64 p_blkno;
1051 int status; 1091 int status;
1052 handle_t *handle; 1092 handle_t *handle;
1053 1093
@@ -1075,12 +1115,49 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1075 mlog_errno(status); 1115 mlog_errno(status);
1076 goto out; 1116 goto out;
1077 } 1117 }
1078 handle = ocfs2_start_trans(OCFS2_SB(sb), 2); 1118
1119 /* Get buffer from the just added block */
1120 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1121 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
1122 &p_blkno, NULL, NULL);
1123 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1124 if (status < 0) {
1125 mlog_errno(status);
1126 goto out;
1127 }
1128 bh = sb_getblk(sb, p_blkno);
1129 if (!bh) {
1130 status = -ENOMEM;
1131 mlog_errno(status);
1132 goto out;
1133 }
1134 ocfs2_set_new_buffer_uptodate(lqinode, bh);
1135
1136 /* Local quota info, chunk header and the new block we initialize */
1137 handle = ocfs2_start_trans(OCFS2_SB(sb),
1138 OCFS2_LOCAL_QINFO_WRITE_CREDITS +
1139 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
1079 if (IS_ERR(handle)) { 1140 if (IS_ERR(handle)) {
1080 status = PTR_ERR(handle); 1141 status = PTR_ERR(handle);
1081 mlog_errno(status); 1142 mlog_errno(status);
1082 goto out; 1143 goto out;
1083 } 1144 }
1145 /* Zero created block */
1146 status = ocfs2_journal_access_dq(handle, lqinode, bh,
1147 OCFS2_JOURNAL_ACCESS_CREATE);
1148 if (status < 0) {
1149 mlog_errno(status);
1150 goto out_trans;
1151 }
1152 lock_buffer(bh);
1153 memset(bh->b_data, 0, sb->s_blocksize);
1154 unlock_buffer(bh);
1155 status = ocfs2_journal_dirty(handle, bh);
1156 if (status < 0) {
1157 mlog_errno(status);
1158 goto out_trans;
1159 }
1160 /* Update chunk header */
1084 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh, 1161 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh,
1085 OCFS2_JOURNAL_ACCESS_WRITE); 1162 OCFS2_JOURNAL_ACCESS_WRITE);
1086 if (status < 0) { 1163 if (status < 0) {
@@ -1097,6 +1174,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1097 mlog_errno(status); 1174 mlog_errno(status);
1098 goto out_trans; 1175 goto out_trans;
1099 } 1176 }
1177 /* Update file header */
1100 oinfo->dqi_blocks++; 1178 oinfo->dqi_blocks++;
1101 status = ocfs2_local_write_info(sb, type); 1179 status = ocfs2_local_write_info(sb, type);
1102 if (status < 0) { 1180 if (status < 0) {
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 3f661376a2de..e49c41050264 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -17,6 +17,7 @@
17 * General Public License for more details. 17 * General Public License for more details.
18 */ 18 */
19 19
20#include <linux/kernel.h>
20#include <linux/crc32.h> 21#include <linux/crc32.h>
21#include <linux/module.h> 22#include <linux/module.h>
22 23
@@ -153,7 +154,7 @@ static int status_map[] = {
153 154
154static int dlm_status_to_errno(enum dlm_status status) 155static int dlm_status_to_errno(enum dlm_status status)
155{ 156{
156 BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0]))); 157 BUG_ON(status < 0 || status >= ARRAY_SIZE(status_map));
157 158
158 return status_map[status]; 159 return status_map[status];
159} 160}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 7efb349fb9bd..b0ee0fdf799a 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -777,6 +777,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
777 } 777 }
778 di = (struct ocfs2_dinode *) (*bh)->b_data; 778 di = (struct ocfs2_dinode *) (*bh)->b_data;
779 memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); 779 memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats));
780 spin_lock_init(&stats->b_lock);
780 status = ocfs2_verify_volume(di, *bh, blksize, stats); 781 status = ocfs2_verify_volume(di, *bh, blksize, stats);
781 if (status >= 0) 782 if (status >= 0)
782 goto bail; 783 goto bail;
@@ -1182,7 +1183,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1182 wake_up(&osb->osb_mount_event); 1183 wake_up(&osb->osb_mount_event);
1183 1184
1184 /* Start this when the mount is almost sure of being successful */ 1185 /* Start this when the mount is almost sure of being successful */
1185 ocfs2_orphan_scan_init(osb); 1186 ocfs2_orphan_scan_start(osb);
1186 1187
1187 mlog_exit(status); 1188 mlog_exit(status);
1188 return status; 1189 return status;
@@ -1213,14 +1214,27 @@ static int ocfs2_get_sb(struct file_system_type *fs_type,
1213 mnt); 1214 mnt);
1214} 1215}
1215 1216
1217static void ocfs2_kill_sb(struct super_block *sb)
1218{
1219 struct ocfs2_super *osb = OCFS2_SB(sb);
1220
1221 /* Prevent further queueing of inode drop events */
1222 spin_lock(&dentry_list_lock);
1223 ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
1224 spin_unlock(&dentry_list_lock);
1225 /* Wait for work to finish and/or remove it */
1226 cancel_work_sync(&osb->dentry_lock_work);
1227
1228 kill_block_super(sb);
1229}
1230
1216static struct file_system_type ocfs2_fs_type = { 1231static struct file_system_type ocfs2_fs_type = {
1217 .owner = THIS_MODULE, 1232 .owner = THIS_MODULE,
1218 .name = "ocfs2", 1233 .name = "ocfs2",
1219 .get_sb = ocfs2_get_sb, /* is this called when we mount 1234 .get_sb = ocfs2_get_sb, /* is this called when we mount
1220 * the fs? */ 1235 * the fs? */
1221 .kill_sb = kill_block_super, /* set to the generic one 1236 .kill_sb = ocfs2_kill_sb,
1222 * right now, but do we 1237
1223 * need to change that? */
1224 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, 1238 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
1225 .next = NULL 1239 .next = NULL
1226}; 1240};
@@ -1819,6 +1833,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1819 1833
1820 debugfs_remove(osb->osb_ctxt); 1834 debugfs_remove(osb->osb_ctxt);
1821 1835
1836 /*
1837 * Flush inode dropping work queue so that deletes are
1838 * performed while the filesystem is still working
1839 */
1840 ocfs2_drop_all_dl_inodes(osb);
1841
1822 /* Orphan scan should be stopped as early as possible */ 1842 /* Orphan scan should be stopped as early as possible */
1823 ocfs2_orphan_scan_stop(osb); 1843 ocfs2_orphan_scan_stop(osb);
1824 1844
@@ -1981,6 +2001,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
1981 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", 2001 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
1982 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 2002 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
1983 2003
2004 ocfs2_orphan_scan_init(osb);
2005
1984 status = ocfs2_recovery_init(osb); 2006 status = ocfs2_recovery_init(osb);
1985 if (status) { 2007 if (status) {
1986 mlog(ML_ERROR, "Unable to initialize recovery state\n"); 2008 mlog(ML_ERROR, "Unable to initialize recovery state\n");
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index ba320e250747..d1a27cda984f 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1052,7 +1052,8 @@ static int ocfs2_xattr_block_get(struct inode *inode,
1052 struct ocfs2_xattr_block *xb; 1052 struct ocfs2_xattr_block *xb;
1053 struct ocfs2_xattr_value_root *xv; 1053 struct ocfs2_xattr_value_root *xv;
1054 size_t size; 1054 size_t size;
1055 int ret = -ENODATA, name_offset, name_len, block_off, i; 1055 int ret = -ENODATA, name_offset, name_len, i;
1056 int uninitialized_var(block_off);
1056 1057
1057 xs->bucket = ocfs2_xattr_bucket_new(inode); 1058 xs->bucket = ocfs2_xattr_bucket_new(inode);
1058 if (!xs->bucket) { 1059 if (!xs->bucket) {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3ce5ae9e3d2d..175db258942f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -234,23 +234,20 @@ static int check_mem_permission(struct task_struct *task)
234 234
235struct mm_struct *mm_for_maps(struct task_struct *task) 235struct mm_struct *mm_for_maps(struct task_struct *task)
236{ 236{
237 struct mm_struct *mm = get_task_mm(task); 237 struct mm_struct *mm;
238 if (!mm) 238
239 if (mutex_lock_killable(&task->cred_guard_mutex))
239 return NULL; 240 return NULL;
240 down_read(&mm->mmap_sem); 241
241 task_lock(task); 242 mm = get_task_mm(task);
242 if (task->mm != mm) 243 if (mm && mm != current->mm &&
243 goto out; 244 !ptrace_may_access(task, PTRACE_MODE_READ)) {
244 if (task->mm != current->mm && 245 mmput(mm);
245 __ptrace_may_access(task, PTRACE_MODE_READ) < 0) 246 mm = NULL;
246 goto out; 247 }
247 task_unlock(task); 248 mutex_unlock(&task->cred_guard_mutex);
249
248 return mm; 250 return mm;
249out:
250 task_unlock(task);
251 up_read(&mm->mmap_sem);
252 mmput(mm);
253 return NULL;
254} 251}
255 252
256static int proc_pid_cmdline(struct task_struct *task, char * buffer) 253static int proc_pid_cmdline(struct task_struct *task, char * buffer)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 6f61b7cc32e0..9bd8be1d235c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -119,6 +119,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
119 mm = mm_for_maps(priv->task); 119 mm = mm_for_maps(priv->task);
120 if (!mm) 120 if (!mm)
121 return NULL; 121 return NULL;
122 down_read(&mm->mmap_sem);
122 123
123 tail_vma = get_gate_vma(priv->task); 124 tail_vma = get_gate_vma(priv->task);
124 priv->tail_vma = tail_vma; 125 priv->tail_vma = tail_vma;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 64a72e2e7650..8f5c05d3dbd3 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -189,6 +189,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
189 priv->task = NULL; 189 priv->task = NULL;
190 return NULL; 190 return NULL;
191 } 191 }
192 down_read(&mm->mmap_sem);
192 193
193 /* start from the Nth VMA */ 194 /* start from the Nth VMA */
194 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) 195 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
diff --git a/fs/select.c b/fs/select.c
index d870237e42c7..8084834e123e 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -110,6 +110,7 @@ void poll_initwait(struct poll_wqueues *pwq)
110{ 110{
111 init_poll_funcptr(&pwq->pt, __pollwait); 111 init_poll_funcptr(&pwq->pt, __pollwait);
112 pwq->polling_task = current; 112 pwq->polling_task = current;
113 pwq->triggered = 0;
113 pwq->error = 0; 114 pwq->error = 0;
114 pwq->table = NULL; 115 pwq->table = NULL;
115 pwq->inline_index = 0; 116 pwq->inline_index = 0;
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 0c93c7ef3d18..965df1227d64 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -770,7 +770,7 @@ xfs_buf_associate_memory(
770 bp->b_pages = NULL; 770 bp->b_pages = NULL;
771 bp->b_addr = mem; 771 bp->b_addr = mem;
772 772
773 rval = _xfs_buf_get_pages(bp, page_count, 0); 773 rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
774 if (rval) 774 if (rval)
775 return rval; 775 return rval;
776 776
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index b619d6b8ca43..98ef624d9baf 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -708,6 +708,16 @@ xfs_reclaim_inode(
708 return 0; 708 return 0;
709} 709}
710 710
711void
712__xfs_inode_set_reclaim_tag(
713 struct xfs_perag *pag,
714 struct xfs_inode *ip)
715{
716 radix_tree_tag_set(&pag->pag_ici_root,
717 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
718 XFS_ICI_RECLAIM_TAG);
719}
720
711/* 721/*
712 * We set the inode flag atomically with the radix tree tag. 722 * We set the inode flag atomically with the radix tree tag.
713 * Once we get tag lookups on the radix tree, this inode flag 723 * Once we get tag lookups on the radix tree, this inode flag
@@ -722,8 +732,7 @@ xfs_inode_set_reclaim_tag(
722 732
723 read_lock(&pag->pag_ici_lock); 733 read_lock(&pag->pag_ici_lock);
724 spin_lock(&ip->i_flags_lock); 734 spin_lock(&ip->i_flags_lock);
725 radix_tree_tag_set(&pag->pag_ici_root, 735 __xfs_inode_set_reclaim_tag(pag, ip);
726 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
727 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 736 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
728 spin_unlock(&ip->i_flags_lock); 737 spin_unlock(&ip->i_flags_lock);
729 read_unlock(&pag->pag_ici_lock); 738 read_unlock(&pag->pag_ici_lock);
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 2a10301c99c7..59120602588a 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -48,6 +48,7 @@ int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
48int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 48int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
49 49
50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
51void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
51void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); 52void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
52void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, 53void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
53 struct xfs_inode *ip); 54 struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index db15feb906ff..4ece1906bd41 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -2010,7 +2010,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
2010 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 2010 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
2011 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 2011 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
2012 error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno, 2012 error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
2013 blkcnt, XFS_BUF_LOCK, &bp); 2013 blkcnt,
2014 XFS_BUF_LOCK | XBF_DONT_BLOCK,
2015 &bp);
2014 if (error) 2016 if (error)
2015 return(error); 2017 return(error);
2016 2018
@@ -2141,8 +2143,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2141 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 2143 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2142 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 2144 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2143 2145
2144 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, 2146 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt,
2145 blkcnt, XFS_BUF_LOCK); 2147 XFS_BUF_LOCK | XBF_DONT_BLOCK);
2146 ASSERT(bp); 2148 ASSERT(bp);
2147 ASSERT(!XFS_BUF_GETERROR(bp)); 2149 ASSERT(!XFS_BUF_GETERROR(bp));
2148 2150
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 7928b9983c1d..8ee5b5a76a2a 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -6009,7 +6009,7 @@ xfs_getbmap(
6009 */ 6009 */
6010 error = ENOMEM; 6010 error = ENOMEM;
6011 subnex = 16; 6011 subnex = 16;
6012 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL); 6012 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
6013 if (!map) 6013 if (!map)
6014 goto out_unlock_ilock; 6014 goto out_unlock_ilock;
6015 6015
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index e9df99574829..26717388acf5 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -120,8 +120,8 @@ xfs_btree_check_sblock(
120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) { 120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
121 if (bp) 121 if (bp)
122 xfs_buftrace("SBTREE ERROR", bp); 122 xfs_buftrace("SBTREE ERROR", bp);
123 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW, 123 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
124 cur->bc_mp); 124 XFS_ERRLEVEL_LOW, cur->bc_mp, block);
125 return XFS_ERROR(EFSCORRUPTED); 125 return XFS_ERROR(EFSCORRUPTED);
126 } 126 }
127 return 0; 127 return 0;
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 9ff6e57a5075..2847bbc1c534 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2201,7 +2201,7 @@ kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */
2201xfs_da_state_t * 2201xfs_da_state_t *
2202xfs_da_state_alloc(void) 2202xfs_da_state_alloc(void)
2203{ 2203{
2204 return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP); 2204 return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
2205} 2205}
2206 2206
2207/* 2207/*
@@ -2261,9 +2261,9 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
2261 int off; 2261 int off;
2262 2262
2263 if (nbuf == 1) 2263 if (nbuf == 1)
2264 dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP); 2264 dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
2265 else 2265 else
2266 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP); 2266 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
2267 dabuf->dirty = 0; 2267 dabuf->dirty = 0;
2268#ifdef XFS_DABUF_DEBUG 2268#ifdef XFS_DABUF_DEBUG
2269 dabuf->ra = ra; 2269 dabuf->ra = ra;
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index c657bec6d951..bb1d58eb3982 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -256,7 +256,7 @@ xfs_dir_cilookup_result(
256 !(args->op_flags & XFS_DA_OP_CILOOKUP)) 256 !(args->op_flags & XFS_DA_OP_CILOOKUP))
257 return EEXIST; 257 return EEXIST;
258 258
259 args->value = kmem_alloc(len, KM_MAYFAIL); 259 args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL);
260 if (!args->value) 260 if (!args->value)
261 return ENOMEM; 261 return ENOMEM;
262 262
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cbd451bb4848..2d0b3e1da9e6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -167,17 +167,25 @@ xfs_growfs_data_private(
167 new = nb - mp->m_sb.sb_dblocks; 167 new = nb - mp->m_sb.sb_dblocks;
168 oagcount = mp->m_sb.sb_agcount; 168 oagcount = mp->m_sb.sb_agcount;
169 if (nagcount > oagcount) { 169 if (nagcount > oagcount) {
170 void *new_perag, *old_perag;
171
170 xfs_filestream_flush(mp); 172 xfs_filestream_flush(mp);
173
174 new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount,
175 KM_MAYFAIL);
176 if (!new_perag)
177 return XFS_ERROR(ENOMEM);
178
171 down_write(&mp->m_peraglock); 179 down_write(&mp->m_peraglock);
172 mp->m_perag = kmem_realloc(mp->m_perag, 180 memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount);
173 sizeof(xfs_perag_t) * nagcount, 181 old_perag = mp->m_perag;
174 sizeof(xfs_perag_t) * oagcount, 182 mp->m_perag = new_perag;
175 KM_SLEEP); 183
176 memset(&mp->m_perag[oagcount], 0,
177 (nagcount - oagcount) * sizeof(xfs_perag_t));
178 mp->m_flags |= XFS_MOUNT_32BITINODES; 184 mp->m_flags |= XFS_MOUNT_32BITINODES;
179 nagimax = xfs_initialize_perag(mp, nagcount); 185 nagimax = xfs_initialize_perag(mp, nagcount);
180 up_write(&mp->m_peraglock); 186 up_write(&mp->m_peraglock);
187
188 kmem_free(old_perag);
181 } 189 }
182 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); 190 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
183 tp->t_flags |= XFS_TRANS_RESERVE; 191 tp->t_flags |= XFS_TRANS_RESERVE;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 5fcec6f020a7..ecbf8b4d2e2e 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -64,6 +64,10 @@ xfs_inode_alloc(
64 ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); 64 ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
65 if (!ip) 65 if (!ip)
66 return NULL; 66 return NULL;
67 if (inode_init_always(mp->m_super, VFS_I(ip))) {
68 kmem_zone_free(xfs_inode_zone, ip);
69 return NULL;
70 }
67 71
68 ASSERT(atomic_read(&ip->i_iocount) == 0); 72 ASSERT(atomic_read(&ip->i_iocount) == 0);
69 ASSERT(atomic_read(&ip->i_pincount) == 0); 73 ASSERT(atomic_read(&ip->i_pincount) == 0);
@@ -105,17 +109,6 @@ xfs_inode_alloc(
105#ifdef XFS_DIR2_TRACE 109#ifdef XFS_DIR2_TRACE
106 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); 110 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
107#endif 111#endif
108 /*
109 * Now initialise the VFS inode. We do this after the xfs_inode
110 * initialisation as internal failures will result in ->destroy_inode
111 * being called and that will pass down through the reclaim path and
112 * free the XFS inode. This path requires the XFS inode to already be
113 * initialised. Hence if this call fails, the xfs_inode has already
114 * been freed and we should not reference it at all in the error
115 * handling.
116 */
117 if (!inode_init_always(mp->m_super, VFS_I(ip)))
118 return NULL;
119 112
120 /* prevent anyone from using this yet */ 113 /* prevent anyone from using this yet */
121 VFS_I(ip)->i_state = I_NEW|I_LOCK; 114 VFS_I(ip)->i_state = I_NEW|I_LOCK;
@@ -123,6 +116,71 @@ xfs_inode_alloc(
123 return ip; 116 return ip;
124} 117}
125 118
119STATIC void
120xfs_inode_free(
121 struct xfs_inode *ip)
122{
123 switch (ip->i_d.di_mode & S_IFMT) {
124 case S_IFREG:
125 case S_IFDIR:
126 case S_IFLNK:
127 xfs_idestroy_fork(ip, XFS_DATA_FORK);
128 break;
129 }
130
131 if (ip->i_afp)
132 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
133
134#ifdef XFS_INODE_TRACE
135 ktrace_free(ip->i_trace);
136#endif
137#ifdef XFS_BMAP_TRACE
138 ktrace_free(ip->i_xtrace);
139#endif
140#ifdef XFS_BTREE_TRACE
141 ktrace_free(ip->i_btrace);
142#endif
143#ifdef XFS_RW_TRACE
144 ktrace_free(ip->i_rwtrace);
145#endif
146#ifdef XFS_ILOCK_TRACE
147 ktrace_free(ip->i_lock_trace);
148#endif
149#ifdef XFS_DIR2_TRACE
150 ktrace_free(ip->i_dir_trace);
151#endif
152
153 if (ip->i_itemp) {
154 /*
155 * Only if we are shutting down the fs will we see an
156 * inode still in the AIL. If it is there, we should remove
157 * it to prevent a use-after-free from occurring.
158 */
159 xfs_log_item_t *lip = &ip->i_itemp->ili_item;
160 struct xfs_ail *ailp = lip->li_ailp;
161
162 ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
163 XFS_FORCED_SHUTDOWN(ip->i_mount));
164 if (lip->li_flags & XFS_LI_IN_AIL) {
165 spin_lock(&ailp->xa_lock);
166 if (lip->li_flags & XFS_LI_IN_AIL)
167 xfs_trans_ail_delete(ailp, lip);
168 else
169 spin_unlock(&ailp->xa_lock);
170 }
171 xfs_inode_item_destroy(ip);
172 ip->i_itemp = NULL;
173 }
174
175 /* asserts to verify all state is correct here */
176 ASSERT(atomic_read(&ip->i_iocount) == 0);
177 ASSERT(atomic_read(&ip->i_pincount) == 0);
178 ASSERT(!spin_is_locked(&ip->i_flags_lock));
179 ASSERT(completion_done(&ip->i_flush));
180
181 kmem_zone_free(xfs_inode_zone, ip);
182}
183
126/* 184/*
127 * Check the validity of the inode we just found it the cache 185 * Check the validity of the inode we just found it the cache
128 */ 186 */
@@ -133,80 +191,82 @@ xfs_iget_cache_hit(
133 int flags, 191 int flags,
134 int lock_flags) __releases(pag->pag_ici_lock) 192 int lock_flags) __releases(pag->pag_ici_lock)
135{ 193{
194 struct inode *inode = VFS_I(ip);
136 struct xfs_mount *mp = ip->i_mount; 195 struct xfs_mount *mp = ip->i_mount;
137 int error = EAGAIN; 196 int error;
197
198 spin_lock(&ip->i_flags_lock);
138 199
139 /* 200 /*
140 * If INEW is set this inode is being set up 201 * If we are racing with another cache hit that is currently
141 * If IRECLAIM is set this inode is being torn down 202 * instantiating this inode or currently recycling it out of
142 * Pause and try again. 203 * reclaimabe state, wait for the initialisation to complete
204 * before continuing.
205 *
206 * XXX(hch): eventually we should do something equivalent to
207 * wait_on_inode to wait for these flags to be cleared
208 * instead of polling for it.
143 */ 209 */
144 if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { 210 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
145 XFS_STATS_INC(xs_ig_frecycle); 211 XFS_STATS_INC(xs_ig_frecycle);
212 error = EAGAIN;
146 goto out_error; 213 goto out_error;
147 } 214 }
148 215
149 /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ 216 /*
150 if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { 217 * If lookup is racing with unlink return an error immediately.
151 218 */
152 /* 219 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
153 * If lookup is racing with unlink, then we should return an 220 error = ENOENT;
154 * error immediately so we don't remove it from the reclaim 221 goto out_error;
155 * list and potentially leak the inode. 222 }
156 */
157 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
158 error = ENOENT;
159 goto out_error;
160 }
161 223
224 /*
225 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
226 * Need to carefully get it back into useable state.
227 */
228 if (ip->i_flags & XFS_IRECLAIMABLE) {
162 xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); 229 xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
163 230
164 /* 231 /*
165 * We need to re-initialise the VFS inode as it has been 232 * We need to set XFS_INEW atomically with clearing the
166 * 'freed' by the VFS. Do this here so we can deal with 233 * reclaimable tag so that we do have an indicator of the
167 * errors cleanly, then tag it so it can be set up correctly 234 * inode still being initialized.
168 * later.
169 */ 235 */
170 if (!inode_init_always(mp->m_super, VFS_I(ip))) { 236 ip->i_flags |= XFS_INEW;
171 error = ENOMEM; 237 ip->i_flags &= ~XFS_IRECLAIMABLE;
172 goto out_error; 238 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
173 }
174 239
175 /* 240 spin_unlock(&ip->i_flags_lock);
176 * We must set the XFS_INEW flag before clearing the 241 read_unlock(&pag->pag_ici_lock);
177 * XFS_IRECLAIMABLE flag so that if a racing lookup does
178 * not find the XFS_IRECLAIMABLE above but has the igrab()
179 * below succeed we can safely check XFS_INEW to detect
180 * that this inode is still being initialised.
181 */
182 xfs_iflags_set(ip, XFS_INEW);
183 xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
184 242
185 /* clear the radix tree reclaim flag as well. */ 243 error = -inode_init_always(mp->m_super, inode);
186 __xfs_inode_clear_reclaim_tag(mp, pag, ip); 244 if (error) {
187 } else if (!igrab(VFS_I(ip))) { 245 /*
246 * Re-initializing the inode failed, and we are in deep
247 * trouble. Try to re-add it to the reclaim list.
248 */
249 read_lock(&pag->pag_ici_lock);
250 spin_lock(&ip->i_flags_lock);
251
252 ip->i_flags &= ~XFS_INEW;
253 ip->i_flags |= XFS_IRECLAIMABLE;
254 __xfs_inode_set_reclaim_tag(pag, ip);
255 goto out_error;
256 }
257 inode->i_state = I_LOCK|I_NEW;
258 } else {
188 /* If the VFS inode is being torn down, pause and try again. */ 259 /* If the VFS inode is being torn down, pause and try again. */
189 XFS_STATS_INC(xs_ig_frecycle); 260 if (!igrab(inode)) {
190 goto out_error; 261 error = EAGAIN;
191 } else if (xfs_iflags_test(ip, XFS_INEW)) { 262 goto out_error;
192 /* 263 }
193 * We are racing with another cache hit that is
194 * currently recycling this inode out of the XFS_IRECLAIMABLE
195 * state. Wait for the initialisation to complete before
196 * continuing.
197 */
198 wait_on_inode(VFS_I(ip));
199 }
200 264
201 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { 265 /* We've got a live one. */
202 error = ENOENT; 266 spin_unlock(&ip->i_flags_lock);
203 iput(VFS_I(ip)); 267 read_unlock(&pag->pag_ici_lock);
204 goto out_error;
205 } 268 }
206 269
207 /* We've got a live one. */
208 read_unlock(&pag->pag_ici_lock);
209
210 if (lock_flags != 0) 270 if (lock_flags != 0)
211 xfs_ilock(ip, lock_flags); 271 xfs_ilock(ip, lock_flags);
212 272
@@ -216,6 +276,7 @@ xfs_iget_cache_hit(
216 return 0; 276 return 0;
217 277
218out_error: 278out_error:
279 spin_unlock(&ip->i_flags_lock);
219 read_unlock(&pag->pag_ici_lock); 280 read_unlock(&pag->pag_ici_lock);
220 return error; 281 return error;
221} 282}
@@ -299,7 +360,8 @@ out_preload_end:
299 if (lock_flags) 360 if (lock_flags)
300 xfs_iunlock(ip, lock_flags); 361 xfs_iunlock(ip, lock_flags);
301out_destroy: 362out_destroy:
302 xfs_destroy_inode(ip); 363 __destroy_inode(VFS_I(ip));
364 xfs_inode_free(ip);
303 return error; 365 return error;
304} 366}
305 367
@@ -504,62 +566,7 @@ xfs_ireclaim(
504 xfs_qm_dqdetach(ip); 566 xfs_qm_dqdetach(ip);
505 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 567 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
506 568
507 switch (ip->i_d.di_mode & S_IFMT) { 569 xfs_inode_free(ip);
508 case S_IFREG:
509 case S_IFDIR:
510 case S_IFLNK:
511 xfs_idestroy_fork(ip, XFS_DATA_FORK);
512 break;
513 }
514
515 if (ip->i_afp)
516 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
517
518#ifdef XFS_INODE_TRACE
519 ktrace_free(ip->i_trace);
520#endif
521#ifdef XFS_BMAP_TRACE
522 ktrace_free(ip->i_xtrace);
523#endif
524#ifdef XFS_BTREE_TRACE
525 ktrace_free(ip->i_btrace);
526#endif
527#ifdef XFS_RW_TRACE
528 ktrace_free(ip->i_rwtrace);
529#endif
530#ifdef XFS_ILOCK_TRACE
531 ktrace_free(ip->i_lock_trace);
532#endif
533#ifdef XFS_DIR2_TRACE
534 ktrace_free(ip->i_dir_trace);
535#endif
536 if (ip->i_itemp) {
537 /*
538 * Only if we are shutting down the fs will we see an
539 * inode still in the AIL. If it is there, we should remove
540 * it to prevent a use-after-free from occurring.
541 */
542 xfs_log_item_t *lip = &ip->i_itemp->ili_item;
543 struct xfs_ail *ailp = lip->li_ailp;
544
545 ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
546 XFS_FORCED_SHUTDOWN(ip->i_mount));
547 if (lip->li_flags & XFS_LI_IN_AIL) {
548 spin_lock(&ailp->xa_lock);
549 if (lip->li_flags & XFS_LI_IN_AIL)
550 xfs_trans_ail_delete(ailp, lip);
551 else
552 spin_unlock(&ailp->xa_lock);
553 }
554 xfs_inode_item_destroy(ip);
555 ip->i_itemp = NULL;
556 }
557 /* asserts to verify all state is correct here */
558 ASSERT(atomic_read(&ip->i_iocount) == 0);
559 ASSERT(atomic_read(&ip->i_pincount) == 0);
560 ASSERT(!spin_is_locked(&ip->i_flags_lock));
561 ASSERT(completion_done(&ip->i_flush));
562 kmem_zone_free(xfs_inode_zone, ip);
563} 570}
564 571
565/* 572/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 1f22d65fed0a..da428b3fe0f5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -343,6 +343,16 @@ xfs_iformat(
343 return XFS_ERROR(EFSCORRUPTED); 343 return XFS_ERROR(EFSCORRUPTED);
344 } 344 }
345 345
346 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
347 !ip->i_mount->m_rtdev_targp)) {
348 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
349 "corrupt dinode %Lu, has realtime flag set.",
350 ip->i_ino);
351 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
352 XFS_ERRLEVEL_LOW, ip->i_mount, dip);
353 return XFS_ERROR(EFSCORRUPTED);
354 }
355
346 switch (ip->i_d.di_mode & S_IFMT) { 356 switch (ip->i_d.di_mode & S_IFMT) {
347 case S_IFIFO: 357 case S_IFIFO:
348 case S_IFCHR: 358 case S_IFCHR:
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 1804f866a71d..65f24a3cc992 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -310,23 +310,6 @@ static inline struct inode *VFS_I(struct xfs_inode *ip)
310} 310}
311 311
312/* 312/*
313 * Get rid of a partially initialized inode.
314 *
315 * We have to go through destroy_inode to make sure allocations
316 * from init_inode_always like the security data are undone.
317 *
318 * We mark the inode bad so that it takes the short cut in
319 * the reclaim path instead of going through the flush path
320 * which doesn't make sense for an inode that has never seen the
321 * light of day.
322 */
323static inline void xfs_destroy_inode(struct xfs_inode *ip)
324{
325 make_bad_inode(VFS_I(ip));
326 return destroy_inode(VFS_I(ip));
327}
328
329/*
330 * i_flags helper functions 313 * i_flags helper functions
331 */ 314 */
332static inline void 315static inline void
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3750f04ede0b..9dbdff3ea484 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3180,7 +3180,7 @@ try_again:
3180STATIC void 3180STATIC void
3181xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) 3181xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
3182{ 3182{
3183 ASSERT(spin_is_locked(&log->l_icloglock)); 3183 assert_spin_locked(&log->l_icloglock);
3184 3184
3185 if (iclog->ic_state == XLOG_STATE_ACTIVE) { 3185 if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3186 xlog_state_switch_iclogs(log, iclog, 0); 3186 xlog_state_switch_iclogs(log, iclog, 0);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index c4eca5ed5dab..492d75bae2bf 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -538,7 +538,9 @@ xfs_readlink_bmap(
538 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 538 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
539 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 539 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
540 540
541 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); 541 bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt),
542 XBF_LOCK | XBF_MAPPED |
543 XBF_DONT_BLOCK);
542 error = XFS_BUF_GETERROR(bp); 544 error = XFS_BUF_GETERROR(bp);
543 if (error) { 545 if (error) {
544 xfs_ioerror_alert("xfs_readlink", 546 xfs_ioerror_alert("xfs_readlink",