aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/delayed-ref.h2
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/btrfs/qgroup.c8
-rw-r--r--fs/cifs/cifs_unicode.c2
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/cifs/smb2pdu.h4
-rw-r--r--fs/dcache.c12
-rw-r--r--fs/debugfs/file.c76
-rw-r--r--fs/ecryptfs/file.c10
-rw-r--r--fs/ecryptfs/inode.c5
-rw-r--r--fs/ecryptfs/main.c1
-rw-r--r--fs/ext2/balloc.c2
-rw-r--r--fs/ext3/balloc.c2
-rw-r--r--fs/ext3/inode.c19
-rw-r--r--fs/ext4/inode.c14
-rw-r--r--fs/ext4/mballoc.c2
-rw-r--r--fs/fs-writeback.c4
-rw-r--r--fs/fuse/control.c4
-rw-r--r--fs/fuse/cuse.c4
-rw-r--r--fs/fuse/dev.c1
-rw-r--r--fs/fuse/inode.c12
-rw-r--r--fs/gfs2/aops.c11
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/file.c35
-rw-r--r--fs/gfs2/glock.c60
-rw-r--r--fs/gfs2/glops.c1
-rw-r--r--fs/gfs2/incore.h30
-rw-r--r--fs/gfs2/inode.c28
-rw-r--r--fs/gfs2/ops_fstype.c8
-rw-r--r--fs/gfs2/quota.c11
-rw-r--r--fs/gfs2/rgrp.c1221
-rw-r--r--fs/gfs2/rgrp.h28
-rw-r--r--fs/gfs2/super.c9
-rw-r--r--fs/gfs2/trace_gfs2.h20
-rw-r--r--fs/gfs2/trans.h7
-rw-r--r--fs/gfs2/xattr.c96
-rw-r--r--fs/libfs.c2
-rw-r--r--fs/lockd/svclock.c3
-rw-r--r--fs/namespace.c10
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/nfs3proc.c2
-rw-r--r--fs/nfs/nfs4file.c4
-rw-r--r--fs/nfs/nfs4proc.c55
-rw-r--r--fs/nfs/nfs4xdr.c17
-rw-r--r--fs/nfs/super.c4
-rw-r--r--fs/proc/proc_sysctl.c5
-rw-r--r--fs/stat.c6
-rw-r--r--fs/udf/file.c35
-rw-r--r--fs/xfs/xfs_buf.c5
-rw-r--r--fs/xfs/xfs_buf.h41
-rw-r--r--fs/xfs/xfs_super.c1
53 files changed, 959 insertions, 996 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0d195b50766..9821b672f5a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -116,7 +116,7 @@ struct btrfs_ordered_sum;
116#define BTRFS_FREE_SPACE_OBJECTID -11ULL 116#define BTRFS_FREE_SPACE_OBJECTID -11ULL
117 117
118/* 118/*
119 * The inode number assigned to the special inode for sotring 119 * The inode number assigned to the special inode for storing
120 * free ino cache 120 * free ino cache
121 */ 121 */
122#define BTRFS_FREE_INO_OBJECTID -12ULL 122#define BTRFS_FREE_INO_OBJECTID -12ULL
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index ab530059584..c9d703693df 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -18,7 +18,7 @@
18#ifndef __DELAYED_REF__ 18#ifndef __DELAYED_REF__
19#define __DELAYED_REF__ 19#define __DELAYED_REF__
20 20
21/* these are the possible values of struct btrfs_delayed_ref->action */ 21/* these are the possible values of struct btrfs_delayed_ref_node->action */
22#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ 22#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */
23#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ 23#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */
24#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ 24#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ec154f95464..316b07a866d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1971,8 +1971,8 @@ out:
1971 ordered_extent->len - 1, NULL, GFP_NOFS); 1971 ordered_extent->len - 1, NULL, GFP_NOFS);
1972 1972
1973 /* 1973 /*
1974 * This needs to be dont to make sure anybody waiting knows we are done 1974 * This needs to be done to make sure anybody waiting knows we are done
1975 * upating everything for this ordered extent. 1975 * updating everything for this ordered extent.
1976 */ 1976 */
1977 btrfs_remove_ordered_extent(inode, ordered_extent); 1977 btrfs_remove_ordered_extent(inode, ordered_extent);
1978 1978
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 38b42e7bc91..b6501558174 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1371,10 +1371,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1371 1371
1372 if (srcid) { 1372 if (srcid) {
1373 srcgroup = find_qgroup_rb(fs_info, srcid); 1373 srcgroup = find_qgroup_rb(fs_info, srcid);
1374 if (!srcgroup) { 1374 if (!srcgroup)
1375 ret = -EINVAL;
1376 goto unlock; 1375 goto unlock;
1377 }
1378 dstgroup->rfer = srcgroup->rfer - level_size; 1376 dstgroup->rfer = srcgroup->rfer - level_size;
1379 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size; 1377 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
1380 srcgroup->excl = level_size; 1378 srcgroup->excl = level_size;
@@ -1383,10 +1381,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1383 qgroup_dirty(fs_info, srcgroup); 1381 qgroup_dirty(fs_info, srcgroup);
1384 } 1382 }
1385 1383
1386 if (!inherit) { 1384 if (!inherit)
1387 ret = -EINVAL;
1388 goto unlock; 1385 goto unlock;
1389 }
1390 1386
1391 i_qgroups = (u64 *)(inherit + 1); 1387 i_qgroups = (u64 *)(inherit + 1);
1392 for (i = 0; i < inherit->num_qgroups; ++i) { 1388 for (i = 0; i < inherit->num_qgroups; ++i) {
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 7dab9c04ad5..53cf2aabce8 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -328,7 +328,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
328 } 328 }
329 329
330ctoUTF16_out: 330ctoUTF16_out:
331 return i; 331 return j;
332} 332}
333 333
334#ifdef CONFIG_CIFS_SMB2 334#ifdef CONFIG_CIFS_SMB2
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9154192b068..71e9ad9f596 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -917,7 +917,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
917 if (!buf) { 917 if (!buf) {
918 mutex_unlock(&cinode->lock_mutex); 918 mutex_unlock(&cinode->lock_mutex);
919 free_xid(xid); 919 free_xid(xid);
920 return rc; 920 return -ENOMEM;
921 } 921 }
922 922
923 for (i = 0; i < 2; i++) { 923 for (i = 0; i < 2; i++) {
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index c5fbfac5d57..15dc8eea827 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -96,7 +96,7 @@
96 * 96 *
97 */ 97 */
98 98
99#define SMB2_HEADER_STRUCTURE_SIZE __constant_le16_to_cpu(64) 99#define SMB2_HEADER_STRUCTURE_SIZE __constant_cpu_to_le16(64)
100 100
101struct smb2_hdr { 101struct smb2_hdr {
102 __be32 smb2_buf_length; /* big endian on wire */ 102 __be32 smb2_buf_length; /* big endian on wire */
@@ -140,7 +140,7 @@ struct smb2_pdu {
140 * 140 *
141 */ 141 */
142 142
143#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_le16_to_cpu(9) 143#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_cpu_to_le16(9)
144 144
145struct smb2_err_rsp { 145struct smb2_err_rsp {
146 struct smb2_hdr hdr; 146 struct smb2_hdr hdr;
diff --git a/fs/dcache.c b/fs/dcache.c
index 8086636bf79..693f95bf1ca 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -389,7 +389,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
389 * Inform try_to_ascend() that we are no longer attached to the 389 * Inform try_to_ascend() that we are no longer attached to the
390 * dentry tree 390 * dentry tree
391 */ 391 */
392 dentry->d_flags |= DCACHE_DISCONNECTED; 392 dentry->d_flags |= DCACHE_DENTRY_KILLED;
393 if (parent) 393 if (parent)
394 spin_unlock(&parent->d_lock); 394 spin_unlock(&parent->d_lock);
395 dentry_iput(dentry); 395 dentry_iput(dentry);
@@ -1048,7 +1048,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq
1048 * or deletion 1048 * or deletion
1049 */ 1049 */
1050 if (new != old->d_parent || 1050 if (new != old->d_parent ||
1051 (old->d_flags & DCACHE_DISCONNECTED) || 1051 (old->d_flags & DCACHE_DENTRY_KILLED) ||
1052 (!locked && read_seqretry(&rename_lock, seq))) { 1052 (!locked && read_seqretry(&rename_lock, seq))) {
1053 spin_unlock(&new->d_lock); 1053 spin_unlock(&new->d_lock);
1054 new = NULL; 1054 new = NULL;
@@ -1134,6 +1134,8 @@ positive:
1134 return 1; 1134 return 1;
1135 1135
1136rename_retry: 1136rename_retry:
1137 if (locked)
1138 goto again;
1137 locked = 1; 1139 locked = 1;
1138 write_seqlock(&rename_lock); 1140 write_seqlock(&rename_lock);
1139 goto again; 1141 goto again;
@@ -1141,7 +1143,7 @@ rename_retry:
1141EXPORT_SYMBOL(have_submounts); 1143EXPORT_SYMBOL(have_submounts);
1142 1144
1143/* 1145/*
1144 * Search the dentry child list for the specified parent, 1146 * Search the dentry child list of the specified parent,
1145 * and move any unused dentries to the end of the unused 1147 * and move any unused dentries to the end of the unused
1146 * list for prune_dcache(). We descend to the next level 1148 * list for prune_dcache(). We descend to the next level
1147 * whenever the d_subdirs list is non-empty and continue 1149 * whenever the d_subdirs list is non-empty and continue
@@ -1236,6 +1238,8 @@ out:
1236rename_retry: 1238rename_retry:
1237 if (found) 1239 if (found)
1238 return found; 1240 return found;
1241 if (locked)
1242 goto again;
1239 locked = 1; 1243 locked = 1;
1240 write_seqlock(&rename_lock); 1244 write_seqlock(&rename_lock);
1241 goto again; 1245 goto again;
@@ -3035,6 +3039,8 @@ resume:
3035 return; 3039 return;
3036 3040
3037rename_retry: 3041rename_retry:
3042 if (locked)
3043 goto again;
3038 locked = 1; 3044 locked = 1;
3039 write_seqlock(&rename_lock); 3045 write_seqlock(&rename_lock);
3040 goto again; 3046 goto again;
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 2340f6978d6..c5ca6ae5a30 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -526,73 +526,51 @@ struct array_data {
526 u32 elements; 526 u32 elements;
527}; 527};
528 528
529static int u32_array_open(struct inode *inode, struct file *file) 529static size_t u32_format_array(char *buf, size_t bufsize,
530{ 530 u32 *array, int array_size)
531 file->private_data = NULL;
532 return nonseekable_open(inode, file);
533}
534
535static size_t format_array(char *buf, size_t bufsize, const char *fmt,
536 u32 *array, u32 array_size)
537{ 531{
538 size_t ret = 0; 532 size_t ret = 0;
539 u32 i;
540 533
541 for (i = 0; i < array_size; i++) { 534 while (--array_size >= 0) {
542 size_t len; 535 size_t len;
536 char term = array_size ? ' ' : '\n';
543 537
544 len = snprintf(buf, bufsize, fmt, array[i]); 538 len = snprintf(buf, bufsize, "%u%c", *array++, term);
545 len++; /* ' ' or '\n' */
546 ret += len; 539 ret += len;
547 540
548 if (buf) { 541 buf += len;
549 buf += len; 542 bufsize -= len;
550 bufsize -= len;
551 buf[-1] = (i == array_size-1) ? '\n' : ' ';
552 }
553 } 543 }
554
555 ret++; /* \0 */
556 if (buf)
557 *buf = '\0';
558
559 return ret; 544 return ret;
560} 545}
561 546
562static char *format_array_alloc(const char *fmt, u32 *array, 547static int u32_array_open(struct inode *inode, struct file *file)
563 u32 array_size)
564{ 548{
565 size_t len = format_array(NULL, 0, fmt, array, array_size); 549 struct array_data *data = inode->i_private;
566 char *ret; 550 int size, elements = data->elements;
567 551 char *buf;
568 ret = kmalloc(len, GFP_KERNEL); 552
569 if (ret == NULL) 553 /*
570 return NULL; 554 * Max size:
555 * - 10 digits + ' '/'\n' = 11 bytes per number
556 * - terminating NUL character
557 */
558 size = elements*11;
559 buf = kmalloc(size+1, GFP_KERNEL);
560 if (!buf)
561 return -ENOMEM;
562 buf[size] = 0;
563
564 file->private_data = buf;
565 u32_format_array(buf, size, data->array, data->elements);
571 566
572 format_array(ret, len, fmt, array, array_size); 567 return nonseekable_open(inode, file);
573 return ret;
574} 568}
575 569
576static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, 570static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len,
577 loff_t *ppos) 571 loff_t *ppos)
578{ 572{
579 struct inode *inode = file->f_path.dentry->d_inode; 573 size_t size = strlen(file->private_data);
580 struct array_data *data = inode->i_private;
581 size_t size;
582
583 if (*ppos == 0) {
584 if (file->private_data) {
585 kfree(file->private_data);
586 file->private_data = NULL;
587 }
588
589 file->private_data = format_array_alloc("%u", data->array,
590 data->elements);
591 }
592
593 size = 0;
594 if (file->private_data)
595 size = strlen(file->private_data);
596 574
597 return simple_read_from_buffer(buf, len, ppos, 575 return simple_read_from_buffer(buf, len, ppos,
598 file->private_data, size); 576 file->private_data, size);
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 44ce5c6a541..d45ba456812 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -275,8 +275,14 @@ out:
275 275
276static int ecryptfs_flush(struct file *file, fl_owner_t td) 276static int ecryptfs_flush(struct file *file, fl_owner_t td)
277{ 277{
278 return file->f_mode & FMODE_WRITE 278 struct file *lower_file = ecryptfs_file_to_lower(file);
279 ? filemap_write_and_wait(file->f_mapping) : 0; 279
280 if (lower_file->f_op && lower_file->f_op->flush) {
281 filemap_write_and_wait(file->f_mapping);
282 return lower_file->f_op->flush(lower_file, td);
283 }
284
285 return 0;
280} 286}
281 287
282static int ecryptfs_release(struct inode *inode, struct file *file) 288static int ecryptfs_release(struct inode *inode, struct file *file)
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 534b129ea67..cc7709e7c50 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -619,6 +619,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
619 struct dentry *lower_old_dir_dentry; 619 struct dentry *lower_old_dir_dentry;
620 struct dentry *lower_new_dir_dentry; 620 struct dentry *lower_new_dir_dentry;
621 struct dentry *trap = NULL; 621 struct dentry *trap = NULL;
622 struct inode *target_inode;
622 623
623 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); 624 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
624 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); 625 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
@@ -626,6 +627,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
626 dget(lower_new_dentry); 627 dget(lower_new_dentry);
627 lower_old_dir_dentry = dget_parent(lower_old_dentry); 628 lower_old_dir_dentry = dget_parent(lower_old_dentry);
628 lower_new_dir_dentry = dget_parent(lower_new_dentry); 629 lower_new_dir_dentry = dget_parent(lower_new_dentry);
630 target_inode = new_dentry->d_inode;
629 trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); 631 trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
630 /* source should not be ancestor of target */ 632 /* source should not be ancestor of target */
631 if (trap == lower_old_dentry) { 633 if (trap == lower_old_dentry) {
@@ -641,6 +643,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
641 lower_new_dir_dentry->d_inode, lower_new_dentry); 643 lower_new_dir_dentry->d_inode, lower_new_dentry);
642 if (rc) 644 if (rc)
643 goto out_lock; 645 goto out_lock;
646 if (target_inode)
647 fsstack_copy_attr_all(target_inode,
648 ecryptfs_inode_to_lower(target_inode));
644 fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); 649 fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
645 if (new_dir != old_dir) 650 if (new_dir != old_dir)
646 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); 651 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 2768138eefe..9b627c15010 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -162,6 +162,7 @@ void ecryptfs_put_lower_file(struct inode *inode)
162 inode_info = ecryptfs_inode_to_private(inode); 162 inode_info = ecryptfs_inode_to_private(inode);
163 if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count, 163 if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count,
164 &inode_info->lower_file_mutex)) { 164 &inode_info->lower_file_mutex)) {
165 filemap_write_and_wait(inode->i_mapping);
165 fput(inode_info->lower_file); 166 fput(inode_info->lower_file);
166 inode_info->lower_file = NULL; 167 inode_info->lower_file = NULL;
167 mutex_unlock(&inode_info->lower_file_mutex); 168 mutex_unlock(&inode_info->lower_file_mutex);
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 376aa77f3ca..2616d0ea5c5 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -479,7 +479,7 @@ void ext2_discard_reservation(struct inode *inode)
479/** 479/**
480 * ext2_free_blocks() -- Free given blocks and update quota and i_blocks 480 * ext2_free_blocks() -- Free given blocks and update quota and i_blocks
481 * @inode: inode 481 * @inode: inode
482 * @block: start physcial block to free 482 * @block: start physical block to free
483 * @count: number of blocks to free 483 * @count: number of blocks to free
484 */ 484 */
485void ext2_free_blocks (struct inode * inode, unsigned long block, 485void ext2_free_blocks (struct inode * inode, unsigned long block,
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 90d901f0486..7320a66e958 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -483,7 +483,7 @@ void ext3_discard_reservation(struct inode *inode)
483 * ext3_free_blocks_sb() -- Free given blocks and update quota 483 * ext3_free_blocks_sb() -- Free given blocks and update quota
484 * @handle: handle to this transaction 484 * @handle: handle to this transaction
485 * @sb: super block 485 * @sb: super block
486 * @block: start physcial block to free 486 * @block: start physical block to free
487 * @count: number of blocks to free 487 * @count: number of blocks to free
488 * @pdquot_freed_blocks: pointer to quota 488 * @pdquot_freed_blocks: pointer to quota
489 */ 489 */
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index a07597307fd..7e87e37a372 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3072,6 +3072,8 @@ static int ext3_do_update_inode(handle_t *handle,
3072 struct ext3_inode_info *ei = EXT3_I(inode); 3072 struct ext3_inode_info *ei = EXT3_I(inode);
3073 struct buffer_head *bh = iloc->bh; 3073 struct buffer_head *bh = iloc->bh;
3074 int err = 0, rc, block; 3074 int err = 0, rc, block;
3075 int need_datasync = 0;
3076 __le32 disksize;
3075 uid_t i_uid; 3077 uid_t i_uid;
3076 gid_t i_gid; 3078 gid_t i_gid;
3077 3079
@@ -3113,7 +3115,11 @@ again:
3113 raw_inode->i_gid_high = 0; 3115 raw_inode->i_gid_high = 0;
3114 } 3116 }
3115 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); 3117 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
3116 raw_inode->i_size = cpu_to_le32(ei->i_disksize); 3118 disksize = cpu_to_le32(ei->i_disksize);
3119 if (disksize != raw_inode->i_size) {
3120 need_datasync = 1;
3121 raw_inode->i_size = disksize;
3122 }
3117 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); 3123 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
3118 raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); 3124 raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
3119 raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); 3125 raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
@@ -3129,8 +3135,11 @@ again:
3129 if (!S_ISREG(inode->i_mode)) { 3135 if (!S_ISREG(inode->i_mode)) {
3130 raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); 3136 raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
3131 } else { 3137 } else {
3132 raw_inode->i_size_high = 3138 disksize = cpu_to_le32(ei->i_disksize >> 32);
3133 cpu_to_le32(ei->i_disksize >> 32); 3139 if (disksize != raw_inode->i_size_high) {
3140 raw_inode->i_size_high = disksize;
3141 need_datasync = 1;
3142 }
3134 if (ei->i_disksize > 0x7fffffffULL) { 3143 if (ei->i_disksize > 0x7fffffffULL) {
3135 struct super_block *sb = inode->i_sb; 3144 struct super_block *sb = inode->i_sb;
3136 if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, 3145 if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
@@ -3183,6 +3192,8 @@ again:
3183 ext3_clear_inode_state(inode, EXT3_STATE_NEW); 3192 ext3_clear_inode_state(inode, EXT3_STATE_NEW);
3184 3193
3185 atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); 3194 atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
3195 if (need_datasync)
3196 atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
3186out_brelse: 3197out_brelse:
3187 brelse (bh); 3198 brelse (bh);
3188 ext3_std_error(inode->i_sb, err); 3199 ext3_std_error(inode->i_sb, err);
@@ -3196,7 +3207,7 @@ out_brelse:
3196 * 3207 *
3197 * - Within generic_file_write() for O_SYNC files. 3208 * - Within generic_file_write() for O_SYNC files.
3198 * Here, there will be no transaction running. We wait for any running 3209 * Here, there will be no transaction running. We wait for any running
3199 * trasnaction to commit. 3210 * transaction to commit.
3200 * 3211 *
3201 * - Within sys_sync(), kupdate and such. 3212 * - Within sys_sync(), kupdate and such.
3202 * We wait on commit, if tol to. 3213 * We wait on commit, if tol to.
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index dff171c3a12..c862ee5fe79 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3313,7 +3313,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle,
3313 * handle: The journal handle 3313 * handle: The journal handle
3314 * inode: The files inode 3314 * inode: The files inode
3315 * page: A locked page that contains the offset "from" 3315 * page: A locked page that contains the offset "from"
3316 * from: The starting byte offset (from the begining of the file) 3316 * from: The starting byte offset (from the beginning of the file)
3317 * to begin discarding 3317 * to begin discarding
3318 * len: The length of bytes to discard 3318 * len: The length of bytes to discard
3319 * flags: Optional flags that may be used: 3319 * flags: Optional flags that may be used:
@@ -3321,11 +3321,11 @@ int ext4_discard_partial_page_buffers(handle_t *handle,
3321 * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED 3321 * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
3322 * Only zero the regions of the page whose buffer heads 3322 * Only zero the regions of the page whose buffer heads
3323 * have already been unmapped. This flag is appropriate 3323 * have already been unmapped. This flag is appropriate
3324 * for updateing the contents of a page whose blocks may 3324 * for updating the contents of a page whose blocks may
3325 * have already been released, and we only want to zero 3325 * have already been released, and we only want to zero
3326 * out the regions that correspond to those released blocks. 3326 * out the regions that correspond to those released blocks.
3327 * 3327 *
3328 * Returns zero on sucess or negative on failure. 3328 * Returns zero on success or negative on failure.
3329 */ 3329 */
3330static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, 3330static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
3331 struct inode *inode, struct page *page, loff_t from, 3331 struct inode *inode, struct page *page, loff_t from,
@@ -3486,7 +3486,7 @@ int ext4_can_truncate(struct inode *inode)
3486 * @offset: The offset where the hole will begin 3486 * @offset: The offset where the hole will begin
3487 * @len: The length of the hole 3487 * @len: The length of the hole
3488 * 3488 *
3489 * Returns: 0 on sucess or negative on failure 3489 * Returns: 0 on success or negative on failure
3490 */ 3490 */
3491 3491
3492int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) 3492int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
@@ -4008,7 +4008,7 @@ static int ext4_inode_blocks_set(handle_t *handle,
4008 4008
4009 if (i_blocks <= ~0U) { 4009 if (i_blocks <= ~0U) {
4010 /* 4010 /*
4011 * i_blocks can be represnted in a 32 bit variable 4011 * i_blocks can be represented in a 32 bit variable
4012 * as multiple of 512 bytes 4012 * as multiple of 512 bytes
4013 */ 4013 */
4014 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 4014 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
@@ -4169,7 +4169,7 @@ out_brelse:
4169 * 4169 *
4170 * - Within generic_file_write() for O_SYNC files. 4170 * - Within generic_file_write() for O_SYNC files.
4171 * Here, there will be no transaction running. We wait for any running 4171 * Here, there will be no transaction running. We wait for any running
4172 * trasnaction to commit. 4172 * transaction to commit.
4173 * 4173 *
4174 * - Within sys_sync(), kupdate and such. 4174 * - Within sys_sync(), kupdate and such.
4175 * We wait on commit, if tol to. 4175 * We wait on commit, if tol to.
@@ -4413,7 +4413,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4413 * worse case, the indexs blocks spread over different block groups 4413 * worse case, the indexs blocks spread over different block groups
4414 * 4414 *
4415 * If datablocks are discontiguous, they are possible to spread over 4415 * If datablocks are discontiguous, they are possible to spread over
4416 * different block groups too. If they are contiuguous, with flexbg, 4416 * different block groups too. If they are contiguous, with flexbg,
4417 * they could still across block group boundary. 4417 * they could still across block group boundary.
4418 * 4418 *
4419 * Also account for superblock, inode, quota and xattr blocks 4419 * Also account for superblock, inode, quota and xattr blocks
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8eae94771c4..08778f6cdfe 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4709,7 +4709,7 @@ error_return:
4709 * ext4_group_add_blocks() -- Add given blocks to an existing group 4709 * ext4_group_add_blocks() -- Add given blocks to an existing group
4710 * @handle: handle to this transaction 4710 * @handle: handle to this transaction
4711 * @sb: super block 4711 * @sb: super block
4712 * @block: start physcial block to add to the block group 4712 * @block: start physical block to add to the block group
4713 * @count: number of blocks to free 4713 * @count: number of blocks to free
4714 * 4714 *
4715 * This marks the blocks as free in the bitmap and buddy. 4715 * This marks the blocks as free in the bitmap and buddy.
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index be3efc4f64f..6d46c0d7833 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -577,10 +577,6 @@ static long writeback_chunk_size(struct backing_dev_info *bdi,
577/* 577/*
578 * Write a portion of b_io inodes which belong to @sb. 578 * Write a portion of b_io inodes which belong to @sb.
579 * 579 *
580 * If @only_this_sb is true, then find and write all such
581 * inodes. Otherwise write only ones which go sequentially
582 * in reverse order.
583 *
584 * Return the number of pages and/or inodes written. 580 * Return the number of pages and/or inodes written.
585 */ 581 */
586static long writeback_sb_inodes(struct super_block *sb, 582static long writeback_sb_inodes(struct super_block *sb,
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 03ff5b1eba9..75a20c092dd 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -117,7 +117,7 @@ static ssize_t fuse_conn_max_background_write(struct file *file,
117 const char __user *buf, 117 const char __user *buf,
118 size_t count, loff_t *ppos) 118 size_t count, loff_t *ppos)
119{ 119{
120 unsigned val; 120 unsigned uninitialized_var(val);
121 ssize_t ret; 121 ssize_t ret;
122 122
123 ret = fuse_conn_limit_write(file, buf, count, ppos, &val, 123 ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
@@ -154,7 +154,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
154 const char __user *buf, 154 const char __user *buf,
155 size_t count, loff_t *ppos) 155 size_t count, loff_t *ppos)
156{ 156{
157 unsigned val; 157 unsigned uninitialized_var(val);
158 ssize_t ret; 158 ssize_t ret;
159 159
160 ret = fuse_conn_limit_write(file, buf, count, ppos, &val, 160 ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 3426521f320..ee8d5504229 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -396,7 +396,7 @@ err_device:
396err_region: 396err_region:
397 unregister_chrdev_region(devt, 1); 397 unregister_chrdev_region(devt, 1);
398err: 398err:
399 fc->conn_error = 1; 399 fuse_conn_kill(fc);
400 goto out; 400 goto out;
401} 401}
402 402
@@ -532,8 +532,6 @@ static int cuse_channel_release(struct inode *inode, struct file *file)
532 cdev_del(cc->cdev); 532 cdev_del(cc->cdev);
533 } 533 }
534 534
535 /* kill connection and shutdown channel */
536 fuse_conn_kill(&cc->fc);
537 rc = fuse_dev_release(inode, file); /* puts the base reference */ 535 rc = fuse_dev_release(inode, file); /* puts the base reference */
538 536
539 return rc; 537 return rc;
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 7df2b5e8fbe..f4246cfc8d8 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1576,6 +1576,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1576 req->pages[req->num_pages] = page; 1576 req->pages[req->num_pages] = page;
1577 req->num_pages++; 1577 req->num_pages++;
1578 1578
1579 offset = 0;
1579 num -= this_num; 1580 num -= this_num;
1580 total_len += this_num; 1581 total_len += this_num;
1581 index++; 1582 index++;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ce0a2838ccd..fca222dabe3 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -367,11 +367,6 @@ void fuse_conn_kill(struct fuse_conn *fc)
367 wake_up_all(&fc->waitq); 367 wake_up_all(&fc->waitq);
368 wake_up_all(&fc->blocked_waitq); 368 wake_up_all(&fc->blocked_waitq);
369 wake_up_all(&fc->reserved_req_waitq); 369 wake_up_all(&fc->reserved_req_waitq);
370 mutex_lock(&fuse_mutex);
371 list_del(&fc->entry);
372 fuse_ctl_remove_conn(fc);
373 mutex_unlock(&fuse_mutex);
374 fuse_bdi_destroy(fc);
375} 370}
376EXPORT_SYMBOL_GPL(fuse_conn_kill); 371EXPORT_SYMBOL_GPL(fuse_conn_kill);
377 372
@@ -380,7 +375,14 @@ static void fuse_put_super(struct super_block *sb)
380 struct fuse_conn *fc = get_fuse_conn_super(sb); 375 struct fuse_conn *fc = get_fuse_conn_super(sb);
381 376
382 fuse_send_destroy(fc); 377 fuse_send_destroy(fc);
378
383 fuse_conn_kill(fc); 379 fuse_conn_kill(fc);
380 mutex_lock(&fuse_mutex);
381 list_del(&fc->entry);
382 fuse_ctl_remove_conn(fc);
383 mutex_unlock(&fuse_mutex);
384 fuse_bdi_destroy(fc);
385
384 fuse_conn_put(fc); 386 fuse_conn_put(fc);
385} 387}
386 388
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index d6526347d38..01c4975da4b 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -612,6 +612,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
612 struct gfs2_sbd *sdp = GFS2_SB(mapping->host); 612 struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
613 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 613 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
614 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 614 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
615 unsigned requested = 0;
615 int alloc_required; 616 int alloc_required;
616 int error = 0; 617 int error = 0;
617 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 618 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
@@ -641,7 +642,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
641 if (error) 642 if (error)
642 goto out_unlock; 643 goto out_unlock;
643 644
644 error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); 645 requested = data_blocks + ind_blocks;
646 error = gfs2_inplace_reserve(ip, requested);
645 if (error) 647 if (error)
646 goto out_qunlock; 648 goto out_qunlock;
647 } 649 }
@@ -654,7 +656,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
654 if (&ip->i_inode == sdp->sd_rindex) 656 if (&ip->i_inode == sdp->sd_rindex)
655 rblocks += 2 * RES_STATFS; 657 rblocks += 2 * RES_STATFS;
656 if (alloc_required) 658 if (alloc_required)
657 rblocks += gfs2_rg_blocks(ip); 659 rblocks += gfs2_rg_blocks(ip, requested);
658 660
659 error = gfs2_trans_begin(sdp, rblocks, 661 error = gfs2_trans_begin(sdp, rblocks,
660 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); 662 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
@@ -868,8 +870,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
868 brelse(dibh); 870 brelse(dibh);
869failed: 871failed:
870 gfs2_trans_end(sdp); 872 gfs2_trans_end(sdp);
871 if (gfs2_mb_reserved(ip)) 873 gfs2_inplace_release(ip);
872 gfs2_inplace_release(ip);
873 if (ip->i_res->rs_qa_qd_num) 874 if (ip->i_res->rs_qa_qd_num)
874 gfs2_quota_unlock(ip); 875 gfs2_quota_unlock(ip);
875 if (inode == sdp->sd_rindex) { 876 if (inode == sdp->sd_rindex) {
@@ -1023,7 +1024,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
1023 offset, nr_segs, gfs2_get_block_direct, 1024 offset, nr_segs, gfs2_get_block_direct,
1024 NULL, NULL, 0); 1025 NULL, NULL, 0);
1025out: 1026out:
1026 gfs2_glock_dq_m(1, &gh); 1027 gfs2_glock_dq(&gh);
1027 gfs2_holder_uninit(&gh); 1028 gfs2_holder_uninit(&gh);
1028 return rv; 1029 return rv;
1029} 1030}
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 49cd7dd4a9f..1fd3ae237bd 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -786,7 +786,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
786 goto out_rlist; 786 goto out_rlist;
787 787
788 if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */ 788 if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */
789 gfs2_rs_deltree(ip->i_res); 789 gfs2_rs_deltree(ip, ip->i_res);
790 790
791 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + 791 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
792 RES_INDIRECT + RES_STATFS + RES_QUOTA, 792 RES_INDIRECT + RES_STATFS + RES_QUOTA,
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index d1d791ef38d..30e21997a1a 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -323,6 +323,29 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
323} 323}
324 324
325/** 325/**
326 * gfs2_size_hint - Give a hint to the size of a write request
327 * @file: The struct file
328 * @offset: The file offset of the write
329 * @size: The length of the write
330 *
331 * When we are about to do a write, this function records the total
332 * write size in order to provide a suitable hint to the lower layers
333 * about how many blocks will be required.
334 *
335 */
336
337static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
338{
339 struct inode *inode = filep->f_dentry->d_inode;
340 struct gfs2_sbd *sdp = GFS2_SB(inode);
341 struct gfs2_inode *ip = GFS2_I(inode);
342 size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift;
343 int hint = min_t(size_t, INT_MAX, blks);
344
345 atomic_set(&ip->i_res->rs_sizehint, hint);
346}
347
348/**
326 * gfs2_allocate_page_backing - Use bmap to allocate blocks 349 * gfs2_allocate_page_backing - Use bmap to allocate blocks
327 * @page: The (locked) page to allocate backing for 350 * @page: The (locked) page to allocate backing for
328 * 351 *
@@ -382,8 +405,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
382 if (ret) 405 if (ret)
383 return ret; 406 return ret;
384 407
385 atomic_set(&ip->i_res->rs_sizehint, 408 gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);
386 PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift);
387 409
388 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 410 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
389 ret = gfs2_glock_nq(&gh); 411 ret = gfs2_glock_nq(&gh);
@@ -419,7 +441,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
419 rblocks += data_blocks ? data_blocks : 1; 441 rblocks += data_blocks ? data_blocks : 1;
420 if (ind_blocks || data_blocks) { 442 if (ind_blocks || data_blocks) {
421 rblocks += RES_STATFS + RES_QUOTA; 443 rblocks += RES_STATFS + RES_QUOTA;
422 rblocks += gfs2_rg_blocks(ip); 444 rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
423 } 445 }
424 ret = gfs2_trans_begin(sdp, rblocks, 0); 446 ret = gfs2_trans_begin(sdp, rblocks, 0);
425 if (ret) 447 if (ret)
@@ -663,7 +685,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
663 if (ret) 685 if (ret)
664 return ret; 686 return ret;
665 687
666 atomic_set(&ip->i_res->rs_sizehint, writesize >> sdp->sd_sb.sb_bsize_shift); 688 gfs2_size_hint(file, pos, writesize);
689
667 if (file->f_flags & O_APPEND) { 690 if (file->f_flags & O_APPEND) {
668 struct gfs2_holder gh; 691 struct gfs2_holder gh;
669 692
@@ -789,7 +812,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
789 if (unlikely(error)) 812 if (unlikely(error))
790 goto out_uninit; 813 goto out_uninit;
791 814
792 atomic_set(&ip->i_res->rs_sizehint, len >> sdp->sd_sb.sb_bsize_shift); 815 gfs2_size_hint(file, offset, len);
793 816
794 while (len > 0) { 817 while (len > 0) {
795 if (len < bytes) 818 if (len < bytes)
@@ -822,7 +845,7 @@ retry:
822 &max_bytes, &data_blocks, &ind_blocks); 845 &max_bytes, &data_blocks, &ind_blocks);
823 846
824 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + 847 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
825 RES_RG_HDR + gfs2_rg_blocks(ip); 848 RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
826 if (gfs2_is_jdata(ip)) 849 if (gfs2_is_jdata(ip))
827 rblocks += data_blocks ? data_blocks : 1; 850 rblocks += data_blocks ? data_blocks : 1;
828 851
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1ed81f40da0..e6c2fd53cab 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -186,20 +186,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
186} 186}
187 187
188/** 188/**
189 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
190 * @gl: the glock
191 *
192 * If the glock is demotable, then we add it (or move it) to the end
193 * of the glock LRU list.
194 */
195
196static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
197{
198 if (demote_ok(gl))
199 gfs2_glock_add_to_lru(gl);
200}
201
202/**
203 * gfs2_glock_put_nolock() - Decrement reference count on glock 189 * gfs2_glock_put_nolock() - Decrement reference count on glock
204 * @gl: The glock to put 190 * @gl: The glock to put
205 * 191 *
@@ -883,7 +869,14 @@ static int gfs2_glock_demote_wait(void *word)
883 return 0; 869 return 0;
884} 870}
885 871
886static void wait_on_holder(struct gfs2_holder *gh) 872/**
873 * gfs2_glock_wait - wait on a glock acquisition
874 * @gh: the glock holder
875 *
876 * Returns: 0 on success
877 */
878
879int gfs2_glock_wait(struct gfs2_holder *gh)
887{ 880{
888 unsigned long time1 = jiffies; 881 unsigned long time1 = jiffies;
889 882
@@ -894,12 +887,7 @@ static void wait_on_holder(struct gfs2_holder *gh)
894 gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + 887 gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
895 GL_GLOCK_HOLD_INCR, 888 GL_GLOCK_HOLD_INCR,
896 GL_GLOCK_MAX_HOLD); 889 GL_GLOCK_MAX_HOLD);
897} 890 return gh->gh_error;
898
899static void wait_on_demote(struct gfs2_glock *gl)
900{
901 might_sleep();
902 wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE);
903} 891}
904 892
905/** 893/**
@@ -929,19 +917,6 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
929 trace_gfs2_demote_rq(gl); 917 trace_gfs2_demote_rq(gl);
930} 918}
931 919
932/**
933 * gfs2_glock_wait - wait on a glock acquisition
934 * @gh: the glock holder
935 *
936 * Returns: 0 on success
937 */
938
939int gfs2_glock_wait(struct gfs2_holder *gh)
940{
941 wait_on_holder(gh);
942 return gh->gh_error;
943}
944
945void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) 920void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
946{ 921{
947 struct va_format vaf; 922 struct va_format vaf;
@@ -979,7 +954,7 @@ __acquires(&gl->gl_spin)
979 struct gfs2_sbd *sdp = gl->gl_sbd; 954 struct gfs2_sbd *sdp = gl->gl_sbd;
980 struct list_head *insert_pt = NULL; 955 struct list_head *insert_pt = NULL;
981 struct gfs2_holder *gh2; 956 struct gfs2_holder *gh2;
982 int try_lock = 0; 957 int try_futile = 0;
983 958
984 BUG_ON(gh->gh_owner_pid == NULL); 959 BUG_ON(gh->gh_owner_pid == NULL);
985 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) 960 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
@@ -987,7 +962,7 @@ __acquires(&gl->gl_spin)
987 962
988 if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { 963 if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
989 if (test_bit(GLF_LOCK, &gl->gl_flags)) 964 if (test_bit(GLF_LOCK, &gl->gl_flags))
990 try_lock = 1; 965 try_futile = !may_grant(gl, gh);
991 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) 966 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
992 goto fail; 967 goto fail;
993 } 968 }
@@ -996,9 +971,8 @@ __acquires(&gl->gl_spin)
996 if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && 971 if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
997 (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK))) 972 (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
998 goto trap_recursive; 973 goto trap_recursive;
999 if (try_lock && 974 if (try_futile &&
1000 !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) && 975 !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
1001 !may_grant(gl, gh)) {
1002fail: 976fail:
1003 gh->gh_error = GLR_TRYFAILED; 977 gh->gh_error = GLR_TRYFAILED;
1004 gfs2_holder_wake(gh); 978 gfs2_holder_wake(gh);
@@ -1121,8 +1095,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1121 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1095 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1122 fast_path = 1; 1096 fast_path = 1;
1123 } 1097 }
1124 if (!test_bit(GLF_LFLUSH, &gl->gl_flags)) 1098 if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
1125 __gfs2_glock_schedule_for_reclaim(gl); 1099 gfs2_glock_add_to_lru(gl);
1100
1126 trace_gfs2_glock_queue(gh, 0); 1101 trace_gfs2_glock_queue(gh, 0);
1127 spin_unlock(&gl->gl_spin); 1102 spin_unlock(&gl->gl_spin);
1128 if (likely(fast_path)) 1103 if (likely(fast_path))
@@ -1141,7 +1116,8 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh)
1141{ 1116{
1142 struct gfs2_glock *gl = gh->gh_gl; 1117 struct gfs2_glock *gl = gh->gh_gl;
1143 gfs2_glock_dq(gh); 1118 gfs2_glock_dq(gh);
1144 wait_on_demote(gl); 1119 might_sleep();
1120 wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE);
1145} 1121}
1146 1122
1147/** 1123/**
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 4bdcf378418..32cc4fde975 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -94,6 +94,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
94 /* A shortened, inline version of gfs2_trans_begin() */ 94 /* A shortened, inline version of gfs2_trans_begin() */
95 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); 95 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
96 tr.tr_ip = (unsigned long)__builtin_return_address(0); 96 tr.tr_ip = (unsigned long)__builtin_return_address(0);
97 sb_start_intwrite(sdp->sd_vfs);
97 gfs2_log_reserve(sdp, tr.tr_reserved); 98 gfs2_log_reserve(sdp, tr.tr_reserved);
98 BUG_ON(current->journal_info); 99 BUG_ON(current->journal_info);
99 current->journal_info = &tr; 100 current->journal_info = &tr;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index aaecc8085fc..3d469d37345 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -99,9 +99,26 @@ struct gfs2_rgrpd {
99#define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ 99#define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */
100 spinlock_t rd_rsspin; /* protects reservation related vars */ 100 spinlock_t rd_rsspin; /* protects reservation related vars */
101 struct rb_root rd_rstree; /* multi-block reservation tree */ 101 struct rb_root rd_rstree; /* multi-block reservation tree */
102 u32 rd_rs_cnt; /* count of current reservations */
103}; 102};
104 103
104struct gfs2_rbm {
105 struct gfs2_rgrpd *rgd;
106 struct gfs2_bitmap *bi; /* Bitmap must belong to the rgd */
107 u32 offset; /* The offset is bitmap relative */
108};
109
110static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
111{
112 return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset;
113}
114
115static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1,
116 const struct gfs2_rbm *rbm2)
117{
118 return (rbm1->rgd == rbm2->rgd) && (rbm1->bi == rbm2->bi) &&
119 (rbm1->offset == rbm2->offset);
120}
121
105enum gfs2_state_bits { 122enum gfs2_state_bits {
106 BH_Pinned = BH_PrivateStart, 123 BH_Pinned = BH_PrivateStart,
107 BH_Escaped = BH_PrivateStart + 1, 124 BH_Escaped = BH_PrivateStart + 1,
@@ -250,18 +267,11 @@ struct gfs2_blkreserv {
250 /* components used during write (step 1): */ 267 /* components used during write (step 1): */
251 atomic_t rs_sizehint; /* hint of the write size */ 268 atomic_t rs_sizehint; /* hint of the write size */
252 269
253 /* components used during inplace_reserve (step 2): */
254 u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
255
256 /* components used during get_local_rgrp (step 3): */
257 struct gfs2_rgrpd *rs_rgd; /* pointer to the gfs2_rgrpd */
258 struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */ 270 struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
259 struct rb_node rs_node; /* link to other block reservations */ 271 struct rb_node rs_node; /* link to other block reservations */
260 272 struct gfs2_rbm rs_rbm; /* Start of reservation */
261 /* components used during block searches and assignments (step 4): */
262 struct gfs2_bitmap *rs_bi; /* bitmap for the current allocation */
263 u32 rs_biblk; /* start block relative to the bi */
264 u32 rs_free; /* how many blocks are still free */ 273 u32 rs_free; /* how many blocks are still free */
274 u64 rs_inum; /* Inode number for reservation */
265 275
266 /* ancillary quota stuff */ 276 /* ancillary quota stuff */
267 struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; 277 struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 4ce22e54730..381893ceefa 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -712,14 +712,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
712 if (error) 712 if (error)
713 goto fail_gunlock2; 713 goto fail_gunlock2;
714 714
715 /* The newly created inode needs a reservation so it can allocate 715 error = gfs2_rs_alloc(ip);
716 xattrs. At the same time, we want new blocks allocated to the new 716 if (error)
717 dinode to be as contiguous as possible. Since we allocated the 717 goto fail_gunlock2;
718 dinode block under the directory's reservation, we transfer
719 ownership of that reservation to the new inode. The directory
720 doesn't need a reservation unless it needs a new allocation. */
721 ip->i_res = dip->i_res;
722 dip->i_res = NULL;
723 718
724 error = gfs2_acl_create(dip, inode); 719 error = gfs2_acl_create(dip, inode);
725 if (error) 720 if (error)
@@ -737,10 +732,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
737 brelse(bh); 732 brelse(bh);
738 733
739 gfs2_trans_end(sdp); 734 gfs2_trans_end(sdp);
740 /* Check if we reserved space in the rgrp. Function link_dinode may 735 gfs2_inplace_release(dip);
741 not, depending on whether alloc is required. */
742 if (gfs2_mb_reserved(dip))
743 gfs2_inplace_release(dip);
744 gfs2_quota_unlock(dip); 736 gfs2_quota_unlock(dip);
745 mark_inode_dirty(inode); 737 mark_inode_dirty(inode);
746 gfs2_glock_dq_uninit_m(2, ghs); 738 gfs2_glock_dq_uninit_m(2, ghs);
@@ -897,7 +889,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
897 goto out_gunlock_q; 889 goto out_gunlock_q;
898 890
899 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 891 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
900 gfs2_rg_blocks(dip) + 892 gfs2_rg_blocks(dip, sdp->sd_max_dirres) +
901 2 * RES_DINODE + RES_STATFS + 893 2 * RES_DINODE + RES_STATFS +
902 RES_QUOTA, 0); 894 RES_QUOTA, 0);
903 if (error) 895 if (error)
@@ -1378,7 +1370,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1378 goto out_gunlock_q; 1370 goto out_gunlock_q;
1379 1371
1380 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 1372 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
1381 gfs2_rg_blocks(ndip) + 1373 gfs2_rg_blocks(ndip, sdp->sd_max_dirres) +
1382 4 * RES_DINODE + 4 * RES_LEAF + 1374 4 * RES_DINODE + 4 * RES_LEAF +
1383 RES_STATFS + RES_QUOTA + 4, 0); 1375 RES_STATFS + RES_QUOTA + 4, 0);
1384 if (error) 1376 if (error)
@@ -1722,7 +1714,9 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name,
1722 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 1714 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1723 ret = gfs2_glock_nq(&gh); 1715 ret = gfs2_glock_nq(&gh);
1724 if (ret == 0) { 1716 if (ret == 0) {
1725 ret = generic_setxattr(dentry, name, data, size, flags); 1717 ret = gfs2_rs_alloc(ip);
1718 if (ret == 0)
1719 ret = generic_setxattr(dentry, name, data, size, flags);
1726 gfs2_glock_dq(&gh); 1720 gfs2_glock_dq(&gh);
1727 } 1721 }
1728 gfs2_holder_uninit(&gh); 1722 gfs2_holder_uninit(&gh);
@@ -1757,7 +1751,9 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
1757 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 1751 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1758 ret = gfs2_glock_nq(&gh); 1752 ret = gfs2_glock_nq(&gh);
1759 if (ret == 0) { 1753 if (ret == 0) {
1760 ret = generic_removexattr(dentry, name); 1754 ret = gfs2_rs_alloc(ip);
1755 if (ret == 0)
1756 ret = generic_removexattr(dentry, name);
1761 gfs2_glock_dq(&gh); 1757 gfs2_glock_dq(&gh);
1762 } 1758 }
1763 gfs2_holder_uninit(&gh); 1759 gfs2_holder_uninit(&gh);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e5af9dc420e..e443966c810 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -19,6 +19,7 @@
19#include <linux/mount.h> 19#include <linux/mount.h>
20#include <linux/gfs2_ondisk.h> 20#include <linux/gfs2_ondisk.h>
21#include <linux/quotaops.h> 21#include <linux/quotaops.h>
22#include <linux/lockdep.h>
22 23
23#include "gfs2.h" 24#include "gfs2.h"
24#include "incore.h" 25#include "incore.h"
@@ -766,6 +767,7 @@ fail:
766 return error; 767 return error;
767} 768}
768 769
770static struct lock_class_key gfs2_quota_imutex_key;
769 771
770static int init_inodes(struct gfs2_sbd *sdp, int undo) 772static int init_inodes(struct gfs2_sbd *sdp, int undo)
771{ 773{
@@ -803,6 +805,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
803 fs_err(sdp, "can't get quota file inode: %d\n", error); 805 fs_err(sdp, "can't get quota file inode: %d\n", error);
804 goto fail_rindex; 806 goto fail_rindex;
805 } 807 }
808 /*
809 * i_mutex on quota files is special. Since this inode is hidden system
810 * file, we are safe to define locking ourselves.
811 */
812 lockdep_set_class(&sdp->sd_quota_inode->i_mutex,
813 &gfs2_quota_imutex_key);
806 814
807 error = gfs2_rindex_update(sdp); 815 error = gfs2_rindex_update(sdp);
808 if (error) 816 if (error)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a3bde91645c..4021deca61e 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -765,6 +765,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
765 struct gfs2_holder *ghs, i_gh; 765 struct gfs2_holder *ghs, i_gh;
766 unsigned int qx, x; 766 unsigned int qx, x;
767 struct gfs2_quota_data *qd; 767 struct gfs2_quota_data *qd;
768 unsigned reserved;
768 loff_t offset; 769 loff_t offset;
769 unsigned int nalloc = 0, blocks; 770 unsigned int nalloc = 0, blocks;
770 int error; 771 int error;
@@ -781,7 +782,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
781 return -ENOMEM; 782 return -ENOMEM;
782 783
783 sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); 784 sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
784 mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA); 785 mutex_lock(&ip->i_inode.i_mutex);
785 for (qx = 0; qx < num_qd; qx++) { 786 for (qx = 0; qx < num_qd; qx++) {
786 error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE, 787 error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
787 GL_NOCACHE, &ghs[qx]); 788 GL_NOCACHE, &ghs[qx]);
@@ -811,13 +812,13 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
811 * two blocks need to be updated instead of 1 */ 812 * two blocks need to be updated instead of 1 */
812 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; 813 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3;
813 814
814 error = gfs2_inplace_reserve(ip, 1 + 815 reserved = 1 + (nalloc * (data_blocks + ind_blocks));
815 (nalloc * (data_blocks + ind_blocks))); 816 error = gfs2_inplace_reserve(ip, reserved);
816 if (error) 817 if (error)
817 goto out_alloc; 818 goto out_alloc;
818 819
819 if (nalloc) 820 if (nalloc)
820 blocks += gfs2_rg_blocks(ip) + nalloc * ind_blocks + RES_STATFS; 821 blocks += gfs2_rg_blocks(ip, reserved) + nalloc * ind_blocks + RES_STATFS;
821 822
822 error = gfs2_trans_begin(sdp, blocks, 0); 823 error = gfs2_trans_begin(sdp, blocks, 0);
823 if (error) 824 if (error)
@@ -1598,7 +1599,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1598 error = gfs2_inplace_reserve(ip, blocks); 1599 error = gfs2_inplace_reserve(ip, blocks);
1599 if (error) 1600 if (error)
1600 goto out_i; 1601 goto out_i;
1601 blocks += gfs2_rg_blocks(ip); 1602 blocks += gfs2_rg_blocks(ip, blocks);
1602 } 1603 }
1603 1604
1604 /* Some quotas span block boundaries and can update two blocks, 1605 /* Some quotas span block boundaries and can update two blocks,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 4d34887a601..3cc402ce6fe 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -35,9 +35,6 @@
35#define BFITNOENT ((u32)~0) 35#define BFITNOENT ((u32)~0)
36#define NO_BLOCK ((u64)~0) 36#define NO_BLOCK ((u64)~0)
37 37
38#define RSRV_CONTENTION_FACTOR 4
39#define RGRP_RSRV_MAX_CONTENDERS 2
40
41#if BITS_PER_LONG == 32 38#if BITS_PER_LONG == 32
42#define LBITMASK (0x55555555UL) 39#define LBITMASK (0x55555555UL)
43#define LBITSKIP55 (0x55555555UL) 40#define LBITSKIP55 (0x55555555UL)
@@ -67,53 +64,48 @@ static const char valid_change[16] = {
67 1, 0, 0, 0 64 1, 0, 0, 0
68}; 65};
69 66
70static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, 67static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
71 unsigned char old_state, 68 const struct gfs2_inode *ip, bool nowrap);
72 struct gfs2_bitmap **rbi); 69
73 70
74/** 71/**
75 * gfs2_setbit - Set a bit in the bitmaps 72 * gfs2_setbit - Set a bit in the bitmaps
76 * @rgd: the resource group descriptor 73 * @rbm: The position of the bit to set
77 * @buf2: the clone buffer that holds the bitmaps 74 * @do_clone: Also set the clone bitmap, if it exists
78 * @bi: the bitmap structure
79 * @block: the block to set
80 * @new_state: the new state of the block 75 * @new_state: the new state of the block
81 * 76 *
82 */ 77 */
83 78
84static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, 79static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
85 struct gfs2_bitmap *bi, u32 block,
86 unsigned char new_state) 80 unsigned char new_state)
87{ 81{
88 unsigned char *byte1, *byte2, *end, cur_state; 82 unsigned char *byte1, *byte2, *end, cur_state;
89 unsigned int buflen = bi->bi_len; 83 unsigned int buflen = rbm->bi->bi_len;
90 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; 84 const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
91 85
92 byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY); 86 byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY);
93 end = bi->bi_bh->b_data + bi->bi_offset + buflen; 87 end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen;
94 88
95 BUG_ON(byte1 >= end); 89 BUG_ON(byte1 >= end);
96 90
97 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; 91 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
98 92
99 if (unlikely(!valid_change[new_state * 4 + cur_state])) { 93 if (unlikely(!valid_change[new_state * 4 + cur_state])) {
100 printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, " 94 printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, "
101 "new_state=%d\n", 95 "new_state=%d\n", rbm->offset, cur_state, new_state);
102 (unsigned long long)block, cur_state, new_state); 96 printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n",
103 printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n", 97 (unsigned long long)rbm->rgd->rd_addr,
104 (unsigned long long)rgd->rd_addr, 98 rbm->bi->bi_start);
105 (unsigned long)bi->bi_start); 99 printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n",
106 printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n", 100 rbm->bi->bi_offset, rbm->bi->bi_len);
107 (unsigned long)bi->bi_offset,
108 (unsigned long)bi->bi_len);
109 dump_stack(); 101 dump_stack();
110 gfs2_consist_rgrpd(rgd); 102 gfs2_consist_rgrpd(rbm->rgd);
111 return; 103 return;
112 } 104 }
113 *byte1 ^= (cur_state ^ new_state) << bit; 105 *byte1 ^= (cur_state ^ new_state) << bit;
114 106
115 if (buf2) { 107 if (do_clone && rbm->bi->bi_clone) {
116 byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY); 108 byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY);
117 cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; 109 cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
118 *byte2 ^= (cur_state ^ new_state) << bit; 110 *byte2 ^= (cur_state ^ new_state) << bit;
119 } 111 }
@@ -121,30 +113,21 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2,
121 113
122/** 114/**
123 * gfs2_testbit - test a bit in the bitmaps 115 * gfs2_testbit - test a bit in the bitmaps
124 * @rgd: the resource group descriptor 116 * @rbm: The bit to test
125 * @buffer: the buffer that holds the bitmaps
126 * @buflen: the length (in bytes) of the buffer
127 * @block: the block to read
128 * 117 *
118 * Returns: The two bit block state of the requested bit
129 */ 119 */
130 120
131static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, 121static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
132 const unsigned char *buffer,
133 unsigned int buflen, u32 block)
134{ 122{
135 const unsigned char *byte, *end; 123 const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset;
136 unsigned char cur_state; 124 const u8 *byte;
137 unsigned int bit; 125 unsigned int bit;
138 126
139 byte = buffer + (block / GFS2_NBBY); 127 byte = buffer + (rbm->offset / GFS2_NBBY);
140 bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; 128 bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
141 end = buffer + buflen;
142
143 gfs2_assert(rgd->rd_sbd, byte < end);
144 129
145 cur_state = (*byte >> bit) & GFS2_BIT_MASK; 130 return (*byte >> bit) & GFS2_BIT_MASK;
146
147 return cur_state;
148} 131}
149 132
150/** 133/**
@@ -192,7 +175,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
192 */ 175 */
193static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) 176static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
194{ 177{
195 u64 startblk = gfs2_rs_startblk(rs); 178 u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
196 179
197 if (blk >= startblk + rs->rs_free) 180 if (blk >= startblk + rs->rs_free)
198 return 1; 181 return 1;
@@ -202,36 +185,6 @@ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
202} 185}
203 186
204/** 187/**
205 * rs_find - Find a rgrp multi-block reservation that contains a given block
206 * @rgd: The rgrp
207 * @rgblk: The block we're looking for, relative to the rgrp
208 */
209static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk)
210{
211 struct rb_node **newn;
212 int rc;
213 u64 fsblk = rgblk + rgd->rd_data0;
214
215 spin_lock(&rgd->rd_rsspin);
216 newn = &rgd->rd_rstree.rb_node;
217 while (*newn) {
218 struct gfs2_blkreserv *cur =
219 rb_entry(*newn, struct gfs2_blkreserv, rs_node);
220 rc = rs_cmp(fsblk, 1, cur);
221 if (rc < 0)
222 newn = &((*newn)->rb_left);
223 else if (rc > 0)
224 newn = &((*newn)->rb_right);
225 else {
226 spin_unlock(&rgd->rd_rsspin);
227 return cur;
228 }
229 }
230 spin_unlock(&rgd->rd_rsspin);
231 return NULL;
232}
233
234/**
235 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing 188 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
236 * a block in a given allocation state. 189 * a block in a given allocation state.
237 * @buf: the buffer that holds the bitmaps 190 * @buf: the buffer that holds the bitmaps
@@ -262,8 +215,6 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
262 u64 mask = 0x5555555555555555ULL; 215 u64 mask = 0x5555555555555555ULL;
263 u32 bit; 216 u32 bit;
264 217
265 BUG_ON(state > 3);
266
267 /* Mask off bits we don't care about at the start of the search */ 218 /* Mask off bits we don't care about at the start of the search */
268 mask <<= spoint; 219 mask <<= spoint;
269 tmp = gfs2_bit_search(ptr, mask, state); 220 tmp = gfs2_bit_search(ptr, mask, state);
@@ -285,6 +236,131 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
285} 236}
286 237
287/** 238/**
239 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
240 * @rbm: The rbm with rgd already set correctly
241 * @block: The block number (filesystem relative)
242 *
243 * This sets the bi and offset members of an rbm based on a
244 * resource group and a filesystem relative block number. The
245 * resource group must be set in the rbm on entry, the bi and
246 * offset members will be set by this function.
247 *
248 * Returns: 0 on success, or an error code
249 */
250
251static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
252{
253 u64 rblock = block - rbm->rgd->rd_data0;
254 u32 goal = (u32)rblock;
255 int x;
256
257 if (WARN_ON_ONCE(rblock > UINT_MAX))
258 return -EINVAL;
259 if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
260 return -E2BIG;
261
262 for (x = 0; x < rbm->rgd->rd_length; x++) {
263 rbm->bi = rbm->rgd->rd_bits + x;
264 if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) {
265 rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY);
266 break;
267 }
268 }
269
270 return 0;
271}
272
273/**
274 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
275 * @rbm: Position to search (value/result)
276 * @n_unaligned: Number of unaligned blocks to check
277 * @len: Decremented for each block found (terminate on zero)
278 *
279 * Returns: true if a non-free block is encountered
280 */
281
282static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
283{
284 u64 block;
285 u32 n;
286 u8 res;
287
288 for (n = 0; n < n_unaligned; n++) {
289 res = gfs2_testbit(rbm);
290 if (res != GFS2_BLKST_FREE)
291 return true;
292 (*len)--;
293 if (*len == 0)
294 return true;
295 block = gfs2_rbm_to_block(rbm);
296 if (gfs2_rbm_from_block(rbm, block + 1))
297 return true;
298 }
299
300 return false;
301}
302
303/**
304 * gfs2_free_extlen - Return extent length of free blocks
305 * @rbm: Starting position
306 * @len: Max length to check
307 *
308 * Starting at the block specified by the rbm, see how many free blocks
309 * there are, not reading more than len blocks ahead. This can be done
310 * using memchr_inv when the blocks are byte aligned, but has to be done
311 * on a block by block basis in case of unaligned blocks. Also this
312 * function can cope with bitmap boundaries (although it must stop on
313 * a resource group boundary)
314 *
315 * Returns: Number of free blocks in the extent
316 */
317
318static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
319{
320 struct gfs2_rbm rbm = *rrbm;
321 u32 n_unaligned = rbm.offset & 3;
322 u32 size = len;
323 u32 bytes;
324 u32 chunk_size;
325 u8 *ptr, *start, *end;
326 u64 block;
327
328 if (n_unaligned &&
329 gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
330 goto out;
331
332 n_unaligned = len & 3;
333 /* Start is now byte aligned */
334 while (len > 3) {
335 start = rbm.bi->bi_bh->b_data;
336 if (rbm.bi->bi_clone)
337 start = rbm.bi->bi_clone;
338 end = start + rbm.bi->bi_bh->b_size;
339 start += rbm.bi->bi_offset;
340 BUG_ON(rbm.offset & 3);
341 start += (rbm.offset / GFS2_NBBY);
342 bytes = min_t(u32, len / GFS2_NBBY, (end - start));
343 ptr = memchr_inv(start, 0, bytes);
344 chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
345 chunk_size *= GFS2_NBBY;
346 BUG_ON(len < chunk_size);
347 len -= chunk_size;
348 block = gfs2_rbm_to_block(&rbm);
349 gfs2_rbm_from_block(&rbm, block + chunk_size);
350 n_unaligned = 3;
351 if (ptr)
352 break;
353 n_unaligned = len & 3;
354 }
355
356 /* Deal with any bits left over at the end */
357 if (n_unaligned)
358 gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
359out:
360 return size - len;
361}
362
363/**
288 * gfs2_bitcount - count the number of bits in a certain state 364 * gfs2_bitcount - count the number of bits in a certain state
289 * @rgd: the resource group descriptor 365 * @rgd: the resource group descriptor
290 * @buffer: the buffer that holds the bitmaps 366 * @buffer: the buffer that holds the bitmaps
@@ -487,6 +563,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
487 if (!res) 563 if (!res)
488 error = -ENOMEM; 564 error = -ENOMEM;
489 565
566 RB_CLEAR_NODE(&res->rs_node);
567
490 down_write(&ip->i_rw_mutex); 568 down_write(&ip->i_rw_mutex);
491 if (ip->i_res) 569 if (ip->i_res)
492 kmem_cache_free(gfs2_rsrv_cachep, res); 570 kmem_cache_free(gfs2_rsrv_cachep, res);
@@ -496,11 +574,12 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
496 return error; 574 return error;
497} 575}
498 576
499static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) 577static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
500{ 578{
501 gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", 579 gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n",
502 rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk, 580 (unsigned long long)rs->rs_inum,
503 rs->rs_free); 581 (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
582 rs->rs_rbm.offset, rs->rs_free);
504} 583}
505 584
506/** 585/**
@@ -508,41 +587,26 @@ static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
508 * @rs: The reservation to remove 587 * @rs: The reservation to remove
509 * 588 *
510 */ 589 */
511static void __rs_deltree(struct gfs2_blkreserv *rs) 590static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
512{ 591{
513 struct gfs2_rgrpd *rgd; 592 struct gfs2_rgrpd *rgd;
514 593
515 if (!gfs2_rs_active(rs)) 594 if (!gfs2_rs_active(rs))
516 return; 595 return;
517 596
518 rgd = rs->rs_rgd; 597 rgd = rs->rs_rbm.rgd;
519 /* We can't do this: The reason is that when the rgrp is invalidated, 598 trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
520 it's in the "middle" of acquiring the glock, but the HOLDER bit 599 rb_erase(&rs->rs_node, &rgd->rd_rstree);
521 isn't set yet: 600 RB_CLEAR_NODE(&rs->rs_node);
522 BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/
523 trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL);
524
525 if (!RB_EMPTY_ROOT(&rgd->rd_rstree))
526 rb_erase(&rs->rs_node, &rgd->rd_rstree);
527 BUG_ON(!rgd->rd_rs_cnt);
528 rgd->rd_rs_cnt--;
529 601
530 if (rs->rs_free) { 602 if (rs->rs_free) {
531 /* return reserved blocks to the rgrp and the ip */ 603 /* return reserved blocks to the rgrp and the ip */
532 BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); 604 BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
533 rs->rs_rgd->rd_reserved -= rs->rs_free; 605 rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
534 rs->rs_free = 0; 606 rs->rs_free = 0;
535 clear_bit(GBF_FULL, &rs->rs_bi->bi_flags); 607 clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags);
536 smp_mb__after_clear_bit(); 608 smp_mb__after_clear_bit();
537 } 609 }
538 /* We can't change any of the step 1 or step 2 components of the rs.
539 E.g. We can't set rs_rgd to NULL because the rgd glock is held and
540 dequeued through this pointer.
541 Can't: atomic_set(&rs->rs_sizehint, 0);
542 Can't: rs->rs_requested = 0;
543 Can't: rs->rs_rgd = NULL;*/
544 rs->rs_bi = NULL;
545 rs->rs_biblk = 0;
546} 610}
547 611
548/** 612/**
@@ -550,17 +614,16 @@ static void __rs_deltree(struct gfs2_blkreserv *rs)
550 * @rs: The reservation to remove 614 * @rs: The reservation to remove
551 * 615 *
552 */ 616 */
553void gfs2_rs_deltree(struct gfs2_blkreserv *rs) 617void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
554{ 618{
555 struct gfs2_rgrpd *rgd; 619 struct gfs2_rgrpd *rgd;
556 620
557 if (!gfs2_rs_active(rs)) 621 rgd = rs->rs_rbm.rgd;
558 return; 622 if (rgd) {
559 623 spin_lock(&rgd->rd_rsspin);
560 rgd = rs->rs_rgd; 624 __rs_deltree(ip, rs);
561 spin_lock(&rgd->rd_rsspin); 625 spin_unlock(&rgd->rd_rsspin);
562 __rs_deltree(rs); 626 }
563 spin_unlock(&rgd->rd_rsspin);
564} 627}
565 628
566/** 629/**
@@ -572,8 +635,7 @@ void gfs2_rs_delete(struct gfs2_inode *ip)
572{ 635{
573 down_write(&ip->i_rw_mutex); 636 down_write(&ip->i_rw_mutex);
574 if (ip->i_res) { 637 if (ip->i_res) {
575 gfs2_rs_deltree(ip->i_res); 638 gfs2_rs_deltree(ip, ip->i_res);
576 trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE);
577 BUG_ON(ip->i_res->rs_free); 639 BUG_ON(ip->i_res->rs_free);
578 kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); 640 kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
579 ip->i_res = NULL; 641 ip->i_res = NULL;
@@ -597,7 +659,7 @@ static void return_all_reservations(struct gfs2_rgrpd *rgd)
597 spin_lock(&rgd->rd_rsspin); 659 spin_lock(&rgd->rd_rsspin);
598 while ((n = rb_first(&rgd->rd_rstree))) { 660 while ((n = rb_first(&rgd->rd_rstree))) {
599 rs = rb_entry(n, struct gfs2_blkreserv, rs_node); 661 rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
600 __rs_deltree(rs); 662 __rs_deltree(NULL, rs);
601 } 663 }
602 spin_unlock(&rgd->rd_rsspin); 664 spin_unlock(&rgd->rd_rsspin);
603} 665}
@@ -1270,211 +1332,276 @@ out:
1270 1332
1271/** 1333/**
1272 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree 1334 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
1273 * @bi: the bitmap with the blocks
1274 * @ip: the inode structure 1335 * @ip: the inode structure
1275 * @biblk: the 32-bit block number relative to the start of the bitmap
1276 * @amount: the number of blocks to reserve
1277 * 1336 *
1278 * Returns: NULL - reservation was already taken, so not inserted
1279 * pointer to the inserted reservation
1280 */ 1337 */
1281static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, 1338static void rs_insert(struct gfs2_inode *ip)
1282 struct gfs2_inode *ip, u32 biblk,
1283 int amount)
1284{ 1339{
1285 struct rb_node **newn, *parent = NULL; 1340 struct rb_node **newn, *parent = NULL;
1286 int rc; 1341 int rc;
1287 struct gfs2_blkreserv *rs = ip->i_res; 1342 struct gfs2_blkreserv *rs = ip->i_res;
1288 struct gfs2_rgrpd *rgd = rs->rs_rgd; 1343 struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
1289 u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; 1344 u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
1345
1346 BUG_ON(gfs2_rs_active(rs));
1290 1347
1291 spin_lock(&rgd->rd_rsspin); 1348 spin_lock(&rgd->rd_rsspin);
1292 newn = &rgd->rd_rstree.rb_node; 1349 newn = &rgd->rd_rstree.rb_node;
1293 BUG_ON(!ip->i_res);
1294 BUG_ON(gfs2_rs_active(rs));
1295 /* Figure out where to put new node */
1296 /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
1297 while (*newn) { 1350 while (*newn) {
1298 struct gfs2_blkreserv *cur = 1351 struct gfs2_blkreserv *cur =
1299 rb_entry(*newn, struct gfs2_blkreserv, rs_node); 1352 rb_entry(*newn, struct gfs2_blkreserv, rs_node);
1300 1353
1301 parent = *newn; 1354 parent = *newn;
1302 rc = rs_cmp(fsblock, amount, cur); 1355 rc = rs_cmp(fsblock, rs->rs_free, cur);
1303 if (rc > 0) 1356 if (rc > 0)
1304 newn = &((*newn)->rb_right); 1357 newn = &((*newn)->rb_right);
1305 else if (rc < 0) 1358 else if (rc < 0)
1306 newn = &((*newn)->rb_left); 1359 newn = &((*newn)->rb_left);
1307 else { 1360 else {
1308 spin_unlock(&rgd->rd_rsspin); 1361 spin_unlock(&rgd->rd_rsspin);
1309 return NULL; /* reservation already in use */ 1362 WARN_ON(1);
1363 return;
1310 } 1364 }
1311 } 1365 }
1312 1366
1313 /* Do our reservation work */
1314 rs = ip->i_res;
1315 rs->rs_free = amount;
1316 rs->rs_biblk = biblk;
1317 rs->rs_bi = bi;
1318 rb_link_node(&rs->rs_node, parent, newn); 1367 rb_link_node(&rs->rs_node, parent, newn);
1319 rb_insert_color(&rs->rs_node, &rgd->rd_rstree); 1368 rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
1320 1369
1321 /* Do our inode accounting for the reservation */
1322 /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
1323
1324 /* Do our rgrp accounting for the reservation */ 1370 /* Do our rgrp accounting for the reservation */
1325 rgd->rd_reserved += amount; /* blocks reserved */ 1371 rgd->rd_reserved += rs->rs_free; /* blocks reserved */
1326 rgd->rd_rs_cnt++; /* number of in-tree reservations */
1327 spin_unlock(&rgd->rd_rsspin); 1372 spin_unlock(&rgd->rd_rsspin);
1328 trace_gfs2_rs(ip, rs, TRACE_RS_INSERT); 1373 trace_gfs2_rs(rs, TRACE_RS_INSERT);
1329 return rs;
1330} 1374}
1331 1375
1332/** 1376/**
1333 * unclaimed_blocks - return number of blocks that aren't spoken for 1377 * rg_mblk_search - find a group of multiple free blocks to form a reservation
1334 */
1335static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd)
1336{
1337 return rgd->rd_free_clone - rgd->rd_reserved;
1338}
1339
1340/**
1341 * rg_mblk_search - find a group of multiple free blocks
1342 * @rgd: the resource group descriptor 1378 * @rgd: the resource group descriptor
1343 * @rs: the block reservation
1344 * @ip: pointer to the inode for which we're reserving blocks 1379 * @ip: pointer to the inode for which we're reserving blocks
1380 * @requested: number of blocks required for this allocation
1345 * 1381 *
1346 * This is very similar to rgblk_search, except we're looking for whole
1347 * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing
1348 * on aligned dwords for speed's sake.
1349 *
1350 * Returns: 0 if successful or BFITNOENT if there isn't enough free space
1351 */ 1382 */
1352 1383
1353static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) 1384static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
1385 unsigned requested)
1354{ 1386{
1355 struct gfs2_bitmap *bi = rgd->rd_bits; 1387 struct gfs2_rbm rbm = { .rgd = rgd, };
1356 const u32 length = rgd->rd_length; 1388 u64 goal;
1357 u32 blk; 1389 struct gfs2_blkreserv *rs = ip->i_res;
1358 unsigned int buf, x, search_bytes; 1390 u32 extlen;
1359 u8 *buffer = NULL; 1391 u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
1360 u8 *ptr, *end, *nonzero; 1392 int ret;
1361 u32 goal, rsv_bytes; 1393
1362 struct gfs2_blkreserv *rs; 1394 extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested);
1363 u32 best_rs_bytes, unclaimed; 1395 extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
1364 int best_rs_blocks; 1396 if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
1397 return;
1365 1398
1366 /* Find bitmap block that contains bits for goal block */ 1399 /* Find bitmap block that contains bits for goal block */
1367 if (rgrp_contains_block(rgd, ip->i_goal)) 1400 if (rgrp_contains_block(rgd, ip->i_goal))
1368 goal = ip->i_goal - rgd->rd_data0; 1401 goal = ip->i_goal;
1369 else 1402 else
1370 goal = rgd->rd_last_alloc; 1403 goal = rgd->rd_last_alloc + rgd->rd_data0;
1371 for (buf = 0; buf < length; buf++) { 1404
1372 bi = rgd->rd_bits + buf; 1405 if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
1373 /* Convert scope of "goal" from rgrp-wide to within 1406 return;
1374 found bit block */ 1407
1375 if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { 1408 ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true);
1376 goal -= bi->bi_start * GFS2_NBBY; 1409 if (ret == 0) {
1377 goto do_search; 1410 rs->rs_rbm = rbm;
1378 } 1411 rs->rs_free = extlen;
1412 rs->rs_inum = ip->i_no_addr;
1413 rs_insert(ip);
1379 } 1414 }
1380 buf = 0; 1415}
1381 goal = 0;
1382
1383do_search:
1384 best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint),
1385 (RGRP_RSRV_MINBLKS * rgd->rd_length));
1386 best_rs_bytes = (best_rs_blocks *
1387 (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) /
1388 GFS2_NBBY; /* 1 + is for our not-yet-created reservation */
1389 best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64));
1390 unclaimed = unclaimed_blocks(rgd);
1391 if (best_rs_bytes * GFS2_NBBY > unclaimed)
1392 best_rs_bytes = unclaimed >> GFS2_BIT_SIZE;
1393
1394 for (x = 0; x <= length; x++) {
1395 bi = rgd->rd_bits + buf;
1396 1416
1397 if (test_bit(GBF_FULL, &bi->bi_flags)) 1417/**
1398 goto skip; 1418 * gfs2_next_unreserved_block - Return next block that is not reserved
1419 * @rgd: The resource group
1420 * @block: The starting block
1421 * @length: The required length
1422 * @ip: Ignore any reservations for this inode
1423 *
1424 * If the block does not appear in any reservation, then return the
1425 * block number unchanged. If it does appear in the reservation, then
1426 * keep looking through the tree of reservations in order to find the
1427 * first block number which is not reserved.
1428 */
1399 1429
1400 WARN_ON(!buffer_uptodate(bi->bi_bh)); 1430static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
1401 if (bi->bi_clone) 1431 u32 length,
1402 buffer = bi->bi_clone + bi->bi_offset; 1432 const struct gfs2_inode *ip)
1433{
1434 struct gfs2_blkreserv *rs;
1435 struct rb_node *n;
1436 int rc;
1437
1438 spin_lock(&rgd->rd_rsspin);
1439 n = rgd->rd_rstree.rb_node;
1440 while (n) {
1441 rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1442 rc = rs_cmp(block, length, rs);
1443 if (rc < 0)
1444 n = n->rb_left;
1445 else if (rc > 0)
1446 n = n->rb_right;
1403 else 1447 else
1404 buffer = bi->bi_bh->b_data + bi->bi_offset; 1448 break;
1405 1449 }
1406 /* We have to keep the reservations aligned on u64 boundaries 1450
1407 otherwise we could get situations where a byte can't be 1451 if (n) {
1408 used because it's after a reservation, but a free bit still 1452 while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) {
1409 is within the reservation's area. */ 1453 block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
1410 ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64)); 1454 n = n->rb_right;
1411 end = (buffer + bi->bi_len); 1455 if (n == NULL)
1412 while (ptr < end) { 1456 break;
1413 rsv_bytes = 0; 1457 rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1414 if ((ptr + best_rs_bytes) <= end)
1415 search_bytes = best_rs_bytes;
1416 else
1417 search_bytes = end - ptr;
1418 BUG_ON(!search_bytes);
1419 nonzero = memchr_inv(ptr, 0, search_bytes);
1420 /* If the lot is all zeroes, reserve the whole size. If
1421 there's enough zeroes to satisfy the request, use
1422 what we can. If there's not enough, keep looking. */
1423 if (nonzero == NULL)
1424 rsv_bytes = search_bytes;
1425 else if ((nonzero - ptr) * GFS2_NBBY >=
1426 ip->i_res->rs_requested)
1427 rsv_bytes = (nonzero - ptr);
1428
1429 if (rsv_bytes) {
1430 blk = ((ptr - buffer) * GFS2_NBBY);
1431 BUG_ON(blk >= bi->bi_len * GFS2_NBBY);
1432 rs = rs_insert(bi, ip, blk,
1433 rsv_bytes * GFS2_NBBY);
1434 if (IS_ERR(rs))
1435 return PTR_ERR(rs);
1436 if (rs)
1437 return 0;
1438 }
1439 ptr += ALIGN(search_bytes, sizeof(u64));
1440 } 1458 }
1441skip:
1442 /* Try next bitmap block (wrap back to rgrp header
1443 if at end) */
1444 buf++;
1445 buf %= length;
1446 goal = 0;
1447 } 1459 }
1448 1460
1449 return BFITNOENT; 1461 spin_unlock(&rgd->rd_rsspin);
1462 return block;
1450} 1463}
1451 1464
1452/** 1465/**
1453 * try_rgrp_fit - See if a given reservation will fit in a given RG 1466 * gfs2_reservation_check_and_update - Check for reservations during block alloc
1454 * @rgd: the RG data 1467 * @rbm: The current position in the resource group
1455 * @ip: the inode 1468 * @ip: The inode for which we are searching for blocks
1469 * @minext: The minimum extent length
1456 * 1470 *
1457 * If there's room for the requested blocks to be allocated from the RG: 1471 * This checks the current position in the rgrp to see whether there is
1458 * This will try to get a multi-block reservation first, and if that doesn't 1472 * a reservation covering this block. If not then this function is a
1459 * fit, it will take what it can. 1473 * no-op. If there is, then the position is moved to the end of the
1474 * contiguous reservation(s) so that we are pointing at the first
1475 * non-reserved block.
1460 * 1476 *
1461 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) 1477 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
1462 */ 1478 */
1463 1479
1464static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) 1480static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
1481 const struct gfs2_inode *ip,
1482 u32 minext)
1465{ 1483{
1466 struct gfs2_blkreserv *rs = ip->i_res; 1484 u64 block = gfs2_rbm_to_block(rbm);
1485 u32 extlen = 1;
1486 u64 nblock;
1487 int ret;
1467 1488
1468 if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) 1489 /*
1490 * If we have a minimum extent length, then skip over any extent
1491 * which is less than the min extent length in size.
1492 */
1493 if (minext) {
1494 extlen = gfs2_free_extlen(rbm, minext);
1495 nblock = block + extlen;
1496 if (extlen < minext)
1497 goto fail;
1498 }
1499
1500 /*
1501 * Check the extent which has been found against the reservations
1502 * and skip if parts of it are already reserved
1503 */
1504 nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
1505 if (nblock == block)
1469 return 0; 1506 return 0;
1470 /* Look for a multi-block reservation. */ 1507fail:
1471 if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && 1508 ret = gfs2_rbm_from_block(rbm, nblock);
1472 rg_mblk_search(rgd, ip) != BFITNOENT) 1509 if (ret < 0)
1473 return 1; 1510 return ret;
1474 if (unclaimed_blocks(rgd) >= rs->rs_requested) 1511 return 1;
1475 return 1; 1512}
1476 1513
1477 return 0; 1514/**
1515 * gfs2_rbm_find - Look for blocks of a particular state
1516 * @rbm: Value/result starting position and final position
1517 * @state: The state which we want to find
1518 * @minext: The requested extent length (0 for a single block)
1519 * @ip: If set, check for reservations
1520 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
1521 * around until we've reached the starting point.
1522 *
1523 * Side effects:
1524 * - If looking for free blocks, we set GBF_FULL on each bitmap which
1525 * has no free blocks in it.
1526 *
1527 * Returns: 0 on success, -ENOSPC if there is no block of the requested state
1528 */
1529
1530static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
1531 const struct gfs2_inode *ip, bool nowrap)
1532{
1533 struct buffer_head *bh;
1534 struct gfs2_bitmap *initial_bi;
1535 u32 initial_offset;
1536 u32 offset;
1537 u8 *buffer;
1538 int index;
1539 int n = 0;
1540 int iters = rbm->rgd->rd_length;
1541 int ret;
1542
1543 /* If we are not starting at the beginning of a bitmap, then we
1544 * need to add one to the bitmap count to ensure that we search
1545 * the starting bitmap twice.
1546 */
1547 if (rbm->offset != 0)
1548 iters++;
1549
1550 while(1) {
1551 if (test_bit(GBF_FULL, &rbm->bi->bi_flags) &&
1552 (state == GFS2_BLKST_FREE))
1553 goto next_bitmap;
1554
1555 bh = rbm->bi->bi_bh;
1556 buffer = bh->b_data + rbm->bi->bi_offset;
1557 WARN_ON(!buffer_uptodate(bh));
1558 if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone)
1559 buffer = rbm->bi->bi_clone + rbm->bi->bi_offset;
1560 initial_offset = rbm->offset;
1561 offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state);
1562 if (offset == BFITNOENT)
1563 goto bitmap_full;
1564 rbm->offset = offset;
1565 if (ip == NULL)
1566 return 0;
1567
1568 initial_bi = rbm->bi;
1569 ret = gfs2_reservation_check_and_update(rbm, ip, minext);
1570 if (ret == 0)
1571 return 0;
1572 if (ret > 0) {
1573 n += (rbm->bi - initial_bi);
1574 goto next_iter;
1575 }
1576 if (ret == -E2BIG) {
1577 index = 0;
1578 rbm->offset = 0;
1579 n += (rbm->bi - initial_bi);
1580 goto res_covered_end_of_rgrp;
1581 }
1582 return ret;
1583
1584bitmap_full: /* Mark bitmap as full and fall through */
1585 if ((state == GFS2_BLKST_FREE) && initial_offset == 0)
1586 set_bit(GBF_FULL, &rbm->bi->bi_flags);
1587
1588next_bitmap: /* Find next bitmap in the rgrp */
1589 rbm->offset = 0;
1590 index = rbm->bi - rbm->rgd->rd_bits;
1591 index++;
1592 if (index == rbm->rgd->rd_length)
1593 index = 0;
1594res_covered_end_of_rgrp:
1595 rbm->bi = &rbm->rgd->rd_bits[index];
1596 if ((index == 0) && nowrap)
1597 break;
1598 n++;
1599next_iter:
1600 if (n >= iters)
1601 break;
1602 }
1603
1604 return -ENOSPC;
1478} 1605}
1479 1606
1480/** 1607/**
@@ -1489,34 +1616,33 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1489 1616
1490static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) 1617static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
1491{ 1618{
1492 u32 goal = 0, block; 1619 u64 block;
1493 u64 no_addr;
1494 struct gfs2_sbd *sdp = rgd->rd_sbd; 1620 struct gfs2_sbd *sdp = rgd->rd_sbd;
1495 struct gfs2_glock *gl; 1621 struct gfs2_glock *gl;
1496 struct gfs2_inode *ip; 1622 struct gfs2_inode *ip;
1497 int error; 1623 int error;
1498 int found = 0; 1624 int found = 0;
1499 struct gfs2_bitmap *bi; 1625 struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 };
1500 1626
1501 while (goal < rgd->rd_data) { 1627 while (1) {
1502 down_write(&sdp->sd_log_flush_lock); 1628 down_write(&sdp->sd_log_flush_lock);
1503 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi); 1629 error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true);
1504 up_write(&sdp->sd_log_flush_lock); 1630 up_write(&sdp->sd_log_flush_lock);
1505 if (block == BFITNOENT) 1631 if (error == -ENOSPC)
1632 break;
1633 if (WARN_ON_ONCE(error))
1506 break; 1634 break;
1507 1635
1508 block = gfs2_bi2rgd_blk(bi, block); 1636 block = gfs2_rbm_to_block(&rbm);
1509 /* rgblk_search can return a block < goal, so we need to 1637 if (gfs2_rbm_from_block(&rbm, block + 1))
1510 keep it marching forward. */ 1638 break;
1511 no_addr = block + rgd->rd_data0; 1639 if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
1512 goal = max(block + 1, goal + 1);
1513 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
1514 continue; 1640 continue;
1515 if (no_addr == skip) 1641 if (block == skip)
1516 continue; 1642 continue;
1517 *last_unlinked = no_addr; 1643 *last_unlinked = block;
1518 1644
1519 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl); 1645 error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl);
1520 if (error) 1646 if (error)
1521 continue; 1647 continue;
1522 1648
@@ -1543,6 +1669,19 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
1543 return; 1669 return;
1544} 1670}
1545 1671
1672static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1673{
1674 struct gfs2_rgrpd *rgd = *pos;
1675
1676 rgd = gfs2_rgrpd_get_next(rgd);
1677 if (rgd == NULL)
1678 rgd = gfs2_rgrpd_get_next(NULL);
1679 *pos = rgd;
1680 if (rgd != begin) /* If we didn't wrap */
1681 return true;
1682 return false;
1683}
1684
1546/** 1685/**
1547 * gfs2_inplace_reserve - Reserve space in the filesystem 1686 * gfs2_inplace_reserve - Reserve space in the filesystem
1548 * @ip: the inode to reserve space for 1687 * @ip: the inode to reserve space for
@@ -1562,103 +1701,96 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1562 1701
1563 if (sdp->sd_args.ar_rgrplvb) 1702 if (sdp->sd_args.ar_rgrplvb)
1564 flags |= GL_SKIP; 1703 flags |= GL_SKIP;
1565 rs->rs_requested = requested; 1704 if (gfs2_assert_warn(sdp, requested))
1566 if (gfs2_assert_warn(sdp, requested)) { 1705 return -EINVAL;
1567 error = -EINVAL;
1568 goto out;
1569 }
1570 if (gfs2_rs_active(rs)) { 1706 if (gfs2_rs_active(rs)) {
1571 begin = rs->rs_rgd; 1707 begin = rs->rs_rbm.rgd;
1572 flags = 0; /* Yoda: Do or do not. There is no try */ 1708 flags = 0; /* Yoda: Do or do not. There is no try */
1573 } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { 1709 } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
1574 rs->rs_rgd = begin = ip->i_rgd; 1710 rs->rs_rbm.rgd = begin = ip->i_rgd;
1575 } else { 1711 } else {
1576 rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); 1712 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1577 } 1713 }
1578 if (rs->rs_rgd == NULL) 1714 if (rs->rs_rbm.rgd == NULL)
1579 return -EBADSLT; 1715 return -EBADSLT;
1580 1716
1581 while (loops < 3) { 1717 while (loops < 3) {
1582 rg_locked = 0; 1718 rg_locked = 1;
1583 1719
1584 if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { 1720 if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
1585 rg_locked = 1; 1721 rg_locked = 0;
1586 error = 0; 1722 error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
1587 } else if (!loops && !gfs2_rs_active(rs) &&
1588 rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) {
1589 /* If the rgrp already is maxed out for contenders,
1590 we can eliminate it as a "first pass" without even
1591 requesting the rgrp glock. */
1592 error = GLR_TRYFAILED;
1593 } else {
1594 error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl,
1595 LM_ST_EXCLUSIVE, flags, 1723 LM_ST_EXCLUSIVE, flags,
1596 &rs->rs_rgd_gh); 1724 &rs->rs_rgd_gh);
1597 if (!error && sdp->sd_args.ar_rgrplvb) { 1725 if (error == GLR_TRYFAILED)
1598 error = update_rgrp_lvb(rs->rs_rgd); 1726 goto next_rgrp;
1599 if (error) { 1727 if (unlikely(error))
1728 return error;
1729 if (sdp->sd_args.ar_rgrplvb) {
1730 error = update_rgrp_lvb(rs->rs_rbm.rgd);
1731 if (unlikely(error)) {
1600 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1732 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1601 return error; 1733 return error;
1602 } 1734 }
1603 } 1735 }
1604 } 1736 }
1605 switch (error) {
1606 case 0:
1607 if (gfs2_rs_active(rs)) {
1608 if (unclaimed_blocks(rs->rs_rgd) +
1609 rs->rs_free >= rs->rs_requested) {
1610 ip->i_rgd = rs->rs_rgd;
1611 return 0;
1612 }
1613 /* We have a multi-block reservation, but the
1614 rgrp doesn't have enough free blocks to
1615 satisfy the request. Free the reservation
1616 and look for a suitable rgrp. */
1617 gfs2_rs_deltree(rs);
1618 }
1619 if (try_rgrp_fit(rs->rs_rgd, ip)) {
1620 if (sdp->sd_args.ar_rgrplvb)
1621 gfs2_rgrp_bh_get(rs->rs_rgd);
1622 ip->i_rgd = rs->rs_rgd;
1623 return 0;
1624 }
1625 if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) {
1626 if (sdp->sd_args.ar_rgrplvb)
1627 gfs2_rgrp_bh_get(rs->rs_rgd);
1628 try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
1629 ip->i_no_addr);
1630 }
1631 if (!rg_locked)
1632 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1633 /* fall through */
1634 case GLR_TRYFAILED:
1635 rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd);
1636 rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */
1637 if (rs->rs_rgd != begin) /* If we didn't wrap */
1638 break;
1639 1737
1640 flags &= ~LM_FLAG_TRY; 1738 /* Skip unuseable resource groups */
1641 loops++; 1739 if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
1642 /* Check that fs hasn't grown if writing to rindex */ 1740 goto skip_rgrp;
1643 if (ip == GFS2_I(sdp->sd_rindex) && 1741
1644 !sdp->sd_rindex_uptodate) { 1742 if (sdp->sd_args.ar_rgrplvb)
1645 error = gfs2_ri_update(ip); 1743 gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
1646 if (error) 1744
1647 goto out; 1745 /* Get a reservation if we don't already have one */
1648 } else if (loops == 2) 1746 if (!gfs2_rs_active(rs))
1649 /* Flushing the log may release space */ 1747 rg_mblk_search(rs->rs_rbm.rgd, ip, requested);
1650 gfs2_log_flush(sdp, NULL); 1748
1651 break; 1749 /* Skip rgrps when we can't get a reservation on first pass */
1652 default: 1750 if (!gfs2_rs_active(rs) && (loops < 1))
1653 goto out; 1751 goto check_rgrp;
1752
1753 /* If rgrp has enough free space, use it */
1754 if (rs->rs_rbm.rgd->rd_free_clone >= requested) {
1755 ip->i_rgd = rs->rs_rbm.rgd;
1756 return 0;
1757 }
1758
1759 /* Drop reservation, if we couldn't use reserved rgrp */
1760 if (gfs2_rs_active(rs))
1761 gfs2_rs_deltree(ip, rs);
1762check_rgrp:
1763 /* Check for unlinked inodes which can be reclaimed */
1764 if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
1765 try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
1766 ip->i_no_addr);
1767skip_rgrp:
1768 /* Unlock rgrp if required */
1769 if (!rg_locked)
1770 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1771next_rgrp:
1772 /* Find the next rgrp, and continue looking */
1773 if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
1774 continue;
1775
1776 /* If we've scanned all the rgrps, but found no free blocks
1777 * then this checks for some less likely conditions before
1778 * trying again.
1779 */
1780 flags &= ~LM_FLAG_TRY;
1781 loops++;
1782 /* Check that fs hasn't grown if writing to rindex */
1783 if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
1784 error = gfs2_ri_update(ip);
1785 if (error)
1786 return error;
1654 } 1787 }
1788 /* Flushing the log may release space */
1789 if (loops == 2)
1790 gfs2_log_flush(sdp, NULL);
1655 } 1791 }
1656 error = -ENOSPC;
1657 1792
1658out: 1793 return -ENOSPC;
1659 if (error)
1660 rs->rs_requested = 0;
1661 return error;
1662} 1794}
1663 1795
1664/** 1796/**
@@ -1672,15 +1804,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
1672{ 1804{
1673 struct gfs2_blkreserv *rs = ip->i_res; 1805 struct gfs2_blkreserv *rs = ip->i_res;
1674 1806
1675 if (!rs)
1676 return;
1677
1678 if (!rs->rs_free)
1679 gfs2_rs_deltree(rs);
1680
1681 if (rs->rs_rgd_gh.gh_gl) 1807 if (rs->rs_rgd_gh.gh_gl)
1682 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1808 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1683 rs->rs_requested = 0;
1684} 1809}
1685 1810
1686/** 1811/**
@@ -1693,173 +1818,47 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
1693 1818
1694static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) 1819static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1695{ 1820{
1696 struct gfs2_bitmap *bi = NULL; 1821 struct gfs2_rbm rbm = { .rgd = rgd, };
1697 u32 length, rgrp_block, buf_block; 1822 int ret;
1698 unsigned int buf;
1699 unsigned char type;
1700
1701 length = rgd->rd_length;
1702 rgrp_block = block - rgd->rd_data0;
1703
1704 for (buf = 0; buf < length; buf++) {
1705 bi = rgd->rd_bits + buf;
1706 if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1707 break;
1708 }
1709 1823
1710 gfs2_assert(rgd->rd_sbd, buf < length); 1824 ret = gfs2_rbm_from_block(&rbm, block);
1711 buf_block = rgrp_block - bi->bi_start * GFS2_NBBY; 1825 WARN_ON_ONCE(ret != 0);
1712 1826
1713 type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, 1827 return gfs2_testbit(&rbm);
1714 bi->bi_len, buf_block);
1715
1716 return type;
1717} 1828}
1718 1829
1719/**
1720 * rgblk_search - find a block in @state
1721 * @rgd: the resource group descriptor
1722 * @goal: the goal block within the RG (start here to search for avail block)
1723 * @state: GFS2_BLKST_XXX the before-allocation state to find
1724 * @rbi: address of the pointer to the bitmap containing the block found
1725 *
1726 * Walk rgrp's bitmap to find bits that represent a block in @state.
1727 *
1728 * This function never fails, because we wouldn't call it unless we
1729 * know (from reservation results, etc.) that a block is available.
1730 *
1731 * Scope of @goal is just within rgrp, not the whole filesystem.
1732 * Scope of @returned block is just within bitmap, not the whole filesystem.
1733 *
1734 * Returns: the block number found relative to the bitmap rbi
1735 */
1736
1737static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state,
1738 struct gfs2_bitmap **rbi)
1739{
1740 struct gfs2_bitmap *bi = NULL;
1741 const u32 length = rgd->rd_length;
1742 u32 biblk = BFITNOENT;
1743 unsigned int buf, x;
1744 const u8 *buffer = NULL;
1745
1746 *rbi = NULL;
1747 /* Find bitmap block that contains bits for goal block */
1748 for (buf = 0; buf < length; buf++) {
1749 bi = rgd->rd_bits + buf;
1750 /* Convert scope of "goal" from rgrp-wide to within found bit block */
1751 if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
1752 goal -= bi->bi_start * GFS2_NBBY;
1753 goto do_search;
1754 }
1755 }
1756 buf = 0;
1757 goal = 0;
1758
1759do_search:
1760 /* Search (up to entire) bitmap in this rgrp for allocatable block.
1761 "x <= length", instead of "x < length", because we typically start
1762 the search in the middle of a bit block, but if we can't find an
1763 allocatable block anywhere else, we want to be able wrap around and
1764 search in the first part of our first-searched bit block. */
1765 for (x = 0; x <= length; x++) {
1766 bi = rgd->rd_bits + buf;
1767
1768 if (test_bit(GBF_FULL, &bi->bi_flags) &&
1769 (state == GFS2_BLKST_FREE))
1770 goto skip;
1771
1772 /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
1773 bitmaps, so we must search the originals for that. */
1774 buffer = bi->bi_bh->b_data + bi->bi_offset;
1775 WARN_ON(!buffer_uptodate(bi->bi_bh));
1776 if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1777 buffer = bi->bi_clone + bi->bi_offset;
1778
1779 while (1) {
1780 struct gfs2_blkreserv *rs;
1781 u32 rgblk;
1782
1783 biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
1784 if (biblk == BFITNOENT)
1785 break;
1786 /* Check if this block is reserved() */
1787 rgblk = gfs2_bi2rgd_blk(bi, biblk);
1788 rs = rs_find(rgd, rgblk);
1789 if (rs == NULL)
1790 break;
1791
1792 BUG_ON(rs->rs_bi != bi);
1793 biblk = BFITNOENT;
1794 /* This should jump to the first block after the
1795 reservation. */
1796 goal = rs->rs_biblk + rs->rs_free;
1797 if (goal >= bi->bi_len * GFS2_NBBY)
1798 break;
1799 }
1800 if (biblk != BFITNOENT)
1801 break;
1802
1803 if ((goal == 0) && (state == GFS2_BLKST_FREE))
1804 set_bit(GBF_FULL, &bi->bi_flags);
1805
1806 /* Try next bitmap block (wrap back to rgrp header if at end) */
1807skip:
1808 buf++;
1809 buf %= length;
1810 goal = 0;
1811 }
1812
1813 if (biblk != BFITNOENT)
1814 *rbi = bi;
1815
1816 return biblk;
1817}
1818 1830
1819/** 1831/**
1820 * gfs2_alloc_extent - allocate an extent from a given bitmap 1832 * gfs2_alloc_extent - allocate an extent from a given bitmap
1821 * @rgd: the resource group descriptor 1833 * @rbm: the resource group information
1822 * @bi: the bitmap within the rgrp
1823 * @blk: the block within the bitmap
1824 * @dinode: TRUE if the first block we allocate is for a dinode 1834 * @dinode: TRUE if the first block we allocate is for a dinode
1825 * @n: The extent length 1835 * @n: The extent length (value/result)
1826 * 1836 *
1827 * Add the found bitmap buffer to the transaction. 1837 * Add the bitmap buffer to the transaction.
1828 * Set the found bits to @new_state to change block's allocation state. 1838 * Set the found bits to @new_state to change block's allocation state.
1829 * Returns: starting block number of the extent (fs scope)
1830 */ 1839 */
1831static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, 1840static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
1832 u32 blk, bool dinode, unsigned int *n) 1841 unsigned int *n)
1833{ 1842{
1843 struct gfs2_rbm pos = { .rgd = rbm->rgd, };
1834 const unsigned int elen = *n; 1844 const unsigned int elen = *n;
1835 u32 goal, rgblk; 1845 u64 block;
1836 const u8 *buffer = NULL; 1846 int ret;
1837 struct gfs2_blkreserv *rs; 1847
1838 1848 *n = 1;
1839 *n = 0; 1849 block = gfs2_rbm_to_block(rbm);
1840 buffer = bi->bi_bh->b_data + bi->bi_offset; 1850 gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1);
1841 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1851 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1842 gfs2_setbit(rgd, bi->bi_clone, bi, blk, 1852 block++;
1843 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1844 (*n)++;
1845 goal = blk;
1846 while (*n < elen) { 1853 while (*n < elen) {
1847 goal++; 1854 ret = gfs2_rbm_from_block(&pos, block);
1848 if (goal >= (bi->bi_len * GFS2_NBBY)) 1855 if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
1849 break;
1850 rgblk = gfs2_bi2rgd_blk(bi, goal);
1851 rs = rs_find(rgd, rgblk);
1852 if (rs) /* Oops, we bumped into someone's reservation */
1853 break;
1854 if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
1855 GFS2_BLKST_FREE)
1856 break; 1856 break;
1857 gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED); 1857 gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1);
1858 gfs2_setbit(&pos, true, GFS2_BLKST_USED);
1858 (*n)++; 1859 (*n)++;
1860 block++;
1859 } 1861 }
1860 blk = gfs2_bi2rgd_blk(bi, blk);
1861 rgd->rd_last_alloc = blk + *n - 1;
1862 return rgd->rd_data0 + blk;
1863} 1862}
1864 1863
1865/** 1864/**
@@ -1875,46 +1874,30 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
1875static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, 1874static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1876 u32 blen, unsigned char new_state) 1875 u32 blen, unsigned char new_state)
1877{ 1876{
1878 struct gfs2_rgrpd *rgd; 1877 struct gfs2_rbm rbm;
1879 struct gfs2_bitmap *bi = NULL;
1880 u32 length, rgrp_blk, buf_blk;
1881 unsigned int buf;
1882 1878
1883 rgd = gfs2_blk2rgrpd(sdp, bstart, 1); 1879 rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
1884 if (!rgd) { 1880 if (!rbm.rgd) {
1885 if (gfs2_consist(sdp)) 1881 if (gfs2_consist(sdp))
1886 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); 1882 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
1887 return NULL; 1883 return NULL;
1888 } 1884 }
1889 1885
1890 length = rgd->rd_length;
1891
1892 rgrp_blk = bstart - rgd->rd_data0;
1893
1894 while (blen--) { 1886 while (blen--) {
1895 for (buf = 0; buf < length; buf++) { 1887 gfs2_rbm_from_block(&rbm, bstart);
1896 bi = rgd->rd_bits + buf; 1888 bstart++;
1897 if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY) 1889 if (!rbm.bi->bi_clone) {
1898 break; 1890 rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size,
1899 } 1891 GFP_NOFS | __GFP_NOFAIL);
1900 1892 memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset,
1901 gfs2_assert(rgd->rd_sbd, buf < length); 1893 rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
1902 1894 rbm.bi->bi_len);
1903 buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
1904 rgrp_blk++;
1905
1906 if (!bi->bi_clone) {
1907 bi->bi_clone = kmalloc(bi->bi_bh->b_size,
1908 GFP_NOFS | __GFP_NOFAIL);
1909 memcpy(bi->bi_clone + bi->bi_offset,
1910 bi->bi_bh->b_data + bi->bi_offset,
1911 bi->bi_len);
1912 } 1895 }
1913 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1896 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1);
1914 gfs2_setbit(rgd, NULL, bi, buf_blk, new_state); 1897 gfs2_setbit(&rbm, false, new_state);
1915 } 1898 }
1916 1899
1917 return rgd; 1900 return rbm.rgd;
1918} 1901}
1919 1902
1920/** 1903/**
@@ -1956,56 +1939,41 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
1956} 1939}
1957 1940
1958/** 1941/**
1959 * claim_reserved_blks - Claim previously reserved blocks 1942 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
1960 * @ip: the inode that's claiming the reservation 1943 * @ip: The inode we have just allocated blocks for
1961 * @dinode: 1 if this block is a dinode block, otherwise data block 1944 * @rbm: The start of the allocated blocks
1962 * @nblocks: desired extent length 1945 * @len: The extent length
1963 * 1946 *
1964 * Lay claim to previously allocated block reservation blocks. 1947 * Adjusts a reservation after an allocation has taken place. If the
1965 * Returns: Starting block number of the blocks claimed. 1948 * reservation does not match the allocation, or if it is now empty
1966 * Sets *nblocks to the actual extent length allocated. 1949 * then it is removed.
1967 */ 1950 */
1968static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, 1951
1969 unsigned int *nblocks) 1952static void gfs2_adjust_reservation(struct gfs2_inode *ip,
1953 const struct gfs2_rbm *rbm, unsigned len)
1970{ 1954{
1971 struct gfs2_blkreserv *rs = ip->i_res; 1955 struct gfs2_blkreserv *rs = ip->i_res;
1972 struct gfs2_rgrpd *rgd = rs->rs_rgd; 1956 struct gfs2_rgrpd *rgd = rbm->rgd;
1973 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1957 unsigned rlen;
1974 struct gfs2_bitmap *bi; 1958 u64 block;
1975 u64 start_block = gfs2_rs_startblk(rs); 1959 int ret;
1976 const unsigned int elen = *nblocks;
1977
1978 /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
1979 gfs2_assert_withdraw(sdp, rgd);
1980 /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
1981 bi = rs->rs_bi;
1982 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1983
1984 for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) {
1985 /* Make sure the bitmap hasn't changed */
1986 gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk,
1987 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1988 rs->rs_biblk++;
1989 rs->rs_free--;
1990
1991 BUG_ON(!rgd->rd_reserved);
1992 rgd->rd_reserved--;
1993 dinode = false;
1994 trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
1995 }
1996
1997 if (!rs->rs_free) {
1998 struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd;
1999 1960
2000 gfs2_rs_deltree(rs); 1961 spin_lock(&rgd->rd_rsspin);
2001 /* -nblocks because we haven't returned to do the math yet. 1962 if (gfs2_rs_active(rs)) {
2002 I'm doing the math backwards to prevent negative numbers, 1963 if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
2003 but think of it as: 1964 block = gfs2_rbm_to_block(rbm);
2004 if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */ 1965 ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
2005 if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks) 1966 rlen = min(rs->rs_free, len);
2006 rg_mblk_search(rgd, ip); 1967 rs->rs_free -= rlen;
1968 rgd->rd_reserved -= rlen;
1969 trace_gfs2_rs(rs, TRACE_RS_CLAIM);
1970 if (rs->rs_free && !ret)
1971 goto out;
1972 }
1973 __rs_deltree(ip, rs);
2007 } 1974 }
2008 return start_block; 1975out:
1976 spin_unlock(&rgd->rd_rsspin);
2009} 1977}
2010 1978
2011/** 1979/**
@@ -2024,47 +1992,40 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2024{ 1992{
2025 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1993 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2026 struct buffer_head *dibh; 1994 struct buffer_head *dibh;
2027 struct gfs2_rgrpd *rgd; 1995 struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
2028 unsigned int ndata; 1996 unsigned int ndata;
2029 u32 goal, blk; /* block, within the rgrp scope */ 1997 u64 goal;
2030 u64 block; /* block, within the file system scope */ 1998 u64 block; /* block, within the file system scope */
2031 int error; 1999 int error;
2032 struct gfs2_bitmap *bi;
2033 2000
2034 /* Only happens if there is a bug in gfs2, return something distinctive 2001 if (gfs2_rs_active(ip->i_res))
2035 * to ensure that it is noticed. 2002 goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm);
2036 */ 2003 else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal))
2037 if (ip->i_res->rs_requested == 0) 2004 goal = ip->i_goal;
2038 return -ECANCELED; 2005 else
2039 2006 goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0;
2040 /* Check if we have a multi-block reservation, and if so, claim the
2041 next free block from it. */
2042 if (gfs2_rs_active(ip->i_res)) {
2043 BUG_ON(!ip->i_res->rs_free);
2044 rgd = ip->i_res->rs_rgd;
2045 block = claim_reserved_blks(ip, dinode, nblocks);
2046 } else {
2047 rgd = ip->i_rgd;
2048 2007
2049 if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) 2008 gfs2_rbm_from_block(&rbm, goal);
2050 goal = ip->i_goal - rgd->rd_data0; 2009 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false);
2051 else
2052 goal = rgd->rd_last_alloc;
2053
2054 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
2055
2056 /* Since all blocks are reserved in advance, this shouldn't
2057 happen */
2058 if (blk == BFITNOENT) {
2059 printk(KERN_WARNING "BFITNOENT, nblocks=%u\n",
2060 *nblocks);
2061 printk(KERN_WARNING "FULL=%d\n",
2062 test_bit(GBF_FULL, &rgd->rd_bits->bi_flags));
2063 goto rgrp_error;
2064 }
2065 2010
2066 block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); 2011 if (error == -ENOSPC) {
2012 gfs2_rbm_from_block(&rbm, goal);
2013 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false);
2014 }
2015
2016 /* Since all blocks are reserved in advance, this shouldn't happen */
2017 if (error) {
2018 fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n",
2019 (unsigned long long)ip->i_no_addr, error, *nblocks,
2020 test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags));
2021 goto rgrp_error;
2067 } 2022 }
2023
2024 gfs2_alloc_extent(&rbm, dinode, nblocks);
2025 block = gfs2_rbm_to_block(&rbm);
2026 rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
2027 if (gfs2_rs_active(ip->i_res))
2028 gfs2_adjust_reservation(ip, &rbm, *nblocks);
2068 ndata = *nblocks; 2029 ndata = *nblocks;
2069 if (dinode) 2030 if (dinode)
2070 ndata--; 2031 ndata--;
@@ -2081,22 +2042,22 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2081 brelse(dibh); 2042 brelse(dibh);
2082 } 2043 }
2083 } 2044 }
2084 if (rgd->rd_free < *nblocks) { 2045 if (rbm.rgd->rd_free < *nblocks) {
2085 printk(KERN_WARNING "nblocks=%u\n", *nblocks); 2046 printk(KERN_WARNING "nblocks=%u\n", *nblocks);
2086 goto rgrp_error; 2047 goto rgrp_error;
2087 } 2048 }
2088 2049
2089 rgd->rd_free -= *nblocks; 2050 rbm.rgd->rd_free -= *nblocks;
2090 if (dinode) { 2051 if (dinode) {
2091 rgd->rd_dinodes++; 2052 rbm.rgd->rd_dinodes++;
2092 *generation = rgd->rd_igeneration++; 2053 *generation = rbm.rgd->rd_igeneration++;
2093 if (*generation == 0) 2054 if (*generation == 0)
2094 *generation = rgd->rd_igeneration++; 2055 *generation = rbm.rgd->rd_igeneration++;
2095 } 2056 }
2096 2057
2097 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2058 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1);
2098 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2059 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2099 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2060 gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
2100 2061
2101 gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); 2062 gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
2102 if (dinode) 2063 if (dinode)
@@ -2110,14 +2071,14 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2110 gfs2_quota_change(ip, ndata, ip->i_inode.i_uid, 2071 gfs2_quota_change(ip, ndata, ip->i_inode.i_uid,
2111 ip->i_inode.i_gid); 2072 ip->i_inode.i_gid);
2112 2073
2113 rgd->rd_free_clone -= *nblocks; 2074 rbm.rgd->rd_free_clone -= *nblocks;
2114 trace_gfs2_block_alloc(ip, rgd, block, *nblocks, 2075 trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
2115 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 2076 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2116 *bn = block; 2077 *bn = block;
2117 return 0; 2078 return 0;
2118 2079
2119rgrp_error: 2080rgrp_error:
2120 gfs2_rgrp_error(rgd); 2081 gfs2_rgrp_error(rbm.rgd);
2121 return -EIO; 2082 return -EIO;
2122} 2083}
2123 2084
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index ca6e26729b8..24077958dcf 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -46,7 +46,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
46 bool dinode, u64 *generation); 46 bool dinode, u64 *generation);
47 47
48extern int gfs2_rs_alloc(struct gfs2_inode *ip); 48extern int gfs2_rs_alloc(struct gfs2_inode *ip);
49extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); 49extern void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs);
50extern void gfs2_rs_delete(struct gfs2_inode *ip); 50extern void gfs2_rs_delete(struct gfs2_inode *ip);
51extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); 51extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
52extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); 52extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
@@ -73,30 +73,10 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
73 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); 73 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
74extern int gfs2_fitrim(struct file *filp, void __user *argp); 74extern int gfs2_fitrim(struct file *filp, void __user *argp);
75 75
76/* This is how to tell if a multi-block reservation is "inplace" reserved: */ 76/* This is how to tell if a reservation is in the rgrp tree: */
77static inline int gfs2_mb_reserved(struct gfs2_inode *ip) 77static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs)
78{ 78{
79 if (ip->i_res && ip->i_res->rs_requested) 79 return rs && !RB_EMPTY_NODE(&rs->rs_node);
80 return 1;
81 return 0;
82}
83
84/* This is how to tell if a multi-block reservation is in the rgrp tree: */
85static inline int gfs2_rs_active(struct gfs2_blkreserv *rs)
86{
87 if (rs && rs->rs_bi)
88 return 1;
89 return 0;
90}
91
92static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk)
93{
94 return (bi->bi_start * GFS2_NBBY) + blk;
95}
96
97static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs)
98{
99 return gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk) + rs->rs_rgd->rd_data0;
100} 80}
101 81
102#endif /* __RGRP_DOT_H__ */ 82#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index fc3168f47a1..a8d90f2f576 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1366,6 +1366,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
1366 val = sdp->sd_tune.gt_statfs_quantum; 1366 val = sdp->sd_tune.gt_statfs_quantum;
1367 if (val != 30) 1367 if (val != 30)
1368 seq_printf(s, ",statfs_quantum=%d", val); 1368 seq_printf(s, ",statfs_quantum=%d", val);
1369 else if (sdp->sd_tune.gt_statfs_slow)
1370 seq_puts(s, ",statfs_quantum=0");
1369 val = sdp->sd_tune.gt_quota_quantum; 1371 val = sdp->sd_tune.gt_quota_quantum;
1370 if (val != 60) 1372 if (val != 60)
1371 seq_printf(s, ",quota_quantum=%d", val); 1373 seq_printf(s, ",quota_quantum=%d", val);
@@ -1543,6 +1545,11 @@ static void gfs2_evict_inode(struct inode *inode)
1543 1545
1544out_truncate: 1546out_truncate:
1545 gfs2_log_flush(sdp, ip->i_gl); 1547 gfs2_log_flush(sdp, ip->i_gl);
1548 if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) {
1549 struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
1550 filemap_fdatawrite(metamapping);
1551 filemap_fdatawait(metamapping);
1552 }
1546 write_inode_now(inode, 1); 1553 write_inode_now(inode, 1);
1547 gfs2_ail_flush(ip->i_gl, 0); 1554 gfs2_ail_flush(ip->i_gl, 0);
1548 1555
@@ -1557,7 +1564,7 @@ out_truncate:
1557out_unlock: 1564out_unlock:
1558 /* Error path for case 1 */ 1565 /* Error path for case 1 */
1559 if (gfs2_rs_active(ip->i_res)) 1566 if (gfs2_rs_active(ip->i_res))
1560 gfs2_rs_deltree(ip->i_res); 1567 gfs2_rs_deltree(ip, ip->i_res);
1561 1568
1562 if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) 1569 if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
1563 gfs2_glock_dq(&ip->i_iopen_gh); 1570 gfs2_glock_dq(&ip->i_iopen_gh);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index a25c252fe41..bbdc78af60c 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -509,10 +509,9 @@ TRACE_EVENT(gfs2_block_alloc,
509/* Keep track of multi-block reservations as they are allocated/freed */ 509/* Keep track of multi-block reservations as they are allocated/freed */
510TRACE_EVENT(gfs2_rs, 510TRACE_EVENT(gfs2_rs,
511 511
512 TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs, 512 TP_PROTO(const struct gfs2_blkreserv *rs, u8 func),
513 u8 func),
514 513
515 TP_ARGS(ip, rs, func), 514 TP_ARGS(rs, func),
516 515
517 TP_STRUCT__entry( 516 TP_STRUCT__entry(
518 __field( dev_t, dev ) 517 __field( dev_t, dev )
@@ -526,18 +525,17 @@ TRACE_EVENT(gfs2_rs,
526 ), 525 ),
527 526
528 TP_fast_assign( 527 TP_fast_assign(
529 __entry->dev = rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0; 528 __entry->dev = rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev;
530 __entry->rd_addr = rs->rs_rgd ? rs->rs_rgd->rd_addr : 0; 529 __entry->rd_addr = rs->rs_rbm.rgd->rd_addr;
531 __entry->rd_free_clone = rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0; 530 __entry->rd_free_clone = rs->rs_rbm.rgd->rd_free_clone;
532 __entry->rd_reserved = rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0; 531 __entry->rd_reserved = rs->rs_rbm.rgd->rd_reserved;
533 __entry->inum = ip ? ip->i_no_addr : 0; 532 __entry->inum = rs->rs_inum;
534 __entry->start = gfs2_rs_startblk(rs); 533 __entry->start = gfs2_rbm_to_block(&rs->rs_rbm);
535 __entry->free = rs->rs_free; 534 __entry->free = rs->rs_free;
536 __entry->func = func; 535 __entry->func = func;
537 ), 536 ),
538 537
539 TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s " 538 TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu",
540 "f:%lu",
541 MAJOR(__entry->dev), MINOR(__entry->dev), 539 MAJOR(__entry->dev), MINOR(__entry->dev),
542 (unsigned long long)__entry->inum, 540 (unsigned long long)__entry->inum,
543 (unsigned long long)__entry->start, 541 (unsigned long long)__entry->start,
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 41f42cdccbb..bf2ae9aeee7 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -28,11 +28,10 @@ struct gfs2_glock;
28 28
29/* reserve either the number of blocks to be allocated plus the rg header 29/* reserve either the number of blocks to be allocated plus the rg header
30 * block, or all of the blocks in the rg, whichever is smaller */ 30 * block, or all of the blocks in the rg, whichever is smaller */
31static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) 31static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested)
32{ 32{
33 const struct gfs2_blkreserv *rs = ip->i_res; 33 if (requested < ip->i_rgd->rd_length)
34 if (rs && rs->rs_requested < ip->i_rgd->rd_length) 34 return requested + 1;
35 return rs->rs_requested + 1;
36 return ip->i_rgd->rd_length; 35 return ip->i_rgd->rd_length;
37} 36}
38 37
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 27a0b4a901f..db330e5518c 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -448,17 +448,18 @@ ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
448} 448}
449 449
450/** 450/**
451 * ea_get_unstuffed - actually copies the unstuffed data into the 451 * ea_iter_unstuffed - copies the unstuffed xattr data to/from the
452 * request buffer 452 * request buffer
453 * @ip: The GFS2 inode 453 * @ip: The GFS2 inode
454 * @ea: The extended attribute header structure 454 * @ea: The extended attribute header structure
455 * @data: The data to be copied 455 * @din: The data to be copied in
456 * @dout: The data to be copied out (one of din,dout will be NULL)
456 * 457 *
457 * Returns: errno 458 * Returns: errno
458 */ 459 */
459 460
460static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, 461static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
461 char *data) 462 const char *din, char *dout)
462{ 463{
463 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 464 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
464 struct buffer_head **bh; 465 struct buffer_head **bh;
@@ -467,6 +468,8 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
467 __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); 468 __be64 *dataptrs = GFS2_EA2DATAPTRS(ea);
468 unsigned int x; 469 unsigned int x;
469 int error = 0; 470 int error = 0;
471 unsigned char *pos;
472 unsigned cp_size;
470 473
471 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); 474 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS);
472 if (!bh) 475 if (!bh)
@@ -497,12 +500,21 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
497 goto out; 500 goto out;
498 } 501 }
499 502
500 memcpy(data, bh[x]->b_data + sizeof(struct gfs2_meta_header), 503 pos = bh[x]->b_data + sizeof(struct gfs2_meta_header);
501 (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize); 504 cp_size = (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize;
502 505
503 amount -= sdp->sd_jbsize; 506 if (dout) {
504 data += sdp->sd_jbsize; 507 memcpy(dout, pos, cp_size);
508 dout += sdp->sd_jbsize;
509 }
510
511 if (din) {
512 gfs2_trans_add_bh(ip->i_gl, bh[x], 1);
513 memcpy(pos, din, cp_size);
514 din += sdp->sd_jbsize;
515 }
505 516
517 amount -= sdp->sd_jbsize;
506 brelse(bh[x]); 518 brelse(bh[x]);
507 } 519 }
508 520
@@ -523,7 +535,7 @@ static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
523 memcpy(data, GFS2_EA2DATA(el->el_ea), len); 535 memcpy(data, GFS2_EA2DATA(el->el_ea), len);
524 return len; 536 return len;
525 } 537 }
526 ret = ea_get_unstuffed(ip, el->el_ea, data); 538 ret = gfs2_iter_unstuffed(ip, el->el_ea, NULL, data);
527 if (ret < 0) 539 if (ret < 0)
528 return ret; 540 return ret;
529 return len; 541 return len;
@@ -727,7 +739,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
727 goto out_gunlock_q; 739 goto out_gunlock_q;
728 740
729 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), 741 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
730 blks + gfs2_rg_blocks(ip) + 742 blks + gfs2_rg_blocks(ip, blks) +
731 RES_DINODE + RES_STATFS + RES_QUOTA, 0); 743 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
732 if (error) 744 if (error)
733 goto out_ipres; 745 goto out_ipres;
@@ -1220,69 +1232,23 @@ static int gfs2_xattr_set(struct dentry *dentry, const char *name,
1220 size, flags, type); 1232 size, flags, type);
1221} 1233}
1222 1234
1235
1223static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, 1236static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
1224 struct gfs2_ea_header *ea, char *data) 1237 struct gfs2_ea_header *ea, char *data)
1225{ 1238{
1226 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1239 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1227 struct buffer_head **bh;
1228 unsigned int amount = GFS2_EA_DATA_LEN(ea); 1240 unsigned int amount = GFS2_EA_DATA_LEN(ea);
1229 unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize); 1241 unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
1230 __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); 1242 int ret;
1231 unsigned int x;
1232 int error;
1233
1234 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS);
1235 if (!bh)
1236 return -ENOMEM;
1237
1238 error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
1239 if (error)
1240 goto out;
1241
1242 for (x = 0; x < nptrs; x++) {
1243 error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0,
1244 bh + x);
1245 if (error) {
1246 while (x--)
1247 brelse(bh[x]);
1248 goto fail;
1249 }
1250 dataptrs++;
1251 }
1252
1253 for (x = 0; x < nptrs; x++) {
1254 error = gfs2_meta_wait(sdp, bh[x]);
1255 if (error) {
1256 for (; x < nptrs; x++)
1257 brelse(bh[x]);
1258 goto fail;
1259 }
1260 if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
1261 for (; x < nptrs; x++)
1262 brelse(bh[x]);
1263 error = -EIO;
1264 goto fail;
1265 }
1266
1267 gfs2_trans_add_bh(ip->i_gl, bh[x], 1);
1268
1269 memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header), data,
1270 (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
1271
1272 amount -= sdp->sd_jbsize;
1273 data += sdp->sd_jbsize;
1274
1275 brelse(bh[x]);
1276 }
1277 1243
1278out: 1244 ret = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
1279 kfree(bh); 1245 if (ret)
1280 return error; 1246 return ret;
1281 1247
1282fail: 1248 ret = gfs2_iter_unstuffed(ip, ea, data, NULL);
1283 gfs2_trans_end(sdp); 1249 gfs2_trans_end(sdp);
1284 kfree(bh); 1250
1285 return error; 1251 return ret;
1286} 1252}
1287 1253
1288int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) 1254int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
diff --git a/fs/libfs.c b/fs/libfs.c
index a74cb1725ac..7cc37ca19cd 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -874,7 +874,7 @@ struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid,
874EXPORT_SYMBOL_GPL(generic_fh_to_dentry); 874EXPORT_SYMBOL_GPL(generic_fh_to_dentry);
875 875
876/** 876/**
877 * generic_fh_to_dentry - generic helper for the fh_to_parent export operation 877 * generic_fh_to_parent - generic helper for the fh_to_parent export operation
878 * @sb: filesystem to do the file handle conversion on 878 * @sb: filesystem to do the file handle conversion on
879 * @fid: file handle to convert 879 * @fid: file handle to convert
880 * @fh_len: length of the file handle in bytes 880 * @fh_len: length of the file handle in bytes
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index fb1a2bedbe9..8d80c990dff 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -289,7 +289,6 @@ static void nlmsvc_free_block(struct kref *kref)
289 dprintk("lockd: freeing block %p...\n", block); 289 dprintk("lockd: freeing block %p...\n", block);
290 290
291 /* Remove block from file's list of blocks */ 291 /* Remove block from file's list of blocks */
292 mutex_lock(&file->f_mutex);
293 list_del_init(&block->b_flist); 292 list_del_init(&block->b_flist);
294 mutex_unlock(&file->f_mutex); 293 mutex_unlock(&file->f_mutex);
295 294
@@ -303,7 +302,7 @@ static void nlmsvc_free_block(struct kref *kref)
303static void nlmsvc_release_block(struct nlm_block *block) 302static void nlmsvc_release_block(struct nlm_block *block)
304{ 303{
305 if (block != NULL) 304 if (block != NULL)
306 kref_put(&block->b_count, nlmsvc_free_block); 305 kref_put_mutex(&block->b_count, nlmsvc_free_block, &block->b_file->f_mutex);
307} 306}
308 307
309/* 308/*
diff --git a/fs/namespace.c b/fs/namespace.c
index 4d31f73e256..7bdf7907413 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1886,8 +1886,14 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
1886 return err; 1886 return err;
1887 1887
1888 err = -EINVAL; 1888 err = -EINVAL;
1889 if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(real_mount(path->mnt))) 1889 if (unlikely(!check_mnt(real_mount(path->mnt)))) {
1890 goto unlock; 1890 /* that's acceptable only for automounts done in private ns */
1891 if (!(mnt_flags & MNT_SHRINKABLE))
1892 goto unlock;
1893 /* ... and for those we'd better have mountpoint still alive */
1894 if (!real_mount(path->mnt)->mnt_ns)
1895 goto unlock;
1896 }
1891 1897
1892 /* Refuse the same filesystem on the same mount point */ 1898 /* Refuse the same filesystem on the same mount point */
1893 err = -EBUSY; 1899 err = -EBUSY;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 75d6d0a3d32..6a7fcab7ecb 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -287,10 +287,12 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
287 struct inode *inode = file->f_path.dentry->d_inode; 287 struct inode *inode = file->f_path.dentry->d_inode;
288 288
289 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 289 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
290 if (ret != 0)
291 goto out;
290 mutex_lock(&inode->i_mutex); 292 mutex_lock(&inode->i_mutex);
291 ret = nfs_file_fsync_commit(file, start, end, datasync); 293 ret = nfs_file_fsync_commit(file, start, end, datasync);
292 mutex_unlock(&inode->i_mutex); 294 mutex_unlock(&inode->i_mutex);
293 295out:
294 return ret; 296 return ret;
295} 297}
296 298
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c6e895f0fbf..9b47610338f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -154,7 +154,7 @@ static void nfs_zap_caches_locked(struct inode *inode)
154 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); 154 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
155 nfsi->attrtimeo_timestamp = jiffies; 155 nfsi->attrtimeo_timestamp = jiffies;
156 156
157 memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); 157 memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
158 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) 158 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
159 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; 159 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
160 else 160 else
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d6b3b5f2d77..69322096c32 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -643,7 +643,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
643 u64 cookie, struct page **pages, unsigned int count, int plus) 643 u64 cookie, struct page **pages, unsigned int count, int plus)
644{ 644{
645 struct inode *dir = dentry->d_inode; 645 struct inode *dir = dentry->d_inode;
646 __be32 *verf = NFS_COOKIEVERF(dir); 646 __be32 *verf = NFS_I(dir)->cookieverf;
647 struct nfs3_readdirargs arg = { 647 struct nfs3_readdirargs arg = {
648 .fh = NFS_FH(dir), 648 .fh = NFS_FH(dir),
649 .cookie = cookie, 649 .cookie = cookie,
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index acb65e7887f..eb5eb8eef4d 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -96,13 +96,15 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
96 struct inode *inode = file->f_path.dentry->d_inode; 96 struct inode *inode = file->f_path.dentry->d_inode;
97 97
98 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 98 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
99 if (ret != 0)
100 goto out;
99 mutex_lock(&inode->i_mutex); 101 mutex_lock(&inode->i_mutex);
100 ret = nfs_file_fsync_commit(file, start, end, datasync); 102 ret = nfs_file_fsync_commit(file, start, end, datasync);
101 if (!ret && !datasync) 103 if (!ret && !datasync)
102 /* application has asked for meta-data sync */ 104 /* application has asked for meta-data sync */
103 ret = pnfs_layoutcommit_inode(inode, true); 105 ret = pnfs_layoutcommit_inode(inode, true);
104 mutex_unlock(&inode->i_mutex); 106 mutex_unlock(&inode->i_mutex);
105 107out:
106 return ret; 108 return ret;
107} 109}
108 110
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 635274140b1..1e50326d00d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3215,11 +3215,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
3215 dentry->d_parent->d_name.name, 3215 dentry->d_parent->d_name.name,
3216 dentry->d_name.name, 3216 dentry->d_name.name,
3217 (unsigned long long)cookie); 3217 (unsigned long long)cookie);
3218 nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); 3218 nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
3219 res.pgbase = args.pgbase; 3219 res.pgbase = args.pgbase;
3220 status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); 3220 status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
3221 if (status >= 0) { 3221 if (status >= 0) {
3222 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); 3222 memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE);
3223 status += args.pgbase; 3223 status += args.pgbase;
3224 } 3224 }
3225 3225
@@ -3653,11 +3653,11 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server)
3653 && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); 3653 && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL);
3654} 3654}
3655 3655
3656/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that 3656/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that
3657 * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on 3657 * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_SIZE) bytes on
3658 * the stack. 3658 * the stack.
3659 */ 3659 */
3660#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) 3660#define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE)
3661 3661
3662static int buf_to_pages_noslab(const void *buf, size_t buflen, 3662static int buf_to_pages_noslab(const void *buf, size_t buflen,
3663 struct page **pages, unsigned int *pgbase) 3663 struct page **pages, unsigned int *pgbase)
@@ -3668,7 +3668,7 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen,
3668 spages = pages; 3668 spages = pages;
3669 3669
3670 do { 3670 do {
3671 len = min_t(size_t, PAGE_CACHE_SIZE, buflen); 3671 len = min_t(size_t, PAGE_SIZE, buflen);
3672 newpage = alloc_page(GFP_KERNEL); 3672 newpage = alloc_page(GFP_KERNEL);
3673 3673
3674 if (newpage == NULL) 3674 if (newpage == NULL)
@@ -3739,7 +3739,7 @@ static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size
3739 struct nfs4_cached_acl *acl; 3739 struct nfs4_cached_acl *acl;
3740 size_t buflen = sizeof(*acl) + acl_len; 3740 size_t buflen = sizeof(*acl) + acl_len;
3741 3741
3742 if (pages && buflen <= PAGE_SIZE) { 3742 if (buflen <= PAGE_SIZE) {
3743 acl = kmalloc(buflen, GFP_KERNEL); 3743 acl = kmalloc(buflen, GFP_KERNEL);
3744 if (acl == NULL) 3744 if (acl == NULL)
3745 goto out; 3745 goto out;
@@ -3782,17 +3782,15 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3782 .rpc_argp = &args, 3782 .rpc_argp = &args,
3783 .rpc_resp = &res, 3783 .rpc_resp = &res,
3784 }; 3784 };
3785 int ret = -ENOMEM, npages, i; 3785 unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
3786 size_t acl_len = 0; 3786 int ret = -ENOMEM, i;
3787 3787
3788 npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
3789 /* As long as we're doing a round trip to the server anyway, 3788 /* As long as we're doing a round trip to the server anyway,
3790 * let's be prepared for a page of acl data. */ 3789 * let's be prepared for a page of acl data. */
3791 if (npages == 0) 3790 if (npages == 0)
3792 npages = 1; 3791 npages = 1;
3793 3792 if (npages > ARRAY_SIZE(pages))
3794 /* Add an extra page to handle the bitmap returned */ 3793 return -ERANGE;
3795 npages++;
3796 3794
3797 for (i = 0; i < npages; i++) { 3795 for (i = 0; i < npages; i++) {
3798 pages[i] = alloc_page(GFP_KERNEL); 3796 pages[i] = alloc_page(GFP_KERNEL);
@@ -3808,11 +3806,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3808 args.acl_len = npages * PAGE_SIZE; 3806 args.acl_len = npages * PAGE_SIZE;
3809 args.acl_pgbase = 0; 3807 args.acl_pgbase = 0;
3810 3808
3811 /* Let decode_getfacl know not to fail if the ACL data is larger than
3812 * the page we send as a guess */
3813 if (buf == NULL)
3814 res.acl_flags |= NFS4_ACL_LEN_REQUEST;
3815
3816 dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", 3809 dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n",
3817 __func__, buf, buflen, npages, args.acl_len); 3810 __func__, buf, buflen, npages, args.acl_len);
3818 ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), 3811 ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
@@ -3820,20 +3813,19 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3820 if (ret) 3813 if (ret)
3821 goto out_free; 3814 goto out_free;
3822 3815
3823 acl_len = res.acl_len; 3816 /* Handle the case where the passed-in buffer is too short */
3824 if (acl_len > args.acl_len) 3817 if (res.acl_flags & NFS4_ACL_TRUNC) {
3825 nfs4_write_cached_acl(inode, NULL, 0, acl_len); 3818 /* Did the user only issue a request for the acl length? */
3826 else 3819 if (buf == NULL)
3827 nfs4_write_cached_acl(inode, pages, res.acl_data_offset, 3820 goto out_ok;
3828 acl_len);
3829 if (buf) {
3830 ret = -ERANGE; 3821 ret = -ERANGE;
3831 if (acl_len > buflen) 3822 goto out_free;
3832 goto out_free;
3833 _copy_from_pages(buf, pages, res.acl_data_offset,
3834 acl_len);
3835 } 3823 }
3836 ret = acl_len; 3824 nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len);
3825 if (buf)
3826 _copy_from_pages(buf, pages, res.acl_data_offset, res.acl_len);
3827out_ok:
3828 ret = res.acl_len;
3837out_free: 3829out_free:
3838 for (i = 0; i < npages; i++) 3830 for (i = 0; i < npages; i++)
3839 if (pages[i]) 3831 if (pages[i])
@@ -3891,10 +3883,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3891 .rpc_argp = &arg, 3883 .rpc_argp = &arg,
3892 .rpc_resp = &res, 3884 .rpc_resp = &res,
3893 }; 3885 };
3886 unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
3894 int ret, i; 3887 int ret, i;
3895 3888
3896 if (!nfs4_server_supports_acls(server)) 3889 if (!nfs4_server_supports_acls(server))
3897 return -EOPNOTSUPP; 3890 return -EOPNOTSUPP;
3891 if (npages > ARRAY_SIZE(pages))
3892 return -ERANGE;
3898 i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); 3893 i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3899 if (i < 0) 3894 if (i < 0)
3900 return i; 3895 return i;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1bfbd67c556..8dba6bd4855 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5072,18 +5072,14 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
5072 * are stored with the acl data to handle the problem of 5072 * are stored with the acl data to handle the problem of
5073 * variable length bitmaps.*/ 5073 * variable length bitmaps.*/
5074 res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset; 5074 res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset;
5075
5076 /* We ignore &savep and don't do consistency checks on
5077 * the attr length. Let userspace figure it out.... */
5078 res->acl_len = attrlen; 5075 res->acl_len = attrlen;
5079 if (attrlen > (xdr->nwords << 2)) { 5076
5080 if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { 5077 /* Check for receive buffer overflow */
5081 /* getxattr interface called with a NULL buf */ 5078 if (res->acl_len > (xdr->nwords << 2) ||
5082 goto out; 5079 res->acl_len + res->acl_data_offset > xdr->buf->page_len) {
5083 } 5080 res->acl_flags |= NFS4_ACL_TRUNC;
5084 dprintk("NFS: acl reply: attrlen %u > page_len %u\n", 5081 dprintk("NFS: acl reply: attrlen %u > page_len %u\n",
5085 attrlen, xdr->nwords << 2); 5082 attrlen, xdr->nwords << 2);
5086 return -EINVAL;
5087 } 5083 }
5088 } else 5084 } else
5089 status = -EOPNOTSUPP; 5085 status = -EOPNOTSUPP;
@@ -6229,7 +6225,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6229 status = decode_open(xdr, res); 6225 status = decode_open(xdr, res);
6230 if (status) 6226 if (status)
6231 goto out; 6227 goto out;
6232 if (decode_getfh(xdr, &res->fh) != 0) 6228 status = decode_getfh(xdr, &res->fh);
6229 if (status)
6233 goto out; 6230 goto out;
6234 decode_getfattr(xdr, res->f_attr, res->server); 6231 decode_getfattr(xdr, res->f_attr, res->server);
6235out: 6232out:
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 239aff7338e..d2c7f5db084 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1537,7 +1537,7 @@ static int nfs_parse_mount_options(char *raw,
1537 1537
1538 /* 1538 /*
1539 * verify that any proto=/mountproto= options match the address 1539 * verify that any proto=/mountproto= options match the address
1540 * familiies in the addr=/mountaddr= options. 1540 * families in the addr=/mountaddr= options.
1541 */ 1541 */
1542 if (protofamily != AF_UNSPEC && 1542 if (protofamily != AF_UNSPEC &&
1543 protofamily != mnt->nfs_server.address.ss_family) 1543 protofamily != mnt->nfs_server.address.ss_family)
@@ -1867,6 +1867,7 @@ static int nfs23_validate_mount_data(void *options,
1867 1867
1868 memcpy(sap, &data->addr, sizeof(data->addr)); 1868 memcpy(sap, &data->addr, sizeof(data->addr));
1869 args->nfs_server.addrlen = sizeof(data->addr); 1869 args->nfs_server.addrlen = sizeof(data->addr);
1870 args->nfs_server.port = ntohs(data->addr.sin_port);
1870 if (!nfs_verify_server_address(sap)) 1871 if (!nfs_verify_server_address(sap))
1871 goto out_no_address; 1872 goto out_no_address;
1872 1873
@@ -2564,6 +2565,7 @@ static int nfs4_validate_mount_data(void *options,
2564 return -EFAULT; 2565 return -EFAULT;
2565 if (!nfs_verify_server_address(sap)) 2566 if (!nfs_verify_server_address(sap))
2566 goto out_no_address; 2567 goto out_no_address;
2568 args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
2567 2569
2568 if (data->auth_flavourlen) { 2570 if (data->auth_flavourlen) {
2569 if (data->auth_flavourlen > 1) 2571 if (data->auth_flavourlen > 1)
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index dfafeb2b05a..eb7cc91b725 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -462,9 +462,6 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
462 462
463 err = ERR_PTR(-ENOMEM); 463 err = ERR_PTR(-ENOMEM);
464 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); 464 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
465 if (h)
466 sysctl_head_finish(h);
467
468 if (!inode) 465 if (!inode)
469 goto out; 466 goto out;
470 467
@@ -473,6 +470,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
473 d_add(dentry, inode); 470 d_add(dentry, inode);
474 471
475out: 472out:
473 if (h)
474 sysctl_head_finish(h);
476 sysctl_head_finish(head); 475 sysctl_head_finish(head);
477 return err; 476 return err;
478} 477}
diff --git a/fs/stat.c b/fs/stat.c
index b6ff11825fc..208039eec6c 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -58,7 +58,7 @@ EXPORT_SYMBOL(vfs_getattr);
58int vfs_fstat(unsigned int fd, struct kstat *stat) 58int vfs_fstat(unsigned int fd, struct kstat *stat)
59{ 59{
60 int fput_needed; 60 int fput_needed;
61 struct file *f = fget_light(fd, &fput_needed); 61 struct file *f = fget_raw_light(fd, &fput_needed);
62 int error = -EBADF; 62 int error = -EBADF;
63 63
64 if (f) { 64 if (f) {
@@ -326,7 +326,7 @@ SYSCALL_DEFINE3(readlink, const char __user *, path, char __user *, buf,
326 326
327 327
328/* ---------- LFS-64 ----------- */ 328/* ---------- LFS-64 ----------- */
329#ifdef __ARCH_WANT_STAT64 329#if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64)
330 330
331#ifndef INIT_STRUCT_STAT64_PADDING 331#ifndef INIT_STRUCT_STAT64_PADDING
332# define INIT_STRUCT_STAT64_PADDING(st) memset(&st, 0, sizeof(st)) 332# define INIT_STRUCT_STAT64_PADDING(st) memset(&st, 0, sizeof(st))
@@ -415,7 +415,7 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename,
415 return error; 415 return error;
416 return cp_new_stat64(&stat, statbuf); 416 return cp_new_stat64(&stat, statbuf);
417} 417}
418#endif /* __ARCH_WANT_STAT64 */ 418#endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */
419 419
420/* Caller is here responsible for sufficient locking (ie. inode->i_lock) */ 420/* Caller is here responsible for sufficient locking (ie. inode->i_lock) */
421void __inode_add_bytes(struct inode *inode, loff_t bytes) 421void __inode_add_bytes(struct inode *inode, loff_t bytes)
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 7f3f7ba3df6..d1c6093fd3d 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -39,20 +39,24 @@
39#include "udf_i.h" 39#include "udf_i.h"
40#include "udf_sb.h" 40#include "udf_sb.h"
41 41
42static int udf_adinicb_readpage(struct file *file, struct page *page) 42static void __udf_adinicb_readpage(struct page *page)
43{ 43{
44 struct inode *inode = page->mapping->host; 44 struct inode *inode = page->mapping->host;
45 char *kaddr; 45 char *kaddr;
46 struct udf_inode_info *iinfo = UDF_I(inode); 46 struct udf_inode_info *iinfo = UDF_I(inode);
47 47
48 BUG_ON(!PageLocked(page));
49
50 kaddr = kmap(page); 48 kaddr = kmap(page);
51 memset(kaddr, 0, PAGE_CACHE_SIZE);
52 memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size); 49 memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size);
50 memset(kaddr + inode->i_size, 0, PAGE_CACHE_SIZE - inode->i_size);
53 flush_dcache_page(page); 51 flush_dcache_page(page);
54 SetPageUptodate(page); 52 SetPageUptodate(page);
55 kunmap(page); 53 kunmap(page);
54}
55
56static int udf_adinicb_readpage(struct file *file, struct page *page)
57{
58 BUG_ON(!PageLocked(page));
59 __udf_adinicb_readpage(page);
56 unlock_page(page); 60 unlock_page(page);
57 61
58 return 0; 62 return 0;
@@ -77,6 +81,25 @@ static int udf_adinicb_writepage(struct page *page,
77 return 0; 81 return 0;
78} 82}
79 83
84static int udf_adinicb_write_begin(struct file *file,
85 struct address_space *mapping, loff_t pos,
86 unsigned len, unsigned flags, struct page **pagep,
87 void **fsdata)
88{
89 struct page *page;
90
91 if (WARN_ON_ONCE(pos >= PAGE_CACHE_SIZE))
92 return -EIO;
93 page = grab_cache_page_write_begin(mapping, 0, flags);
94 if (!page)
95 return -ENOMEM;
96 *pagep = page;
97
98 if (!PageUptodate(page) && len != PAGE_CACHE_SIZE)
99 __udf_adinicb_readpage(page);
100 return 0;
101}
102
80static int udf_adinicb_write_end(struct file *file, 103static int udf_adinicb_write_end(struct file *file,
81 struct address_space *mapping, 104 struct address_space *mapping,
82 loff_t pos, unsigned len, unsigned copied, 105 loff_t pos, unsigned len, unsigned copied,
@@ -98,8 +121,8 @@ static int udf_adinicb_write_end(struct file *file,
98const struct address_space_operations udf_adinicb_aops = { 121const struct address_space_operations udf_adinicb_aops = {
99 .readpage = udf_adinicb_readpage, 122 .readpage = udf_adinicb_readpage,
100 .writepage = udf_adinicb_writepage, 123 .writepage = udf_adinicb_writepage,
101 .write_begin = simple_write_begin, 124 .write_begin = udf_adinicb_write_begin,
102 .write_end = udf_adinicb_write_end, 125 .write_end = udf_adinicb_write_end,
103}; 126};
104 127
105static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 128static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index d7a9dd735e1..933b7930b86 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -96,6 +96,7 @@ xfs_buf_lru_add(
96 atomic_inc(&bp->b_hold); 96 atomic_inc(&bp->b_hold);
97 list_add_tail(&bp->b_lru, &btp->bt_lru); 97 list_add_tail(&bp->b_lru, &btp->bt_lru);
98 btp->bt_lru_nr++; 98 btp->bt_lru_nr++;
99 bp->b_lru_flags &= ~_XBF_LRU_DISPOSE;
99 } 100 }
100 spin_unlock(&btp->bt_lru_lock); 101 spin_unlock(&btp->bt_lru_lock);
101} 102}
@@ -154,7 +155,8 @@ xfs_buf_stale(
154 struct xfs_buftarg *btp = bp->b_target; 155 struct xfs_buftarg *btp = bp->b_target;
155 156
156 spin_lock(&btp->bt_lru_lock); 157 spin_lock(&btp->bt_lru_lock);
157 if (!list_empty(&bp->b_lru)) { 158 if (!list_empty(&bp->b_lru) &&
159 !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) {
158 list_del_init(&bp->b_lru); 160 list_del_init(&bp->b_lru);
159 btp->bt_lru_nr--; 161 btp->bt_lru_nr--;
160 atomic_dec(&bp->b_hold); 162 atomic_dec(&bp->b_hold);
@@ -1501,6 +1503,7 @@ xfs_buftarg_shrink(
1501 */ 1503 */
1502 list_move(&bp->b_lru, &dispose); 1504 list_move(&bp->b_lru, &dispose);
1503 btp->bt_lru_nr--; 1505 btp->bt_lru_nr--;
1506 bp->b_lru_flags |= _XBF_LRU_DISPOSE;
1504 } 1507 }
1505 spin_unlock(&btp->bt_lru_lock); 1508 spin_unlock(&btp->bt_lru_lock);
1506 1509
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index d03b73b9604..7c0b6a0a155 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -38,27 +38,28 @@ typedef enum {
38 XBRW_ZERO = 3, /* Zero target memory */ 38 XBRW_ZERO = 3, /* Zero target memory */
39} xfs_buf_rw_t; 39} xfs_buf_rw_t;
40 40
41#define XBF_READ (1 << 0) /* buffer intended for reading from device */ 41#define XBF_READ (1 << 0) /* buffer intended for reading from device */
42#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ 42#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
43#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ 43#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
44#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ 44#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
45#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ 45#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
46#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ 46#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */
47 47
48/* I/O hints for the BIO layer */ 48/* I/O hints for the BIO layer */
49#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ 49#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
50#define XBF_FUA (1 << 11)/* force cache write through mode */ 50#define XBF_FUA (1 << 11)/* force cache write through mode */
51#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ 51#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */
52 52
53/* flags used only as arguments to access routines */ 53/* flags used only as arguments to access routines */
54#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ 54#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */
55#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ 55#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */
56 56
57/* flags used only internally */ 57/* flags used only internally */
58#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ 58#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
59#define _XBF_KMEM (1 << 21)/* backed by heap memory */ 59#define _XBF_KMEM (1 << 21)/* backed by heap memory */
60#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ 60#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
61#define _XBF_COMPOUND (1 << 23)/* compound buffer */ 61#define _XBF_COMPOUND (1 << 23)/* compound buffer */
62#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */
62 63
63typedef unsigned int xfs_buf_flags_t; 64typedef unsigned int xfs_buf_flags_t;
64 65
@@ -72,12 +73,13 @@ typedef unsigned int xfs_buf_flags_t;
72 { XBF_SYNCIO, "SYNCIO" }, \ 73 { XBF_SYNCIO, "SYNCIO" }, \
73 { XBF_FUA, "FUA" }, \ 74 { XBF_FUA, "FUA" }, \
74 { XBF_FLUSH, "FLUSH" }, \ 75 { XBF_FLUSH, "FLUSH" }, \
75 { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ 76 { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\
76 { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ 77 { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\
77 { _XBF_PAGES, "PAGES" }, \ 78 { _XBF_PAGES, "PAGES" }, \
78 { _XBF_KMEM, "KMEM" }, \ 79 { _XBF_KMEM, "KMEM" }, \
79 { _XBF_DELWRI_Q, "DELWRI_Q" }, \ 80 { _XBF_DELWRI_Q, "DELWRI_Q" }, \
80 { _XBF_COMPOUND, "COMPOUND" } 81 { _XBF_COMPOUND, "COMPOUND" }, \
82 { _XBF_LRU_DISPOSE, "LRU_DISPOSE" }
81 83
82typedef struct xfs_buftarg { 84typedef struct xfs_buftarg {
83 dev_t bt_dev; 85 dev_t bt_dev;
@@ -124,7 +126,12 @@ typedef struct xfs_buf {
124 xfs_buf_flags_t b_flags; /* status flags */ 126 xfs_buf_flags_t b_flags; /* status flags */
125 struct semaphore b_sema; /* semaphore for lockables */ 127 struct semaphore b_sema; /* semaphore for lockables */
126 128
129 /*
130 * concurrent access to b_lru and b_lru_flags are protected by
131 * bt_lru_lock and not by b_sema
132 */
127 struct list_head b_lru; /* lru list */ 133 struct list_head b_lru; /* lru list */
134 xfs_buf_flags_t b_lru_flags; /* internal lru status flags */
128 wait_queue_head_t b_waiters; /* unpin waiters */ 135 wait_queue_head_t b_waiters; /* unpin waiters */
129 struct list_head b_list; 136 struct list_head b_list;
130 struct xfs_perag *b_pag; /* contains rbtree root */ 137 struct xfs_perag *b_pag; /* contains rbtree root */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index bdaf4cb9f4a..19e2380fb86 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -919,6 +919,7 @@ xfs_fs_put_super(
919 struct xfs_mount *mp = XFS_M(sb); 919 struct xfs_mount *mp = XFS_M(sb);
920 920
921 xfs_filestream_unmount(mp); 921 xfs_filestream_unmount(mp);
922 cancel_delayed_work_sync(&mp->m_sync_work);
922 xfs_unmountfs(mp); 923 xfs_unmountfs(mp);
923 xfs_syncd_stop(mp); 924 xfs_syncd_stop(mp);
924 xfs_freesb(mp); 925 xfs_freesb(mp);