aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-02-26 20:18:52 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2010-02-26 20:18:52 -0500
commitb305956abc3c50c52598bbf39b7a5f4850058ba8 (patch)
tree9046d97af63236dba36bc3be139c7e0a92e09d41 /fs
parent41630959ed5ce694ec2e8c0f3c69743e011394c8 (diff)
parent398007f863a4af2b4a5a07219c5a617f1a098115 (diff)
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs: (52 commits) fs/xfs: Correct NULL test xfs: optimize log flushing in xfs_fsync xfs: only clear the suid bit once in xfs_write xfs: kill xfs_bawrite xfs: log changed inodes instead of writing them synchronously xfs: remove invalid barrier optimization from xfs_fsync xfs: kill the unused XFS_QMOPT_* flush flags V2 xfs: Use delay write promotion for dquot flushing xfs: Sort delayed write buffers before dispatch xfs: Don't issue buffer IO direct from AIL push V2 xfs: Use delayed write for inodes rather than async V2 xfs: Make inode reclaim states explicit xfs: more reserved blocks fixups xfs: turn off sign warnings xfs: don't hold onto reserved blocks on remount,ro xfs: quota limit statvfs available blocks xfs: replace KM_LARGE with explicit vmalloc use xfs: cleanup up xfs_log_force calling conventions xfs: kill XLOG_VEC_SET_TYPE xfs: remove duplicate buffer flags ...
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/Makefile2
-rw-r--r--fs/xfs/linux-2.6/kmem.c56
-rw-r--r--fs/xfs/linux-2.6/kmem.h21
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c290
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h52
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c21
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.h12
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c62
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c169
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c186
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h81
-rw-r--r--fs/xfs/linux-2.6/xfs_xattr.c27
-rw-r--r--fs/xfs/quota/xfs_dquot.c47
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c99
-rw-r--r--fs/xfs/quota/xfs_dquot_item.h4
-rw-r--r--fs/xfs/quota/xfs_qm.c40
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c4
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c49
-rw-r--r--fs/xfs/xfs_acl.h4
-rw-r--r--fs/xfs/xfs_ag.h16
-rw-r--r--fs/xfs/xfs_alloc.c96
-rw-r--r--fs/xfs/xfs_alloc_btree.c9
-rw-r--r--fs/xfs/xfs_attr.c52
-rw-r--r--fs/xfs/xfs_attr.h3
-rw-r--r--fs/xfs/xfs_attr_leaf.c30
-rw-r--r--fs/xfs/xfs_attr_sf.h2
-rw-r--r--fs/xfs/xfs_bmap.c17
-rw-r--r--fs/xfs/xfs_bmap_btree.c2
-rw-r--r--fs/xfs/xfs_bmap_btree.h1
-rw-r--r--fs/xfs/xfs_btree.c4
-rw-r--r--fs/xfs/xfs_buf_item.c72
-rw-r--r--fs/xfs/xfs_da_btree.c4
-rw-r--r--fs/xfs/xfs_da_btree.h5
-rw-r--r--fs/xfs/xfs_dfrag.c43
-rw-r--r--fs/xfs/xfs_dfrag.h3
-rw-r--r--fs/xfs/xfs_dir2.c8
-rw-r--r--fs/xfs/xfs_dir2.h4
-rw-r--r--fs/xfs/xfs_dir2_block.c9
-rw-r--r--fs/xfs/xfs_dir2_leaf.c2
-rw-r--r--fs/xfs/xfs_dir2_node.c2
-rw-r--r--fs/xfs/xfs_dir2_node.h2
-rw-r--r--fs/xfs/xfs_dir2_sf.c2
-rw-r--r--fs/xfs/xfs_extfree_item.c4
-rw-r--r--fs/xfs/xfs_filestream.c42
-rw-r--r--fs/xfs/xfs_filestream.h28
-rw-r--r--fs/xfs/xfs_fsops.c42
-rw-r--r--fs/xfs/xfs_ialloc.c62
-rw-r--r--fs/xfs/xfs_iget.c10
-rw-r--r--fs/xfs/xfs_inode.c126
-rw-r--r--fs/xfs/xfs_inode.h11
-rw-r--r--fs/xfs/xfs_inode_item.c129
-rw-r--r--fs/xfs/xfs_inode_item.h6
-rw-r--r--fs/xfs/xfs_itable.c12
-rw-r--r--fs/xfs/xfs_log.c383
-rw-r--r--fs/xfs/xfs_log.h19
-rw-r--r--fs/xfs/xfs_log_priv.h5
-rw-r--r--fs/xfs/xfs_log_recover.c222
-rw-r--r--fs/xfs/xfs_log_recover.h23
-rw-r--r--fs/xfs/xfs_mount.c181
-rw-r--r--fs/xfs/xfs_mount.h27
-rw-r--r--fs/xfs/xfs_mru_cache.c2
-rw-r--r--fs/xfs/xfs_mru_cache.h1
-rw-r--r--fs/xfs/xfs_quota.h9
-rw-r--r--fs/xfs/xfs_rw.c155
-rw-r--r--fs/xfs/xfs_rw.h4
-rw-r--r--fs/xfs/xfs_trans.c7
-rw-r--r--fs/xfs/xfs_trans.h3
-rw-r--r--fs/xfs/xfs_trans_ail.c34
-rw-r--r--fs/xfs/xfs_trans_buf.c27
-rw-r--r--fs/xfs/xfs_types.h4
-rw-r--r--fs/xfs/xfs_vnodeops.c33
-rw-r--r--fs/xfs/xfs_vnodeops.h10
79 files changed, 1666 insertions, 1596 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 56641fe52a23..5c5a366aa332 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -16,7 +16,7 @@
16# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17# 17#
18 18
19EXTRA_CFLAGS += -I$(src) -I$(src)/linux-2.6 -funsigned-char 19EXTRA_CFLAGS += -I$(src) -I$(src)/linux-2.6
20 20
21XFS_LINUX := linux-2.6 21XFS_LINUX := linux-2.6
22 22
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 2d3f90afe5f1..bc7405585def 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -16,7 +16,6 @@
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18#include <linux/mm.h> 18#include <linux/mm.h>
19#include <linux/vmalloc.h>
20#include <linux/highmem.h> 19#include <linux/highmem.h>
21#include <linux/swap.h> 20#include <linux/swap.h>
22#include <linux/blkdev.h> 21#include <linux/blkdev.h>
@@ -24,8 +23,25 @@
24#include "time.h" 23#include "time.h"
25#include "kmem.h" 24#include "kmem.h"
26 25
27#define MAX_VMALLOCS 6 26/*
28#define MAX_SLAB_SIZE 0x20000 27 * Greedy allocation. May fail and may return vmalloced memory.
28 *
29 * Must be freed using kmem_free_large.
30 */
31void *
32kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
33{
34 void *ptr;
35 size_t kmsize = maxsize;
36
37 while (!(ptr = kmem_zalloc_large(kmsize))) {
38 if ((kmsize >>= 1) <= minsize)
39 kmsize = minsize;
40 }
41 if (ptr)
42 *size = kmsize;
43 return ptr;
44}
29 45
30void * 46void *
31kmem_alloc(size_t size, unsigned int __nocast flags) 47kmem_alloc(size_t size, unsigned int __nocast flags)
@@ -34,19 +50,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
34 gfp_t lflags = kmem_flags_convert(flags); 50 gfp_t lflags = kmem_flags_convert(flags);
35 void *ptr; 51 void *ptr;
36 52
37#ifdef DEBUG
38 if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) {
39 printk(KERN_WARNING "Large %s attempt, size=%ld\n",
40 __func__, (long)size);
41 dump_stack();
42 }
43#endif
44
45 do { 53 do {
46 if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) 54 ptr = kmalloc(size, lflags);
47 ptr = kmalloc(size, lflags);
48 else
49 ptr = __vmalloc(size, lflags, PAGE_KERNEL);
50 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) 55 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
51 return ptr; 56 return ptr;
52 if (!(++retries % 100)) 57 if (!(++retries % 100))
@@ -68,27 +73,6 @@ kmem_zalloc(size_t size, unsigned int __nocast flags)
68 return ptr; 73 return ptr;
69} 74}
70 75
71void *
72kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
73 unsigned int __nocast flags)
74{
75 void *ptr;
76 size_t kmsize = maxsize;
77 unsigned int kmflags = (flags & ~KM_SLEEP) | KM_NOSLEEP;
78
79 while (!(ptr = kmem_zalloc(kmsize, kmflags))) {
80 if ((kmsize <= minsize) && (flags & KM_NOSLEEP))
81 break;
82 if ((kmsize >>= 1) <= minsize) {
83 kmsize = minsize;
84 kmflags = flags;
85 }
86 }
87 if (ptr)
88 *size = kmsize;
89 return ptr;
90}
91
92void 76void
93kmem_free(const void *ptr) 77kmem_free(const void *ptr)
94{ 78{
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 179cbd630f69..f7c8f7a9ea6d 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -21,6 +21,7 @@
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/mm.h> 23#include <linux/mm.h>
24#include <linux/vmalloc.h>
24 25
25/* 26/*
26 * General memory allocation interfaces 27 * General memory allocation interfaces
@@ -30,7 +31,6 @@
30#define KM_NOSLEEP 0x0002u 31#define KM_NOSLEEP 0x0002u
31#define KM_NOFS 0x0004u 32#define KM_NOFS 0x0004u
32#define KM_MAYFAIL 0x0008u 33#define KM_MAYFAIL 0x0008u
33#define KM_LARGE 0x0010u
34 34
35/* 35/*
36 * We use a special process flag to avoid recursive callbacks into 36 * We use a special process flag to avoid recursive callbacks into
@@ -42,7 +42,7 @@ kmem_flags_convert(unsigned int __nocast flags)
42{ 42{
43 gfp_t lflags; 43 gfp_t lflags;
44 44
45 BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_LARGE)); 45 BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));
46 46
47 if (flags & KM_NOSLEEP) { 47 if (flags & KM_NOSLEEP) {
48 lflags = GFP_ATOMIC | __GFP_NOWARN; 48 lflags = GFP_ATOMIC | __GFP_NOWARN;
@@ -56,10 +56,25 @@ kmem_flags_convert(unsigned int __nocast flags)
56 56
57extern void *kmem_alloc(size_t, unsigned int __nocast); 57extern void *kmem_alloc(size_t, unsigned int __nocast);
58extern void *kmem_zalloc(size_t, unsigned int __nocast); 58extern void *kmem_zalloc(size_t, unsigned int __nocast);
59extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast);
60extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast); 59extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
61extern void kmem_free(const void *); 60extern void kmem_free(const void *);
62 61
62static inline void *kmem_zalloc_large(size_t size)
63{
64 void *ptr;
65
66 ptr = vmalloc(size);
67 if (ptr)
68 memset(ptr, 0, size);
69 return ptr;
70}
71static inline void kmem_free_large(void *ptr)
72{
73 vfree(ptr);
74}
75
76extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
77
63/* 78/*
64 * Zone interfaces 79 * Zone interfaces
65 */ 80 */
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 883ca5ab8af5..bf85bbe4a9ae 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -106,7 +106,7 @@ xfs_get_acl(struct inode *inode, int type)
106 struct posix_acl *acl; 106 struct posix_acl *acl;
107 struct xfs_acl *xfs_acl; 107 struct xfs_acl *xfs_acl;
108 int len = sizeof(struct xfs_acl); 108 int len = sizeof(struct xfs_acl);
109 char *ea_name; 109 unsigned char *ea_name;
110 int error; 110 int error;
111 111
112 acl = get_cached_acl(inode, type); 112 acl = get_cached_acl(inode, type);
@@ -133,7 +133,8 @@ xfs_get_acl(struct inode *inode, int type)
133 if (!xfs_acl) 133 if (!xfs_acl)
134 return ERR_PTR(-ENOMEM); 134 return ERR_PTR(-ENOMEM);
135 135
136 error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT); 136 error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
137 &len, ATTR_ROOT);
137 if (error) { 138 if (error) {
138 /* 139 /*
139 * If the attribute doesn't exist make sure we have a negative 140 * If the attribute doesn't exist make sure we have a negative
@@ -162,7 +163,7 @@ STATIC int
162xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) 163xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
163{ 164{
164 struct xfs_inode *ip = XFS_I(inode); 165 struct xfs_inode *ip = XFS_I(inode);
165 char *ea_name; 166 unsigned char *ea_name;
166 int error; 167 int error;
167 168
168 if (S_ISLNK(inode->i_mode)) 169 if (S_ISLNK(inode->i_mode))
@@ -194,7 +195,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
194 (sizeof(struct xfs_acl_entry) * 195 (sizeof(struct xfs_acl_entry) *
195 (XFS_ACL_MAX_ENTRIES - acl->a_count)); 196 (XFS_ACL_MAX_ENTRIES - acl->a_count));
196 197
197 error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl, 198 error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
198 len, ATTR_ROOT); 199 len, ATTR_ROOT);
199 200
200 kfree(xfs_acl); 201 kfree(xfs_acl);
@@ -262,7 +263,7 @@ xfs_set_mode(struct inode *inode, mode_t mode)
262} 263}
263 264
264static int 265static int
265xfs_acl_exists(struct inode *inode, char *name) 266xfs_acl_exists(struct inode *inode, unsigned char *name)
266{ 267{
267 int len = sizeof(struct xfs_acl); 268 int len = sizeof(struct xfs_acl);
268 269
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 6f3ebb634b8b..6f76ba85f193 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -33,6 +33,7 @@
33#include <linux/migrate.h> 33#include <linux/migrate.h>
34#include <linux/backing-dev.h> 34#include <linux/backing-dev.h>
35#include <linux/freezer.h> 35#include <linux/freezer.h>
36#include <linux/list_sort.h>
36 37
37#include "xfs_sb.h" 38#include "xfs_sb.h"
38#include "xfs_inum.h" 39#include "xfs_inum.h"
@@ -1072,22 +1073,30 @@ xfs_buf_ioerror(
1072} 1073}
1073 1074
1074int 1075int
1075xfs_bawrite( 1076xfs_bwrite(
1076 void *mp, 1077 struct xfs_mount *mp,
1077 struct xfs_buf *bp) 1078 struct xfs_buf *bp)
1078{ 1079{
1079 trace_xfs_buf_bawrite(bp, _RET_IP_); 1080 int iowait = (bp->b_flags & XBF_ASYNC) == 0;
1081 int error = 0;
1080 1082
1081 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); 1083 bp->b_strat = xfs_bdstrat_cb;
1084 bp->b_mount = mp;
1085 bp->b_flags |= XBF_WRITE;
1086 if (!iowait)
1087 bp->b_flags |= _XBF_RUN_QUEUES;
1082 1088
1083 xfs_buf_delwri_dequeue(bp); 1089 xfs_buf_delwri_dequeue(bp);
1090 xfs_buf_iostrategy(bp);
1084 1091
1085 bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD); 1092 if (iowait) {
1086 bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); 1093 error = xfs_buf_iowait(bp);
1094 if (error)
1095 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1096 xfs_buf_relse(bp);
1097 }
1087 1098
1088 bp->b_mount = mp; 1099 return error;
1089 bp->b_strat = xfs_bdstrat_cb;
1090 return xfs_bdstrat_cb(bp);
1091} 1100}
1092 1101
1093void 1102void
@@ -1106,6 +1115,126 @@ xfs_bdwrite(
1106 xfs_buf_delwri_queue(bp, 1); 1115 xfs_buf_delwri_queue(bp, 1);
1107} 1116}
1108 1117
1118/*
1119 * Called when we want to stop a buffer from getting written or read.
1120 * We attach the EIO error, muck with its flags, and call biodone
1121 * so that the proper iodone callbacks get called.
1122 */
1123STATIC int
1124xfs_bioerror(
1125 xfs_buf_t *bp)
1126{
1127#ifdef XFSERRORDEBUG
1128 ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
1129#endif
1130
1131 /*
1132 * No need to wait until the buffer is unpinned, we aren't flushing it.
1133 */
1134 XFS_BUF_ERROR(bp, EIO);
1135
1136 /*
1137 * We're calling biodone, so delete XBF_DONE flag.
1138 */
1139 XFS_BUF_UNREAD(bp);
1140 XFS_BUF_UNDELAYWRITE(bp);
1141 XFS_BUF_UNDONE(bp);
1142 XFS_BUF_STALE(bp);
1143
1144 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
1145 xfs_biodone(bp);
1146
1147 return EIO;
1148}
1149
1150/*
1151 * Same as xfs_bioerror, except that we are releasing the buffer
1152 * here ourselves, and avoiding the biodone call.
1153 * This is meant for userdata errors; metadata bufs come with
1154 * iodone functions attached, so that we can track down errors.
1155 */
1156STATIC int
1157xfs_bioerror_relse(
1158 struct xfs_buf *bp)
1159{
1160 int64_t fl = XFS_BUF_BFLAGS(bp);
1161 /*
1162 * No need to wait until the buffer is unpinned.
1163 * We aren't flushing it.
1164 *
1165 * chunkhold expects B_DONE to be set, whether
1166 * we actually finish the I/O or not. We don't want to
1167 * change that interface.
1168 */
1169 XFS_BUF_UNREAD(bp);
1170 XFS_BUF_UNDELAYWRITE(bp);
1171 XFS_BUF_DONE(bp);
1172 XFS_BUF_STALE(bp);
1173 XFS_BUF_CLR_IODONE_FUNC(bp);
1174 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
1175 if (!(fl & XBF_ASYNC)) {
1176 /*
1177 * Mark b_error and B_ERROR _both_.
1178 * Lot's of chunkcache code assumes that.
1179 * There's no reason to mark error for
1180 * ASYNC buffers.
1181 */
1182 XFS_BUF_ERROR(bp, EIO);
1183 XFS_BUF_FINISH_IOWAIT(bp);
1184 } else {
1185 xfs_buf_relse(bp);
1186 }
1187
1188 return EIO;
1189}
1190
1191
1192/*
1193 * All xfs metadata buffers except log state machine buffers
1194 * get this attached as their b_bdstrat callback function.
1195 * This is so that we can catch a buffer
1196 * after prematurely unpinning it to forcibly shutdown the filesystem.
1197 */
1198int
1199xfs_bdstrat_cb(
1200 struct xfs_buf *bp)
1201{
1202 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
1203 trace_xfs_bdstrat_shut(bp, _RET_IP_);
1204 /*
1205 * Metadata write that didn't get logged but
1206 * written delayed anyway. These aren't associated
1207 * with a transaction, and can be ignored.
1208 */
1209 if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
1210 return xfs_bioerror_relse(bp);
1211 else
1212 return xfs_bioerror(bp);
1213 }
1214
1215 xfs_buf_iorequest(bp);
1216 return 0;
1217}
1218
1219/*
1220 * Wrapper around bdstrat so that we can stop data from going to disk in case
1221 * we are shutting down the filesystem. Typically user data goes thru this
1222 * path; one of the exceptions is the superblock.
1223 */
1224void
1225xfsbdstrat(
1226 struct xfs_mount *mp,
1227 struct xfs_buf *bp)
1228{
1229 if (XFS_FORCED_SHUTDOWN(mp)) {
1230 trace_xfs_bdstrat_shut(bp, _RET_IP_);
1231 xfs_bioerror_relse(bp);
1232 return;
1233 }
1234
1235 xfs_buf_iorequest(bp);
1236}
1237
1109STATIC void 1238STATIC void
1110_xfs_buf_ioend( 1239_xfs_buf_ioend(
1111 xfs_buf_t *bp, 1240 xfs_buf_t *bp,
@@ -1324,7 +1453,7 @@ xfs_buf_iomove(
1324 xfs_buf_t *bp, /* buffer to process */ 1453 xfs_buf_t *bp, /* buffer to process */
1325 size_t boff, /* starting buffer offset */ 1454 size_t boff, /* starting buffer offset */
1326 size_t bsize, /* length to copy */ 1455 size_t bsize, /* length to copy */
1327 caddr_t data, /* data address */ 1456 void *data, /* data address */
1328 xfs_buf_rw_t mode) /* read/write/zero flag */ 1457 xfs_buf_rw_t mode) /* read/write/zero flag */
1329{ 1458{
1330 size_t bend, cpoff, csize; 1459 size_t bend, cpoff, csize;
@@ -1406,8 +1535,8 @@ xfs_alloc_bufhash(
1406 1535
1407 btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ 1536 btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */
1408 btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; 1537 btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
1409 btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) * 1538 btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
1410 sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE); 1539 sizeof(xfs_bufhash_t));
1411 for (i = 0; i < (1 << btp->bt_hashshift); i++) { 1540 for (i = 0; i < (1 << btp->bt_hashshift); i++) {
1412 spin_lock_init(&btp->bt_hash[i].bh_lock); 1541 spin_lock_init(&btp->bt_hash[i].bh_lock);
1413 INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); 1542 INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
@@ -1418,7 +1547,7 @@ STATIC void
1418xfs_free_bufhash( 1547xfs_free_bufhash(
1419 xfs_buftarg_t *btp) 1548 xfs_buftarg_t *btp)
1420{ 1549{
1421 kmem_free(btp->bt_hash); 1550 kmem_free_large(btp->bt_hash);
1422 btp->bt_hash = NULL; 1551 btp->bt_hash = NULL;
1423} 1552}
1424 1553
@@ -1623,6 +1752,11 @@ xfs_buf_delwri_queue(
1623 list_del(&bp->b_list); 1752 list_del(&bp->b_list);
1624 } 1753 }
1625 1754
1755 if (list_empty(dwq)) {
1756 /* start xfsbufd as it is about to have something to do */
1757 wake_up_process(bp->b_target->bt_task);
1758 }
1759
1626 bp->b_flags |= _XBF_DELWRI_Q; 1760 bp->b_flags |= _XBF_DELWRI_Q;
1627 list_add_tail(&bp->b_list, dwq); 1761 list_add_tail(&bp->b_list, dwq);
1628 bp->b_queuetime = jiffies; 1762 bp->b_queuetime = jiffies;
@@ -1654,6 +1788,35 @@ xfs_buf_delwri_dequeue(
1654 trace_xfs_buf_delwri_dequeue(bp, _RET_IP_); 1788 trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
1655} 1789}
1656 1790
1791/*
1792 * If a delwri buffer needs to be pushed before it has aged out, then promote
1793 * it to the head of the delwri queue so that it will be flushed on the next
1794 * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
1795 * than the age currently needed to flush the buffer. Hence the next time the
1796 * xfsbufd sees it is guaranteed to be considered old enough to flush.
1797 */
1798void
1799xfs_buf_delwri_promote(
1800 struct xfs_buf *bp)
1801{
1802 struct xfs_buftarg *btp = bp->b_target;
1803 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
1804
1805 ASSERT(bp->b_flags & XBF_DELWRI);
1806 ASSERT(bp->b_flags & _XBF_DELWRI_Q);
1807
1808 /*
1809 * Check the buffer age before locking the delayed write queue as we
1810 * don't need to promote buffers that are already past the flush age.
1811 */
1812 if (bp->b_queuetime < jiffies - age)
1813 return;
1814 bp->b_queuetime = jiffies - age;
1815 spin_lock(&btp->bt_delwrite_lock);
1816 list_move(&bp->b_list, &btp->bt_delwrite_queue);
1817 spin_unlock(&btp->bt_delwrite_lock);
1818}
1819
1657STATIC void 1820STATIC void
1658xfs_buf_runall_queues( 1821xfs_buf_runall_queues(
1659 struct workqueue_struct *queue) 1822 struct workqueue_struct *queue)
@@ -1672,6 +1835,8 @@ xfsbufd_wakeup(
1672 list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { 1835 list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
1673 if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) 1836 if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
1674 continue; 1837 continue;
1838 if (list_empty(&btp->bt_delwrite_queue))
1839 continue;
1675 set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); 1840 set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
1676 wake_up_process(btp->bt_task); 1841 wake_up_process(btp->bt_task);
1677 } 1842 }
@@ -1722,20 +1887,53 @@ xfs_buf_delwri_split(
1722 1887
1723} 1888}
1724 1889
1890/*
1891 * Compare function is more complex than it needs to be because
1892 * the return value is only 32 bits and we are doing comparisons
1893 * on 64 bit values
1894 */
1895static int
1896xfs_buf_cmp(
1897 void *priv,
1898 struct list_head *a,
1899 struct list_head *b)
1900{
1901 struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list);
1902 struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
1903 xfs_daddr_t diff;
1904
1905 diff = ap->b_bn - bp->b_bn;
1906 if (diff < 0)
1907 return -1;
1908 if (diff > 0)
1909 return 1;
1910 return 0;
1911}
1912
1913void
1914xfs_buf_delwri_sort(
1915 xfs_buftarg_t *target,
1916 struct list_head *list)
1917{
1918 list_sort(NULL, list, xfs_buf_cmp);
1919}
1920
1725STATIC int 1921STATIC int
1726xfsbufd( 1922xfsbufd(
1727 void *data) 1923 void *data)
1728{ 1924{
1729 struct list_head tmp; 1925 xfs_buftarg_t *target = (xfs_buftarg_t *)data;
1730 xfs_buftarg_t *target = (xfs_buftarg_t *)data;
1731 int count;
1732 xfs_buf_t *bp;
1733 1926
1734 current->flags |= PF_MEMALLOC; 1927 current->flags |= PF_MEMALLOC;
1735 1928
1736 set_freezable(); 1929 set_freezable();
1737 1930
1738 do { 1931 do {
1932 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
1933 long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
1934 int count = 0;
1935 struct list_head tmp;
1936
1739 if (unlikely(freezing(current))) { 1937 if (unlikely(freezing(current))) {
1740 set_bit(XBT_FORCE_SLEEP, &target->bt_flags); 1938 set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
1741 refrigerator(); 1939 refrigerator();
@@ -1743,17 +1941,16 @@ xfsbufd(
1743 clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); 1941 clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
1744 } 1942 }
1745 1943
1746 schedule_timeout_interruptible( 1944 /* sleep for a long time if there is nothing to do. */
1747 xfs_buf_timer_centisecs * msecs_to_jiffies(10)); 1945 if (list_empty(&target->bt_delwrite_queue))
1946 tout = MAX_SCHEDULE_TIMEOUT;
1947 schedule_timeout_interruptible(tout);
1748 1948
1749 xfs_buf_delwri_split(target, &tmp, 1949 xfs_buf_delwri_split(target, &tmp, age);
1750 xfs_buf_age_centisecs * msecs_to_jiffies(10)); 1950 list_sort(NULL, &tmp, xfs_buf_cmp);
1751
1752 count = 0;
1753 while (!list_empty(&tmp)) { 1951 while (!list_empty(&tmp)) {
1754 bp = list_entry(tmp.next, xfs_buf_t, b_list); 1952 struct xfs_buf *bp;
1755 ASSERT(target == bp->b_target); 1953 bp = list_first_entry(&tmp, struct xfs_buf, b_list);
1756
1757 list_del_init(&bp->b_list); 1954 list_del_init(&bp->b_list);
1758 xfs_buf_iostrategy(bp); 1955 xfs_buf_iostrategy(bp);
1759 count++; 1956 count++;
@@ -1779,42 +1976,45 @@ xfs_flush_buftarg(
1779 xfs_buftarg_t *target, 1976 xfs_buftarg_t *target,
1780 int wait) 1977 int wait)
1781{ 1978{
1782 struct list_head tmp; 1979 xfs_buf_t *bp;
1783 xfs_buf_t *bp, *n;
1784 int pincount = 0; 1980 int pincount = 0;
1981 LIST_HEAD(tmp_list);
1982 LIST_HEAD(wait_list);
1785 1983
1786 xfs_buf_runall_queues(xfsconvertd_workqueue); 1984 xfs_buf_runall_queues(xfsconvertd_workqueue);
1787 xfs_buf_runall_queues(xfsdatad_workqueue); 1985 xfs_buf_runall_queues(xfsdatad_workqueue);
1788 xfs_buf_runall_queues(xfslogd_workqueue); 1986 xfs_buf_runall_queues(xfslogd_workqueue);
1789 1987
1790 set_bit(XBT_FORCE_FLUSH, &target->bt_flags); 1988 set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
1791 pincount = xfs_buf_delwri_split(target, &tmp, 0); 1989 pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
1792 1990
1793 /* 1991 /*
1794 * Dropped the delayed write list lock, now walk the temporary list 1992 * Dropped the delayed write list lock, now walk the temporary list.
1993 * All I/O is issued async and then if we need to wait for completion
1994 * we do that after issuing all the IO.
1795 */ 1995 */
1796 list_for_each_entry_safe(bp, n, &tmp, b_list) { 1996 list_sort(NULL, &tmp_list, xfs_buf_cmp);
1997 while (!list_empty(&tmp_list)) {
1998 bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
1797 ASSERT(target == bp->b_target); 1999 ASSERT(target == bp->b_target);
1798 if (wait) 2000 list_del_init(&bp->b_list);
2001 if (wait) {
1799 bp->b_flags &= ~XBF_ASYNC; 2002 bp->b_flags &= ~XBF_ASYNC;
1800 else 2003 list_add(&bp->b_list, &wait_list);
1801 list_del_init(&bp->b_list); 2004 }
1802
1803 xfs_buf_iostrategy(bp); 2005 xfs_buf_iostrategy(bp);
1804 } 2006 }
1805 2007
1806 if (wait) 2008 if (wait) {
2009 /* Expedite and wait for IO to complete. */
1807 blk_run_address_space(target->bt_mapping); 2010 blk_run_address_space(target->bt_mapping);
2011 while (!list_empty(&wait_list)) {
2012 bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
1808 2013
1809 /* 2014 list_del_init(&bp->b_list);
1810 * Remaining list items must be flushed before returning 2015 xfs_iowait(bp);
1811 */ 2016 xfs_buf_relse(bp);
1812 while (!list_empty(&tmp)) { 2017 }
1813 bp = list_entry(tmp.next, xfs_buf_t, b_list);
1814
1815 list_del_init(&bp->b_list);
1816 xfs_iowait(bp);
1817 xfs_buf_relse(bp);
1818 } 2018 }
1819 2019
1820 return pincount; 2020 return pincount;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a34c7b54822d..386e7361e50e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -232,13 +232,17 @@ extern void xfs_buf_lock(xfs_buf_t *);
232extern void xfs_buf_unlock(xfs_buf_t *); 232extern void xfs_buf_unlock(xfs_buf_t *);
233 233
234/* Buffer Read and Write Routines */ 234/* Buffer Read and Write Routines */
235extern int xfs_bawrite(void *mp, xfs_buf_t *bp); 235extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
236extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); 236extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
237
238extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
239extern int xfs_bdstrat_cb(struct xfs_buf *);
240
237extern void xfs_buf_ioend(xfs_buf_t *, int); 241extern void xfs_buf_ioend(xfs_buf_t *, int);
238extern void xfs_buf_ioerror(xfs_buf_t *, int); 242extern void xfs_buf_ioerror(xfs_buf_t *, int);
239extern int xfs_buf_iorequest(xfs_buf_t *); 243extern int xfs_buf_iorequest(xfs_buf_t *);
240extern int xfs_buf_iowait(xfs_buf_t *); 244extern int xfs_buf_iowait(xfs_buf_t *);
241extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t, 245extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
242 xfs_buf_rw_t); 246 xfs_buf_rw_t);
243 247
244static inline int xfs_buf_iostrategy(xfs_buf_t *bp) 248static inline int xfs_buf_iostrategy(xfs_buf_t *bp)
@@ -261,6 +265,7 @@ extern int xfs_buf_ispin(xfs_buf_t *);
261 265
262/* Delayed Write Buffer Routines */ 266/* Delayed Write Buffer Routines */
263extern void xfs_buf_delwri_dequeue(xfs_buf_t *); 267extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
268extern void xfs_buf_delwri_promote(xfs_buf_t *);
264 269
265/* Buffer Daemon Setup Routines */ 270/* Buffer Daemon Setup Routines */
266extern int xfs_buf_init(void); 271extern int xfs_buf_init(void);
@@ -270,33 +275,19 @@ extern void xfs_buf_terminate(void);
270 ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; }) 275 ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })
271 276
272 277
273#define XFS_B_ASYNC XBF_ASYNC
274#define XFS_B_DELWRI XBF_DELWRI
275#define XFS_B_READ XBF_READ
276#define XFS_B_WRITE XBF_WRITE
277#define XFS_B_STALE XBF_STALE
278
279#define XFS_BUF_TRYLOCK XBF_TRYLOCK
280#define XFS_INCORE_TRYLOCK XBF_TRYLOCK
281#define XFS_BUF_LOCK XBF_LOCK
282#define XFS_BUF_MAPPED XBF_MAPPED
283
284#define BUF_BUSY XBF_DONT_BLOCK
285
286#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) 278#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags)
287#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ 279#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \
288 ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) 280 ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
289 281
290#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE) 282#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE)
291#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE) 283#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
292#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XFS_B_STALE) 284#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE)
293#define XFS_BUF_SUPER_STALE(bp) do { \ 285#define XFS_BUF_SUPER_STALE(bp) do { \
294 XFS_BUF_STALE(bp); \ 286 XFS_BUF_STALE(bp); \
295 xfs_buf_delwri_dequeue(bp); \ 287 xfs_buf_delwri_dequeue(bp); \
296 XFS_BUF_DONE(bp); \ 288 XFS_BUF_DONE(bp); \
297 } while (0) 289 } while (0)
298 290
299#define XFS_BUF_MANAGE XBF_FS_MANAGED
300#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED) 291#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED)
301 292
302#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) 293#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI)
@@ -385,31 +376,11 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
385 376
386#define xfs_biomove(bp, off, len, data, rw) \ 377#define xfs_biomove(bp, off, len, data, rw) \
387 xfs_buf_iomove((bp), (off), (len), (data), \ 378 xfs_buf_iomove((bp), (off), (len), (data), \
388 ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ) 379 ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ)
389 380
390#define xfs_biozero(bp, off, len) \ 381#define xfs_biozero(bp, off, len) \
391 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) 382 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
392 383
393
394static inline int XFS_bwrite(xfs_buf_t *bp)
395{
396 int iowait = (bp->b_flags & XBF_ASYNC) == 0;
397 int error = 0;
398
399 if (!iowait)
400 bp->b_flags |= _XBF_RUN_QUEUES;
401
402 xfs_buf_delwri_dequeue(bp);
403 xfs_buf_iostrategy(bp);
404 if (iowait) {
405 error = xfs_buf_iowait(bp);
406 xfs_buf_relse(bp);
407 }
408 return error;
409}
410
411#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
412
413#define xfs_iowait(bp) xfs_buf_iowait(bp) 384#define xfs_iowait(bp) xfs_buf_iowait(bp)
414 385
415#define xfs_baread(target, rablkno, ralen) \ 386#define xfs_baread(target, rablkno, ralen) \
@@ -424,6 +395,7 @@ extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
424extern void xfs_wait_buftarg(xfs_buftarg_t *); 395extern void xfs_wait_buftarg(xfs_buftarg_t *);
425extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); 396extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
426extern int xfs_flush_buftarg(xfs_buftarg_t *, int); 397extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
398
427#ifdef CONFIG_KDB_MODULES 399#ifdef CONFIG_KDB_MODULES
428extern struct list_head *xfs_get_buftarg_list(void); 400extern struct list_head *xfs_get_buftarg_list(void);
429#endif 401#endif
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 7501b85fd860..b6918d76bc7b 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -79,7 +79,7 @@ xfs_flush_pages(
79 xfs_iflags_clear(ip, XFS_ITRUNCATED); 79 xfs_iflags_clear(ip, XFS_ITRUNCATED);
80 ret = -filemap_fdatawrite(mapping); 80 ret = -filemap_fdatawrite(mapping);
81 } 81 }
82 if (flags & XFS_B_ASYNC) 82 if (flags & XBF_ASYNC)
83 return ret; 83 return ret;
84 ret2 = xfs_wait_on_pages(ip, first, last); 84 ret2 = xfs_wait_on_pages(ip, first, last);
85 if (!ret) 85 if (!ret)
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index a034cf624437..4ea1ee18aded 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -447,12 +447,12 @@ xfs_attrlist_by_handle(
447int 447int
448xfs_attrmulti_attr_get( 448xfs_attrmulti_attr_get(
449 struct inode *inode, 449 struct inode *inode,
450 char *name, 450 unsigned char *name,
451 char __user *ubuf, 451 unsigned char __user *ubuf,
452 __uint32_t *len, 452 __uint32_t *len,
453 __uint32_t flags) 453 __uint32_t flags)
454{ 454{
455 char *kbuf; 455 unsigned char *kbuf;
456 int error = EFAULT; 456 int error = EFAULT;
457 457
458 if (*len > XATTR_SIZE_MAX) 458 if (*len > XATTR_SIZE_MAX)
@@ -476,12 +476,12 @@ xfs_attrmulti_attr_get(
476int 476int
477xfs_attrmulti_attr_set( 477xfs_attrmulti_attr_set(
478 struct inode *inode, 478 struct inode *inode,
479 char *name, 479 unsigned char *name,
480 const char __user *ubuf, 480 const unsigned char __user *ubuf,
481 __uint32_t len, 481 __uint32_t len,
482 __uint32_t flags) 482 __uint32_t flags)
483{ 483{
484 char *kbuf; 484 unsigned char *kbuf;
485 int error = EFAULT; 485 int error = EFAULT;
486 486
487 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 487 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
@@ -501,7 +501,7 @@ xfs_attrmulti_attr_set(
501int 501int
502xfs_attrmulti_attr_remove( 502xfs_attrmulti_attr_remove(
503 struct inode *inode, 503 struct inode *inode,
504 char *name, 504 unsigned char *name,
505 __uint32_t flags) 505 __uint32_t flags)
506{ 506{
507 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 507 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
@@ -519,7 +519,7 @@ xfs_attrmulti_by_handle(
519 xfs_fsop_attrmulti_handlereq_t am_hreq; 519 xfs_fsop_attrmulti_handlereq_t am_hreq;
520 struct dentry *dentry; 520 struct dentry *dentry;
521 unsigned int i, size; 521 unsigned int i, size;
522 char *attr_name; 522 unsigned char *attr_name;
523 523
524 if (!capable(CAP_SYS_ADMIN)) 524 if (!capable(CAP_SYS_ADMIN))
525 return -XFS_ERROR(EPERM); 525 return -XFS_ERROR(EPERM);
@@ -547,7 +547,7 @@ xfs_attrmulti_by_handle(
547 547
548 error = 0; 548 error = 0;
549 for (i = 0; i < am_hreq.opcount; i++) { 549 for (i = 0; i < am_hreq.opcount; i++) {
550 ops[i].am_error = strncpy_from_user(attr_name, 550 ops[i].am_error = strncpy_from_user((char *)attr_name,
551 ops[i].am_attrname, MAXNAMELEN); 551 ops[i].am_attrname, MAXNAMELEN);
552 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) 552 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
553 error = -ERANGE; 553 error = -ERANGE;
@@ -1431,6 +1431,9 @@ xfs_file_ioctl(
1431 if (!capable(CAP_SYS_ADMIN)) 1431 if (!capable(CAP_SYS_ADMIN))
1432 return -EPERM; 1432 return -EPERM;
1433 1433
1434 if (mp->m_flags & XFS_MOUNT_RDONLY)
1435 return -XFS_ERROR(EROFS);
1436
1434 if (copy_from_user(&inout, arg, sizeof(inout))) 1437 if (copy_from_user(&inout, arg, sizeof(inout)))
1435 return -XFS_ERROR(EFAULT); 1438 return -XFS_ERROR(EFAULT);
1436 1439
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h
index 7bd7c6afc1eb..d56173b34a2a 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl.h
@@ -45,23 +45,23 @@ xfs_readlink_by_handle(
45extern int 45extern int
46xfs_attrmulti_attr_get( 46xfs_attrmulti_attr_get(
47 struct inode *inode, 47 struct inode *inode,
48 char *name, 48 unsigned char *name,
49 char __user *ubuf, 49 unsigned char __user *ubuf,
50 __uint32_t *len, 50 __uint32_t *len,
51 __uint32_t flags); 51 __uint32_t flags);
52 52
53extern int 53extern int
54 xfs_attrmulti_attr_set( 54xfs_attrmulti_attr_set(
55 struct inode *inode, 55 struct inode *inode,
56 char *name, 56 unsigned char *name,
57 const char __user *ubuf, 57 const unsigned char __user *ubuf,
58 __uint32_t len, 58 __uint32_t len,
59 __uint32_t flags); 59 __uint32_t flags);
60 60
61extern int 61extern int
62xfs_attrmulti_attr_remove( 62xfs_attrmulti_attr_remove(
63 struct inode *inode, 63 struct inode *inode,
64 char *name, 64 unsigned char *name,
65 __uint32_t flags); 65 __uint32_t flags);
66 66
67extern struct dentry * 67extern struct dentry *
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index be1527b1670c..0bf6d61f0528 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -411,7 +411,7 @@ xfs_compat_attrmulti_by_handle(
411 compat_xfs_fsop_attrmulti_handlereq_t am_hreq; 411 compat_xfs_fsop_attrmulti_handlereq_t am_hreq;
412 struct dentry *dentry; 412 struct dentry *dentry;
413 unsigned int i, size; 413 unsigned int i, size;
414 char *attr_name; 414 unsigned char *attr_name;
415 415
416 if (!capable(CAP_SYS_ADMIN)) 416 if (!capable(CAP_SYS_ADMIN))
417 return -XFS_ERROR(EPERM); 417 return -XFS_ERROR(EPERM);
@@ -440,7 +440,7 @@ xfs_compat_attrmulti_by_handle(
440 440
441 error = 0; 441 error = 0;
442 for (i = 0; i < am_hreq.opcount; i++) { 442 for (i = 0; i < am_hreq.opcount; i++) {
443 ops[i].am_error = strncpy_from_user(attr_name, 443 ops[i].am_error = strncpy_from_user((char *)attr_name,
444 compat_ptr(ops[i].am_attrname), 444 compat_ptr(ops[i].am_attrname),
445 MAXNAMELEN); 445 MAXNAMELEN);
446 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) 446 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 225946012d0b..e8566bbf0f00 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -140,10 +140,10 @@ xfs_init_security(
140 struct xfs_inode *ip = XFS_I(inode); 140 struct xfs_inode *ip = XFS_I(inode);
141 size_t length; 141 size_t length;
142 void *value; 142 void *value;
143 char *name; 143 unsigned char *name;
144 int error; 144 int error;
145 145
146 error = security_inode_init_security(inode, dir, &name, 146 error = security_inode_init_security(inode, dir, (char **)&name,
147 &value, &length); 147 &value, &length);
148 if (error) { 148 if (error) {
149 if (error == -EOPNOTSUPP) 149 if (error == -EOPNOTSUPP)
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 0d32457abef1..eac6f80d786d 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -630,18 +630,9 @@ start:
630 * by root. This keeps people from modifying setuid and 630 * by root. This keeps people from modifying setuid and
631 * setgid binaries. 631 * setgid binaries.
632 */ 632 */
633 633 error = -file_remove_suid(file);
634 if (((xip->i_d.di_mode & S_ISUID) || 634 if (unlikely(error))
635 ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == 635 goto out_unlock_internal;
636 (S_ISGID | S_IXGRP))) &&
637 !capable(CAP_FSETID)) {
638 error = xfs_write_clear_setuid(xip);
639 if (likely(!error))
640 error = -file_remove_suid(file);
641 if (unlikely(error)) {
642 goto out_unlock_internal;
643 }
644 }
645 636
646 /* We can write back this queue in page reclaim */ 637 /* We can write back this queue in page reclaim */
647 current->backing_dev_info = mapping->backing_dev_info; 638 current->backing_dev_info = mapping->backing_dev_info;
@@ -784,53 +775,6 @@ write_retry:
784} 775}
785 776
786/* 777/*
787 * All xfs metadata buffers except log state machine buffers
788 * get this attached as their b_bdstrat callback function.
789 * This is so that we can catch a buffer
790 * after prematurely unpinning it to forcibly shutdown the filesystem.
791 */
792int
793xfs_bdstrat_cb(struct xfs_buf *bp)
794{
795 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
796 trace_xfs_bdstrat_shut(bp, _RET_IP_);
797 /*
798 * Metadata write that didn't get logged but
799 * written delayed anyway. These aren't associated
800 * with a transaction, and can be ignored.
801 */
802 if (XFS_BUF_IODONE_FUNC(bp) == NULL &&
803 (XFS_BUF_ISREAD(bp)) == 0)
804 return (xfs_bioerror_relse(bp));
805 else
806 return (xfs_bioerror(bp));
807 }
808
809 xfs_buf_iorequest(bp);
810 return 0;
811}
812
813/*
814 * Wrapper around bdstrat so that we can stop data from going to disk in case
815 * we are shutting down the filesystem. Typically user data goes thru this
816 * path; one of the exceptions is the superblock.
817 */
818void
819xfsbdstrat(
820 struct xfs_mount *mp,
821 struct xfs_buf *bp)
822{
823 ASSERT(mp);
824 if (!XFS_FORCED_SHUTDOWN(mp)) {
825 xfs_buf_iorequest(bp);
826 return;
827 }
828
829 trace_xfs_bdstrat_shut(bp, _RET_IP_);
830 xfs_bioerror_relse(bp);
831}
832
833/*
834 * If the underlying (data/log/rt) device is readonly, there are some 778 * If the underlying (data/log/rt) device is readonly, there are some
835 * operations that cannot proceed. 779 * operations that cannot proceed.
836 */ 780 */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index d1f7789c7ffb..342ae8c0d011 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -22,9 +22,6 @@ struct xfs_mount;
22struct xfs_inode; 22struct xfs_inode;
23struct xfs_buf; 23struct xfs_buf;
24 24
25/* errors from xfsbdstrat() must be extracted from the buffer */
26extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
27extern int xfs_bdstrat_cb(struct xfs_buf *);
28extern int xfs_dev_is_read_only(struct xfs_mount *, char *); 25extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
29 26
30extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); 27extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 77414db10dc2..25ea2408118f 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -877,12 +877,11 @@ xfsaild(
877{ 877{
878 struct xfs_ail *ailp = data; 878 struct xfs_ail *ailp = data;
879 xfs_lsn_t last_pushed_lsn = 0; 879 xfs_lsn_t last_pushed_lsn = 0;
880 long tout = 0; 880 long tout = 0; /* milliseconds */
881 881
882 while (!kthread_should_stop()) { 882 while (!kthread_should_stop()) {
883 if (tout) 883 schedule_timeout_interruptible(tout ?
884 schedule_timeout_interruptible(msecs_to_jiffies(tout)); 884 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
885 tout = 1000;
886 885
887 /* swsusp */ 886 /* swsusp */
888 try_to_freeze(); 887 try_to_freeze();
@@ -1022,12 +1021,45 @@ xfs_fs_dirty_inode(
1022 XFS_I(inode)->i_update_core = 1; 1021 XFS_I(inode)->i_update_core = 1;
1023} 1022}
1024 1023
1025/* 1024STATIC int
1026 * Attempt to flush the inode, this will actually fail 1025xfs_log_inode(
1027 * if the inode is pinned, but we dirty the inode again 1026 struct xfs_inode *ip)
1028 * at the point when it is unpinned after a log write, 1027{
1029 * since this is when the inode itself becomes flushable. 1028 struct xfs_mount *mp = ip->i_mount;
1030 */ 1029 struct xfs_trans *tp;
1030 int error;
1031
1032 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1033 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
1034 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
1035
1036 if (error) {
1037 xfs_trans_cancel(tp, 0);
1038 /* we need to return with the lock hold shared */
1039 xfs_ilock(ip, XFS_ILOCK_SHARED);
1040 return error;
1041 }
1042
1043 xfs_ilock(ip, XFS_ILOCK_EXCL);
1044
1045 /*
1046 * Note - it's possible that we might have pushed ourselves out of the
1047 * way during trans_reserve which would flush the inode. But there's
1048 * no guarantee that the inode buffer has actually gone out yet (it's
1049 * delwri). Plus the buffer could be pinned anyway if it's part of
1050 * an inode in another recent transaction. So we play it safe and
1051 * fire off the transaction anyway.
1052 */
1053 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1054 xfs_trans_ihold(tp, ip);
1055 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1056 xfs_trans_set_sync(tp);
1057 error = xfs_trans_commit(tp, 0);
1058 xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
1059
1060 return error;
1061}
1062
1031STATIC int 1063STATIC int
1032xfs_fs_write_inode( 1064xfs_fs_write_inode(
1033 struct inode *inode, 1065 struct inode *inode,
@@ -1035,7 +1067,7 @@ xfs_fs_write_inode(
1035{ 1067{
1036 struct xfs_inode *ip = XFS_I(inode); 1068 struct xfs_inode *ip = XFS_I(inode);
1037 struct xfs_mount *mp = ip->i_mount; 1069 struct xfs_mount *mp = ip->i_mount;
1038 int error = 0; 1070 int error = EAGAIN;
1039 1071
1040 xfs_itrace_entry(ip); 1072 xfs_itrace_entry(ip);
1041 1073
@@ -1046,35 +1078,55 @@ xfs_fs_write_inode(
1046 error = xfs_wait_on_pages(ip, 0, -1); 1078 error = xfs_wait_on_pages(ip, 0, -1);
1047 if (error) 1079 if (error)
1048 goto out; 1080 goto out;
1049 }
1050
1051 /*
1052 * Bypass inodes which have already been cleaned by
1053 * the inode flush clustering code inside xfs_iflush
1054 */
1055 if (xfs_inode_clean(ip))
1056 goto out;
1057 1081
1058 /* 1082 /*
1059 * We make this non-blocking if the inode is contended, return 1083 * Make sure the inode has hit stable storage. By using the
1060 * EAGAIN to indicate to the caller that they did not succeed. 1084 * log and the fsync transactions we reduce the IOs we have
1061 * This prevents the flush path from blocking on inodes inside 1085 * to do here from two (log and inode) to just the log.
1062 * another operation right now, they get caught later by xfs_sync. 1086 *
1063 */ 1087 * Note: We still need to do a delwri write of the inode after
1064 if (sync) { 1088 * this to flush it to the backing buffer so that bulkstat
1089 * works properly if this is the first time the inode has been
1090 * written. Because we hold the ilock atomically over the
1091 * transaction commit and the inode flush we are guaranteed
1092 * that the inode is not pinned when it returns. If the flush
1093 * lock is already held, then the inode has already been
1094 * flushed once and we don't need to flush it again. Hence
1095 * the code will only flush the inode if it isn't already
1096 * being flushed.
1097 */
1065 xfs_ilock(ip, XFS_ILOCK_SHARED); 1098 xfs_ilock(ip, XFS_ILOCK_SHARED);
1066 xfs_iflock(ip); 1099 if (ip->i_update_core) {
1067 1100 error = xfs_log_inode(ip);
1068 error = xfs_iflush(ip, XFS_IFLUSH_SYNC); 1101 if (error)
1102 goto out_unlock;
1103 }
1069 } else { 1104 } else {
1070 error = EAGAIN; 1105 /*
1106 * We make this non-blocking if the inode is contended, return
1107 * EAGAIN to indicate to the caller that they did not succeed.
1108 * This prevents the flush path from blocking on inodes inside
1109 * another operation right now, they get caught later by xfs_sync.
1110 */
1071 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) 1111 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
1072 goto out; 1112 goto out;
1073 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) 1113 }
1074 goto out_unlock;
1075 1114
1076 error = xfs_iflush(ip, XFS_IFLUSH_ASYNC_NOBLOCK); 1115 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
1116 goto out_unlock;
1117
1118 /*
1119 * Now we have the flush lock and the inode is not pinned, we can check
1120 * if the inode is really clean as we know that there are no pending
1121 * transaction completions, it is not waiting on the delayed write
1122 * queue and there is no IO in progress.
1123 */
1124 if (xfs_inode_clean(ip)) {
1125 xfs_ifunlock(ip);
1126 error = 0;
1127 goto out_unlock;
1077 } 1128 }
1129 error = xfs_iflush(ip, 0);
1078 1130
1079 out_unlock: 1131 out_unlock:
1080 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1132 xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -1257,6 +1309,29 @@ xfs_fs_statfs(
1257 return 0; 1309 return 0;
1258} 1310}
1259 1311
1312STATIC void
1313xfs_save_resvblks(struct xfs_mount *mp)
1314{
1315 __uint64_t resblks = 0;
1316
1317 mp->m_resblks_save = mp->m_resblks;
1318 xfs_reserve_blocks(mp, &resblks, NULL);
1319}
1320
1321STATIC void
1322xfs_restore_resvblks(struct xfs_mount *mp)
1323{
1324 __uint64_t resblks;
1325
1326 if (mp->m_resblks_save) {
1327 resblks = mp->m_resblks_save;
1328 mp->m_resblks_save = 0;
1329 } else
1330 resblks = xfs_default_resblks(mp);
1331
1332 xfs_reserve_blocks(mp, &resblks, NULL);
1333}
1334
1260STATIC int 1335STATIC int
1261xfs_fs_remount( 1336xfs_fs_remount(
1262 struct super_block *sb, 1337 struct super_block *sb,
@@ -1336,11 +1411,27 @@ xfs_fs_remount(
1336 } 1411 }
1337 mp->m_update_flags = 0; 1412 mp->m_update_flags = 0;
1338 } 1413 }
1414
1415 /*
1416 * Fill out the reserve pool if it is empty. Use the stashed
1417 * value if it is non-zero, otherwise go with the default.
1418 */
1419 xfs_restore_resvblks(mp);
1339 } 1420 }
1340 1421
1341 /* rw -> ro */ 1422 /* rw -> ro */
1342 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { 1423 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
1424 /*
1425 * After we have synced the data but before we sync the
1426 * metadata, we need to free up the reserve block pool so that
1427 * the used block count in the superblock on disk is correct at
1428 * the end of the remount. Stash the current reserve pool size
1429 * so that if we get remounted rw, we can return it to the same
1430 * size.
1431 */
1432
1343 xfs_quiesce_data(mp); 1433 xfs_quiesce_data(mp);
1434 xfs_save_resvblks(mp);
1344 xfs_quiesce_attr(mp); 1435 xfs_quiesce_attr(mp);
1345 mp->m_flags |= XFS_MOUNT_RDONLY; 1436 mp->m_flags |= XFS_MOUNT_RDONLY;
1346 } 1437 }
@@ -1359,11 +1450,22 @@ xfs_fs_freeze(
1359{ 1450{
1360 struct xfs_mount *mp = XFS_M(sb); 1451 struct xfs_mount *mp = XFS_M(sb);
1361 1452
1453 xfs_save_resvblks(mp);
1362 xfs_quiesce_attr(mp); 1454 xfs_quiesce_attr(mp);
1363 return -xfs_fs_log_dummy(mp); 1455 return -xfs_fs_log_dummy(mp);
1364} 1456}
1365 1457
1366STATIC int 1458STATIC int
1459xfs_fs_unfreeze(
1460 struct super_block *sb)
1461{
1462 struct xfs_mount *mp = XFS_M(sb);
1463
1464 xfs_restore_resvblks(mp);
1465 return 0;
1466}
1467
1468STATIC int
1367xfs_fs_show_options( 1469xfs_fs_show_options(
1368 struct seq_file *m, 1470 struct seq_file *m,
1369 struct vfsmount *mnt) 1471 struct vfsmount *mnt)
@@ -1585,6 +1687,7 @@ static const struct super_operations xfs_super_operations = {
1585 .put_super = xfs_fs_put_super, 1687 .put_super = xfs_fs_put_super,
1586 .sync_fs = xfs_fs_sync_fs, 1688 .sync_fs = xfs_fs_sync_fs,
1587 .freeze_fs = xfs_fs_freeze, 1689 .freeze_fs = xfs_fs_freeze,
1690 .unfreeze_fs = xfs_fs_unfreeze,
1588 .statfs = xfs_fs_statfs, 1691 .statfs = xfs_fs_statfs,
1589 .remount_fs = xfs_fs_remount, 1692 .remount_fs = xfs_fs_remount,
1590 .show_options = xfs_fs_show_options, 1693 .show_options = xfs_fs_show_options,
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 1f5e4bb5e970..a9f6d20aff41 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -90,14 +90,13 @@ xfs_inode_ag_lookup(
90STATIC int 90STATIC int
91xfs_inode_ag_walk( 91xfs_inode_ag_walk(
92 struct xfs_mount *mp, 92 struct xfs_mount *mp,
93 xfs_agnumber_t ag, 93 struct xfs_perag *pag,
94 int (*execute)(struct xfs_inode *ip, 94 int (*execute)(struct xfs_inode *ip,
95 struct xfs_perag *pag, int flags), 95 struct xfs_perag *pag, int flags),
96 int flags, 96 int flags,
97 int tag, 97 int tag,
98 int exclusive) 98 int exclusive)
99{ 99{
100 struct xfs_perag *pag = &mp->m_perag[ag];
101 uint32_t first_index; 100 uint32_t first_index;
102 int last_error = 0; 101 int last_error = 0;
103 int skipped; 102 int skipped;
@@ -141,8 +140,6 @@ restart:
141 delay(1); 140 delay(1);
142 goto restart; 141 goto restart;
143 } 142 }
144
145 xfs_put_perag(mp, pag);
146 return last_error; 143 return last_error;
147} 144}
148 145
@@ -160,10 +157,16 @@ xfs_inode_ag_iterator(
160 xfs_agnumber_t ag; 157 xfs_agnumber_t ag;
161 158
162 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 159 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
163 if (!mp->m_perag[ag].pag_ici_init) 160 struct xfs_perag *pag;
161
162 pag = xfs_perag_get(mp, ag);
163 if (!pag->pag_ici_init) {
164 xfs_perag_put(pag);
164 continue; 165 continue;
165 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag, 166 }
167 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
166 exclusive); 168 exclusive);
169 xfs_perag_put(pag);
167 if (error) { 170 if (error) {
168 last_error = error; 171 last_error = error;
169 if (error == EFSCORRUPTED) 172 if (error == EFSCORRUPTED)
@@ -231,7 +234,7 @@ xfs_sync_inode_data(
231 } 234 }
232 235
233 error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? 236 error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
234 0 : XFS_B_ASYNC, FI_NONE); 237 0 : XBF_ASYNC, FI_NONE);
235 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 238 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
236 239
237 out_wait: 240 out_wait:
@@ -267,8 +270,7 @@ xfs_sync_inode_attr(
267 goto out_unlock; 270 goto out_unlock;
268 } 271 }
269 272
270 error = xfs_iflush(ip, (flags & SYNC_WAIT) ? 273 error = xfs_iflush(ip, flags);
271 XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI);
272 274
273 out_unlock: 275 out_unlock:
274 xfs_iunlock(ip, XFS_ILOCK_SHARED); 276 xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -293,10 +295,7 @@ xfs_sync_data(
293 if (error) 295 if (error)
294 return XFS_ERROR(error); 296 return XFS_ERROR(error);
295 297
296 xfs_log_force(mp, 0, 298 xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
297 (flags & SYNC_WAIT) ?
298 XFS_LOG_FORCE | XFS_LOG_SYNC :
299 XFS_LOG_FORCE);
300 return 0; 299 return 0;
301} 300}
302 301
@@ -322,10 +321,6 @@ xfs_commit_dummy_trans(
322 struct xfs_inode *ip = mp->m_rootip; 321 struct xfs_inode *ip = mp->m_rootip;
323 struct xfs_trans *tp; 322 struct xfs_trans *tp;
324 int error; 323 int error;
325 int log_flags = XFS_LOG_FORCE;
326
327 if (flags & SYNC_WAIT)
328 log_flags |= XFS_LOG_SYNC;
329 324
330 /* 325 /*
331 * Put a dummy transaction in the log to tell recovery 326 * Put a dummy transaction in the log to tell recovery
@@ -347,11 +342,11 @@ xfs_commit_dummy_trans(
347 xfs_iunlock(ip, XFS_ILOCK_EXCL); 342 xfs_iunlock(ip, XFS_ILOCK_EXCL);
348 343
349 /* the log force ensures this transaction is pushed to disk */ 344 /* the log force ensures this transaction is pushed to disk */
350 xfs_log_force(mp, 0, log_flags); 345 xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
351 return error; 346 return error;
352} 347}
353 348
354int 349STATIC int
355xfs_sync_fsdata( 350xfs_sync_fsdata(
356 struct xfs_mount *mp, 351 struct xfs_mount *mp,
357 int flags) 352 int flags)
@@ -367,7 +362,7 @@ xfs_sync_fsdata(
367 if (flags & SYNC_TRYLOCK) { 362 if (flags & SYNC_TRYLOCK) {
368 ASSERT(!(flags & SYNC_WAIT)); 363 ASSERT(!(flags & SYNC_WAIT));
369 364
370 bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); 365 bp = xfs_getsb(mp, XBF_TRYLOCK);
371 if (!bp) 366 if (!bp)
372 goto out; 367 goto out;
373 368
@@ -387,7 +382,7 @@ xfs_sync_fsdata(
387 * become pinned in between there and here. 382 * become pinned in between there and here.
388 */ 383 */
389 if (XFS_BUF_ISPINNED(bp)) 384 if (XFS_BUF_ISPINNED(bp))
390 xfs_log_force(mp, 0, XFS_LOG_FORCE); 385 xfs_log_force(mp, 0);
391 } 386 }
392 387
393 388
@@ -448,9 +443,6 @@ xfs_quiesce_data(
448 xfs_sync_data(mp, SYNC_WAIT); 443 xfs_sync_data(mp, SYNC_WAIT);
449 xfs_qm_sync(mp, SYNC_WAIT); 444 xfs_qm_sync(mp, SYNC_WAIT);
450 445
451 /* drop inode references pinned by filestreams */
452 xfs_filestream_flush(mp);
453
454 /* write superblock and hoover up shutdown errors */ 446 /* write superblock and hoover up shutdown errors */
455 error = xfs_sync_fsdata(mp, SYNC_WAIT); 447 error = xfs_sync_fsdata(mp, SYNC_WAIT);
456 448
@@ -467,16 +459,18 @@ xfs_quiesce_fs(
467{ 459{
468 int count = 0, pincount; 460 int count = 0, pincount;
469 461
462 xfs_reclaim_inodes(mp, 0);
470 xfs_flush_buftarg(mp->m_ddev_targp, 0); 463 xfs_flush_buftarg(mp->m_ddev_targp, 0);
471 xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
472 464
473 /* 465 /*
474 * This loop must run at least twice. The first instance of the loop 466 * This loop must run at least twice. The first instance of the loop
475 * will flush most meta data but that will generate more meta data 467 * will flush most meta data but that will generate more meta data
476 * (typically directory updates). Which then must be flushed and 468 * (typically directory updates). Which then must be flushed and
477 * logged before we can write the unmount record. 469 * logged before we can write the unmount record. We also so sync
470 * reclaim of inodes to catch any that the above delwri flush skipped.
478 */ 471 */
479 do { 472 do {
473 xfs_reclaim_inodes(mp, SYNC_WAIT);
480 xfs_sync_attr(mp, SYNC_WAIT); 474 xfs_sync_attr(mp, SYNC_WAIT);
481 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); 475 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
482 if (!pincount) { 476 if (!pincount) {
@@ -575,7 +569,7 @@ xfs_flush_inodes(
575 igrab(inode); 569 igrab(inode);
576 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); 570 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
577 wait_for_completion(&completion); 571 wait_for_completion(&completion);
578 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); 572 xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
579} 573}
580 574
581/* 575/*
@@ -591,8 +585,8 @@ xfs_sync_worker(
591 int error; 585 int error;
592 586
593 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { 587 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
594 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 588 xfs_log_force(mp, 0);
595 xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); 589 xfs_reclaim_inodes(mp, 0);
596 /* dgc: errors ignored here */ 590 /* dgc: errors ignored here */
597 error = xfs_qm_sync(mp, SYNC_TRYLOCK); 591 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
598 error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); 592 error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
@@ -690,16 +684,17 @@ void
690xfs_inode_set_reclaim_tag( 684xfs_inode_set_reclaim_tag(
691 xfs_inode_t *ip) 685 xfs_inode_t *ip)
692{ 686{
693 xfs_mount_t *mp = ip->i_mount; 687 struct xfs_mount *mp = ip->i_mount;
694 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); 688 struct xfs_perag *pag;
695 689
690 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
696 read_lock(&pag->pag_ici_lock); 691 read_lock(&pag->pag_ici_lock);
697 spin_lock(&ip->i_flags_lock); 692 spin_lock(&ip->i_flags_lock);
698 __xfs_inode_set_reclaim_tag(pag, ip); 693 __xfs_inode_set_reclaim_tag(pag, ip);
699 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 694 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
700 spin_unlock(&ip->i_flags_lock); 695 spin_unlock(&ip->i_flags_lock);
701 read_unlock(&pag->pag_ici_lock); 696 read_unlock(&pag->pag_ici_lock);
702 xfs_put_perag(mp, pag); 697 xfs_perag_put(pag);
703} 698}
704 699
705void 700void
@@ -712,12 +707,64 @@ __xfs_inode_clear_reclaim_tag(
712 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); 707 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
713} 708}
714 709
710/*
711 * Inodes in different states need to be treated differently, and the return
712 * value of xfs_iflush is not sufficient to get this right. The following table
713 * lists the inode states and the reclaim actions necessary for non-blocking
714 * reclaim:
715 *
716 *
717 * inode state iflush ret required action
718 * --------------- ---------- ---------------
719 * bad - reclaim
720 * shutdown EIO unpin and reclaim
721 * clean, unpinned 0 reclaim
722 * stale, unpinned 0 reclaim
723 * clean, pinned(*) 0 requeue
724 * stale, pinned EAGAIN requeue
725 * dirty, delwri ok 0 requeue
726 * dirty, delwri blocked EAGAIN requeue
727 * dirty, sync flush 0 reclaim
728 *
729 * (*) dgc: I don't think the clean, pinned state is possible but it gets
730 * handled anyway given the order of checks implemented.
731 *
732 * As can be seen from the table, the return value of xfs_iflush() is not
733 * sufficient to correctly decide the reclaim action here. The checks in
734 * xfs_iflush() might look like duplicates, but they are not.
735 *
736 * Also, because we get the flush lock first, we know that any inode that has
737 * been flushed delwri has had the flush completed by the time we check that
738 * the inode is clean. The clean inode check needs to be done before flushing
739 * the inode delwri otherwise we would loop forever requeuing clean inodes as
740 * we cannot tell apart a successful delwri flush and a clean inode from the
741 * return value of xfs_iflush().
742 *
743 * Note that because the inode is flushed delayed write by background
744 * writeback, the flush lock may already be held here and waiting on it can
745 * result in very long latencies. Hence for sync reclaims, where we wait on the
746 * flush lock, the caller should push out delayed write inodes first before
747 * trying to reclaim them to minimise the amount of time spent waiting. For
748 * background relaim, we just requeue the inode for the next pass.
749 *
750 * Hence the order of actions after gaining the locks should be:
751 * bad => reclaim
752 * shutdown => unpin and reclaim
753 * pinned, delwri => requeue
754 * pinned, sync => unpin
755 * stale => reclaim
756 * clean => reclaim
757 * dirty, delwri => flush and requeue
758 * dirty, sync => flush, wait and reclaim
759 */
715STATIC int 760STATIC int
716xfs_reclaim_inode( 761xfs_reclaim_inode(
717 struct xfs_inode *ip, 762 struct xfs_inode *ip,
718 struct xfs_perag *pag, 763 struct xfs_perag *pag,
719 int sync_mode) 764 int sync_mode)
720{ 765{
766 int error = 0;
767
721 /* 768 /*
722 * The radix tree lock here protects a thread in xfs_iget from racing 769 * The radix tree lock here protects a thread in xfs_iget from racing
723 * with us starting reclaim on the inode. Once we have the 770 * with us starting reclaim on the inode. Once we have the
@@ -735,33 +782,70 @@ xfs_reclaim_inode(
735 spin_unlock(&ip->i_flags_lock); 782 spin_unlock(&ip->i_flags_lock);
736 write_unlock(&pag->pag_ici_lock); 783 write_unlock(&pag->pag_ici_lock);
737 784
738 /*
739 * If the inode is still dirty, then flush it out. If the inode
740 * is not in the AIL, then it will be OK to flush it delwri as
741 * long as xfs_iflush() does not keep any references to the inode.
742 * We leave that decision up to xfs_iflush() since it has the
743 * knowledge of whether it's OK to simply do a delwri flush of
744 * the inode or whether we need to wait until the inode is
745 * pulled from the AIL.
746 * We get the flush lock regardless, though, just to make sure
747 * we don't free it while it is being flushed.
748 */
749 xfs_ilock(ip, XFS_ILOCK_EXCL); 785 xfs_ilock(ip, XFS_ILOCK_EXCL);
750 xfs_iflock(ip); 786 if (!xfs_iflock_nowait(ip)) {
787 if (!(sync_mode & SYNC_WAIT))
788 goto out;
789 xfs_iflock(ip);
790 }
791
792 if (is_bad_inode(VFS_I(ip)))
793 goto reclaim;
794 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
795 xfs_iunpin_wait(ip);
796 goto reclaim;
797 }
798 if (xfs_ipincount(ip)) {
799 if (!(sync_mode & SYNC_WAIT)) {
800 xfs_ifunlock(ip);
801 goto out;
802 }
803 xfs_iunpin_wait(ip);
804 }
805 if (xfs_iflags_test(ip, XFS_ISTALE))
806 goto reclaim;
807 if (xfs_inode_clean(ip))
808 goto reclaim;
809
810 /* Now we have an inode that needs flushing */
811 error = xfs_iflush(ip, sync_mode);
812 if (sync_mode & SYNC_WAIT) {
813 xfs_iflock(ip);
814 goto reclaim;
815 }
751 816
752 /* 817 /*
753 * In the case of a forced shutdown we rely on xfs_iflush() to 818 * When we have to flush an inode but don't have SYNC_WAIT set, we
754 * wait for the inode to be unpinned before returning an error. 819 * flush the inode out using a delwri buffer and wait for the next
820 * call into reclaim to find it in a clean state instead of waiting for
821 * it now. We also don't return errors here - if the error is transient
822 * then the next reclaim pass will flush the inode, and if the error
823 * is permanent then the next sync reclaim will relcaim the inode and
824 * pass on the error.
755 */ 825 */
756 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { 826 if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
757 /* synchronize with xfs_iflush_done */ 827 xfs_fs_cmn_err(CE_WARN, ip->i_mount,
758 xfs_iflock(ip); 828 "inode 0x%llx background reclaim flush failed with %d",
759 xfs_ifunlock(ip); 829 (long long)ip->i_ino, error);
760 } 830 }
831out:
832 xfs_iflags_clear(ip, XFS_IRECLAIM);
833 xfs_iunlock(ip, XFS_ILOCK_EXCL);
834 /*
835 * We could return EAGAIN here to make reclaim rescan the inode tree in
836 * a short while. However, this just burns CPU time scanning the tree
837 * waiting for IO to complete and xfssyncd never goes back to the idle
838 * state. Instead, return 0 to let the next scheduled background reclaim
839 * attempt to reclaim the inode again.
840 */
841 return 0;
761 842
843reclaim:
844 xfs_ifunlock(ip);
762 xfs_iunlock(ip, XFS_ILOCK_EXCL); 845 xfs_iunlock(ip, XFS_ILOCK_EXCL);
763 xfs_ireclaim(ip); 846 xfs_ireclaim(ip);
764 return 0; 847 return error;
848
765} 849}
766 850
767int 851int
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index ea932b43335d..d480c346cabb 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -37,7 +37,6 @@ void xfs_syncd_stop(struct xfs_mount *mp);
37 37
38int xfs_sync_attr(struct xfs_mount *mp, int flags); 38int xfs_sync_attr(struct xfs_mount *mp, int flags);
39int xfs_sync_data(struct xfs_mount *mp, int flags); 39int xfs_sync_data(struct xfs_mount *mp, int flags);
40int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
41 40
42int xfs_quiesce_data(struct xfs_mount *mp); 41int xfs_quiesce_data(struct xfs_mount *mp);
43void xfs_quiesce_attr(struct xfs_mount *mp); 42void xfs_quiesce_attr(struct xfs_mount *mp);
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index c22a608321a3..a4574dcf5065 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -78,6 +78,33 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class,
78 ) 78 )
79) 79)
80 80
81#define DEFINE_PERAG_REF_EVENT(name) \
82TRACE_EVENT(name, \
83 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
84 unsigned long caller_ip), \
85 TP_ARGS(mp, agno, refcount, caller_ip), \
86 TP_STRUCT__entry( \
87 __field(dev_t, dev) \
88 __field(xfs_agnumber_t, agno) \
89 __field(int, refcount) \
90 __field(unsigned long, caller_ip) \
91 ), \
92 TP_fast_assign( \
93 __entry->dev = mp->m_super->s_dev; \
94 __entry->agno = agno; \
95 __entry->refcount = refcount; \
96 __entry->caller_ip = caller_ip; \
97 ), \
98 TP_printk("dev %d:%d agno %u refcount %d caller %pf", \
99 MAJOR(__entry->dev), MINOR(__entry->dev), \
100 __entry->agno, \
101 __entry->refcount, \
102 (char *)__entry->caller_ip) \
103);
104
105DEFINE_PERAG_REF_EVENT(xfs_perag_get)
106DEFINE_PERAG_REF_EVENT(xfs_perag_put)
107
81#define DEFINE_ATTR_LIST_EVENT(name) \ 108#define DEFINE_ATTR_LIST_EVENT(name) \
82DEFINE_EVENT(xfs_attr_list_class, name, \ 109DEFINE_EVENT(xfs_attr_list_class, name, \
83 TP_PROTO(struct xfs_attr_list_context *ctx), \ 110 TP_PROTO(struct xfs_attr_list_context *ctx), \
@@ -456,6 +483,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
456DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); 483DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
457DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); 484DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
458DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); 485DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
486DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf);
459DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); 487DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
460DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); 488DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
461DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); 489DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
@@ -1414,6 +1442,59 @@ TRACE_EVENT(xfs_dir2_leafn_moveents,
1414 __entry->count) 1442 __entry->count)
1415); 1443);
1416 1444
1445#define XFS_SWAPEXT_INODES \
1446 { 0, "target" }, \
1447 { 1, "temp" }
1448
1449#define XFS_INODE_FORMAT_STR \
1450 { 0, "invalid" }, \
1451 { 1, "local" }, \
1452 { 2, "extent" }, \
1453 { 3, "btree" }
1454
1455DECLARE_EVENT_CLASS(xfs_swap_extent_class,
1456 TP_PROTO(struct xfs_inode *ip, int which),
1457 TP_ARGS(ip, which),
1458 TP_STRUCT__entry(
1459 __field(dev_t, dev)
1460 __field(int, which)
1461 __field(xfs_ino_t, ino)
1462 __field(int, format)
1463 __field(int, nex)
1464 __field(int, max_nex)
1465 __field(int, broot_size)
1466 __field(int, fork_off)
1467 ),
1468 TP_fast_assign(
1469 __entry->dev = VFS_I(ip)->i_sb->s_dev;
1470 __entry->which = which;
1471 __entry->ino = ip->i_ino;
1472 __entry->format = ip->i_d.di_format;
1473 __entry->nex = ip->i_d.di_nextents;
1474 __entry->max_nex = ip->i_df.if_ext_max;
1475 __entry->broot_size = ip->i_df.if_broot_bytes;
1476 __entry->fork_off = XFS_IFORK_BOFF(ip);
1477 ),
1478 TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
1479 "Max in-fork extents %d, broot size %d, fork offset %d",
1480 MAJOR(__entry->dev), MINOR(__entry->dev),
1481 __entry->ino,
1482 __print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
1483 __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
1484 __entry->nex,
1485 __entry->max_nex,
1486 __entry->broot_size,
1487 __entry->fork_off)
1488)
1489
1490#define DEFINE_SWAPEXT_EVENT(name) \
1491DEFINE_EVENT(xfs_swap_extent_class, name, \
1492 TP_PROTO(struct xfs_inode *ip, int which), \
1493 TP_ARGS(ip, which))
1494
1495DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
1496DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
1497
1417#endif /* _TRACE_XFS_H */ 1498#endif /* _TRACE_XFS_H */
1418 1499
1419#undef TRACE_INCLUDE_PATH 1500#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 0b1878857fc3..fa01b9daba6b 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -45,7 +45,7 @@ xfs_xattr_get(struct dentry *dentry, const char *name,
45 value = NULL; 45 value = NULL;
46 } 46 }
47 47
48 error = -xfs_attr_get(ip, name, value, &asize, xflags); 48 error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
49 if (error) 49 if (error)
50 return error; 50 return error;
51 return asize; 51 return asize;
@@ -67,8 +67,9 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
67 xflags |= ATTR_REPLACE; 67 xflags |= ATTR_REPLACE;
68 68
69 if (!value) 69 if (!value)
70 return -xfs_attr_remove(ip, name, xflags); 70 return -xfs_attr_remove(ip, (unsigned char *)name, xflags);
71 return -xfs_attr_set(ip, name, (void *)value, size, xflags); 71 return -xfs_attr_set(ip, (unsigned char *)name,
72 (void *)value, size, xflags);
72} 73}
73 74
74static struct xattr_handler xfs_xattr_user_handler = { 75static struct xattr_handler xfs_xattr_user_handler = {
@@ -124,8 +125,13 @@ static const char *xfs_xattr_prefix(int flags)
124} 125}
125 126
126static int 127static int
127xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags, 128xfs_xattr_put_listent(
128 char *name, int namelen, int valuelen, char *value) 129 struct xfs_attr_list_context *context,
130 int flags,
131 unsigned char *name,
132 int namelen,
133 int valuelen,
134 unsigned char *value)
129{ 135{
130 unsigned int prefix_len = xfs_xattr_prefix_len(flags); 136 unsigned int prefix_len = xfs_xattr_prefix_len(flags);
131 char *offset; 137 char *offset;
@@ -148,7 +154,7 @@ xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags,
148 offset = (char *)context->alist + context->count; 154 offset = (char *)context->alist + context->count;
149 strncpy(offset, xfs_xattr_prefix(flags), prefix_len); 155 strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
150 offset += prefix_len; 156 offset += prefix_len;
151 strncpy(offset, name, namelen); /* real name */ 157 strncpy(offset, (char *)name, namelen); /* real name */
152 offset += namelen; 158 offset += namelen;
153 *offset = '\0'; 159 *offset = '\0';
154 context->count += prefix_len + namelen + 1; 160 context->count += prefix_len + namelen + 1;
@@ -156,8 +162,13 @@ xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags,
156} 162}
157 163
158static int 164static int
159xfs_xattr_put_listent_sizes(struct xfs_attr_list_context *context, int flags, 165xfs_xattr_put_listent_sizes(
160 char *name, int namelen, int valuelen, char *value) 166 struct xfs_attr_list_context *context,
167 int flags,
168 unsigned char *name,
169 int namelen,
170 int valuelen,
171 unsigned char *value)
161{ 172{
162 context->count += xfs_xattr_prefix_len(flags) + namelen + 1; 173 context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
163 return 0; 174 return 0;
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index d7c7eea09fc2..5f79dd78626b 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -1187,7 +1187,7 @@ xfs_qm_dqflush(
1187 * block, nada. 1187 * block, nada.
1188 */ 1188 */
1189 if (!XFS_DQ_IS_DIRTY(dqp) || 1189 if (!XFS_DQ_IS_DIRTY(dqp) ||
1190 (!(flags & XFS_QMOPT_SYNC) && atomic_read(&dqp->q_pincount) > 0)) { 1190 (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
1191 xfs_dqfunlock(dqp); 1191 xfs_dqfunlock(dqp);
1192 return 0; 1192 return 0;
1193 } 1193 }
@@ -1248,23 +1248,20 @@ xfs_qm_dqflush(
1248 */ 1248 */
1249 if (XFS_BUF_ISPINNED(bp)) { 1249 if (XFS_BUF_ISPINNED(bp)) {
1250 trace_xfs_dqflush_force(dqp); 1250 trace_xfs_dqflush_force(dqp);
1251 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 1251 xfs_log_force(mp, 0);
1252 } 1252 }
1253 1253
1254 if (flags & XFS_QMOPT_DELWRI) { 1254 if (flags & SYNC_WAIT)
1255 xfs_bdwrite(mp, bp);
1256 } else if (flags & XFS_QMOPT_ASYNC) {
1257 error = xfs_bawrite(mp, bp);
1258 } else {
1259 error = xfs_bwrite(mp, bp); 1255 error = xfs_bwrite(mp, bp);
1260 } 1256 else
1257 xfs_bdwrite(mp, bp);
1261 1258
1262 trace_xfs_dqflush_done(dqp); 1259 trace_xfs_dqflush_done(dqp);
1263 1260
1264 /* 1261 /*
1265 * dqp is still locked, but caller is free to unlock it now. 1262 * dqp is still locked, but caller is free to unlock it now.
1266 */ 1263 */
1267 return (error); 1264 return error;
1268 1265
1269} 1266}
1270 1267
@@ -1445,7 +1442,7 @@ xfs_qm_dqpurge(
1445 * We don't care about getting disk errors here. We need 1442 * We don't care about getting disk errors here. We need
1446 * to purge this dquot anyway, so we go ahead regardless. 1443 * to purge this dquot anyway, so we go ahead regardless.
1447 */ 1444 */
1448 error = xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC); 1445 error = xfs_qm_dqflush(dqp, SYNC_WAIT);
1449 if (error) 1446 if (error)
1450 xfs_fs_cmn_err(CE_WARN, mp, 1447 xfs_fs_cmn_err(CE_WARN, mp,
1451 "xfs_qm_dqpurge: dquot %p flush failed", dqp); 1448 "xfs_qm_dqpurge: dquot %p flush failed", dqp);
@@ -1529,25 +1526,17 @@ xfs_qm_dqflock_pushbuf_wait(
1529 * the flush lock when the I/O completes. 1526 * the flush lock when the I/O completes.
1530 */ 1527 */
1531 bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno, 1528 bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno,
1532 XFS_QI_DQCHUNKLEN(dqp->q_mount), 1529 XFS_QI_DQCHUNKLEN(dqp->q_mount), XBF_TRYLOCK);
1533 XFS_INCORE_TRYLOCK); 1530 if (!bp)
1534 if (bp != NULL) { 1531 goto out_lock;
1535 if (XFS_BUF_ISDELAYWRITE(bp)) { 1532
1536 int error; 1533 if (XFS_BUF_ISDELAYWRITE(bp)) {
1537 if (XFS_BUF_ISPINNED(bp)) { 1534 if (XFS_BUF_ISPINNED(bp))
1538 xfs_log_force(dqp->q_mount, 1535 xfs_log_force(dqp->q_mount, 0);
1539 (xfs_lsn_t)0, 1536 xfs_buf_delwri_promote(bp);
1540 XFS_LOG_FORCE); 1537 wake_up_process(bp->b_target->bt_task);
1541 }
1542 error = xfs_bawrite(dqp->q_mount, bp);
1543 if (error)
1544 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
1545 "xfs_qm_dqflock_pushbuf_wait: "
1546 "pushbuf error %d on dqp %p, bp %p",
1547 error, dqp, bp);
1548 } else {
1549 xfs_buf_relse(bp);
1550 }
1551 } 1538 }
1539 xfs_buf_relse(bp);
1540out_lock:
1552 xfs_dqflock(dqp); 1541 xfs_dqflock(dqp);
1553} 1542}
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index d0d4a9a0bbd7..4e4ee9a57194 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -74,11 +74,11 @@ xfs_qm_dquot_logitem_format(
74 74
75 logvec->i_addr = (xfs_caddr_t)&logitem->qli_format; 75 logvec->i_addr = (xfs_caddr_t)&logitem->qli_format;
76 logvec->i_len = sizeof(xfs_dq_logformat_t); 76 logvec->i_len = sizeof(xfs_dq_logformat_t);
77 XLOG_VEC_SET_TYPE(logvec, XLOG_REG_TYPE_QFORMAT); 77 logvec->i_type = XLOG_REG_TYPE_QFORMAT;
78 logvec++; 78 logvec++;
79 logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core; 79 logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core;
80 logvec->i_len = sizeof(xfs_disk_dquot_t); 80 logvec->i_len = sizeof(xfs_disk_dquot_t);
81 XLOG_VEC_SET_TYPE(logvec, XLOG_REG_TYPE_DQUOT); 81 logvec->i_type = XLOG_REG_TYPE_DQUOT;
82 82
83 ASSERT(2 == logitem->qli_item.li_desc->lid_size); 83 ASSERT(2 == logitem->qli_item.li_desc->lid_size);
84 logitem->qli_format.qlf_size = 2; 84 logitem->qli_format.qlf_size = 2;
@@ -153,7 +153,7 @@ xfs_qm_dquot_logitem_push(
153 * lock without sleeping, then there must not have been 153 * lock without sleeping, then there must not have been
154 * anyone in the process of flushing the dquot. 154 * anyone in the process of flushing the dquot.
155 */ 155 */
156 error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); 156 error = xfs_qm_dqflush(dqp, 0);
157 if (error) 157 if (error)
158 xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 158 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
159 "xfs_qm_dquot_logitem_push: push error %d on dqp %p", 159 "xfs_qm_dquot_logitem_push: push error %d on dqp %p",
@@ -190,7 +190,7 @@ xfs_qm_dqunpin_wait(
190 /* 190 /*
191 * Give the log a push so we don't wait here too long. 191 * Give the log a push so we don't wait here too long.
192 */ 192 */
193 xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE); 193 xfs_log_force(dqp->q_mount, 0);
194 wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); 194 wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
195} 195}
196 196
@@ -212,68 +212,31 @@ xfs_qm_dquot_logitem_pushbuf(
212 xfs_dquot_t *dqp; 212 xfs_dquot_t *dqp;
213 xfs_mount_t *mp; 213 xfs_mount_t *mp;
214 xfs_buf_t *bp; 214 xfs_buf_t *bp;
215 uint dopush;
216 215
217 dqp = qip->qli_dquot; 216 dqp = qip->qli_dquot;
218 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 217 ASSERT(XFS_DQ_IS_LOCKED(dqp));
219 218
220 /* 219 /*
221 * The qli_pushbuf_flag keeps others from
222 * trying to duplicate our effort.
223 */
224 ASSERT(qip->qli_pushbuf_flag != 0);
225 ASSERT(qip->qli_push_owner == current_pid());
226
227 /*
228 * If flushlock isn't locked anymore, chances are that the 220 * If flushlock isn't locked anymore, chances are that the
229 * inode flush completed and the inode was taken off the AIL. 221 * inode flush completed and the inode was taken off the AIL.
230 * So, just get out. 222 * So, just get out.
231 */ 223 */
232 if (completion_done(&dqp->q_flush) || 224 if (completion_done(&dqp->q_flush) ||
233 ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { 225 ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) {
234 qip->qli_pushbuf_flag = 0;
235 xfs_dqunlock(dqp); 226 xfs_dqunlock(dqp);
236 return; 227 return;
237 } 228 }
238 mp = dqp->q_mount; 229 mp = dqp->q_mount;
239 bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, 230 bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno,
240 XFS_QI_DQCHUNKLEN(mp), 231 XFS_QI_DQCHUNKLEN(mp), XBF_TRYLOCK);
241 XFS_INCORE_TRYLOCK); 232 xfs_dqunlock(dqp);
242 if (bp != NULL) { 233 if (!bp)
243 if (XFS_BUF_ISDELAYWRITE(bp)) {
244 dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
245 !completion_done(&dqp->q_flush));
246 qip->qli_pushbuf_flag = 0;
247 xfs_dqunlock(dqp);
248
249 if (XFS_BUF_ISPINNED(bp)) {
250 xfs_log_force(mp, (xfs_lsn_t)0,
251 XFS_LOG_FORCE);
252 }
253 if (dopush) {
254 int error;
255#ifdef XFSRACEDEBUG
256 delay_for_intr();
257 delay(300);
258#endif
259 error = xfs_bawrite(mp, bp);
260 if (error)
261 xfs_fs_cmn_err(CE_WARN, mp,
262 "xfs_qm_dquot_logitem_pushbuf: pushbuf error %d on qip %p, bp %p",
263 error, qip, bp);
264 } else {
265 xfs_buf_relse(bp);
266 }
267 } else {
268 qip->qli_pushbuf_flag = 0;
269 xfs_dqunlock(dqp);
270 xfs_buf_relse(bp);
271 }
272 return; 234 return;
273 } 235 if (XFS_BUF_ISDELAYWRITE(bp))
236 xfs_buf_delwri_promote(bp);
237 xfs_buf_relse(bp);
238 return;
274 239
275 qip->qli_pushbuf_flag = 0;
276 xfs_dqunlock(dqp);
277} 240}
278 241
279/* 242/*
@@ -291,50 +254,24 @@ xfs_qm_dquot_logitem_trylock(
291 xfs_dq_logitem_t *qip) 254 xfs_dq_logitem_t *qip)
292{ 255{
293 xfs_dquot_t *dqp; 256 xfs_dquot_t *dqp;
294 uint retval;
295 257
296 dqp = qip->qli_dquot; 258 dqp = qip->qli_dquot;
297 if (atomic_read(&dqp->q_pincount) > 0) 259 if (atomic_read(&dqp->q_pincount) > 0)
298 return (XFS_ITEM_PINNED); 260 return XFS_ITEM_PINNED;
299 261
300 if (! xfs_qm_dqlock_nowait(dqp)) 262 if (! xfs_qm_dqlock_nowait(dqp))
301 return (XFS_ITEM_LOCKED); 263 return XFS_ITEM_LOCKED;
302 264
303 retval = XFS_ITEM_SUCCESS;
304 if (!xfs_dqflock_nowait(dqp)) { 265 if (!xfs_dqflock_nowait(dqp)) {
305 /* 266 /*
306 * The dquot is already being flushed. It may have been 267 * dquot has already been flushed to the backing buffer,
307 * flushed delayed write, however, and we don't want to 268 * leave it locked, pushbuf routine will unlock it.
308 * get stuck waiting for that to complete. So, we want to check
309 * to see if we can lock the dquot's buffer without sleeping.
310 * If we can and it is marked for delayed write, then we
311 * hold it and send it out from the push routine. We don't
312 * want to do that now since we might sleep in the device
313 * strategy routine. We also don't want to grab the buffer lock
314 * here because we'd like not to call into the buffer cache
315 * while holding the AIL lock.
316 * Make sure to only return PUSHBUF if we set pushbuf_flag
317 * ourselves. If someone else is doing it then we don't
318 * want to go to the push routine and duplicate their efforts.
319 */ 269 */
320 if (qip->qli_pushbuf_flag == 0) { 270 return XFS_ITEM_PUSHBUF;
321 qip->qli_pushbuf_flag = 1;
322 ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno);
323#ifdef DEBUG
324 qip->qli_push_owner = current_pid();
325#endif
326 /*
327 * The dquot is left locked.
328 */
329 retval = XFS_ITEM_PUSHBUF;
330 } else {
331 retval = XFS_ITEM_FLUSHING;
332 xfs_dqunlock_nonotify(dqp);
333 }
334 } 271 }
335 272
336 ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL); 273 ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL);
337 return (retval); 274 return XFS_ITEM_SUCCESS;
338} 275}
339 276
340 277
@@ -467,7 +404,7 @@ xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf,
467 404
468 log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); 405 log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format);
469 log_vector->i_len = sizeof(xfs_qoff_logitem_t); 406 log_vector->i_len = sizeof(xfs_qoff_logitem_t);
470 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_QUOTAOFF); 407 log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
471 qf->qql_format.qf_size = 1; 408 qf->qql_format.qf_size = 1;
472} 409}
473 410
diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h
index 5a632531f843..5acae2ada70b 100644
--- a/fs/xfs/quota/xfs_dquot_item.h
+++ b/fs/xfs/quota/xfs_dquot_item.h
@@ -27,10 +27,6 @@ typedef struct xfs_dq_logitem {
27 xfs_log_item_t qli_item; /* common portion */ 27 xfs_log_item_t qli_item; /* common portion */
28 struct xfs_dquot *qli_dquot; /* dquot ptr */ 28 struct xfs_dquot *qli_dquot; /* dquot ptr */
29 xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ 29 xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
30 unsigned short qli_pushbuf_flag; /* 1 bit used in push_ail */
31#ifdef DEBUG
32 uint64_t qli_push_owner;
33#endif
34 xfs_dq_logformat_t qli_format; /* logged structure */ 30 xfs_dq_logformat_t qli_format; /* logged structure */
35} xfs_dq_logitem_t; 31} xfs_dq_logitem_t;
36 32
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 9e627a8b5b0e..417e61e3d9dd 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -118,9 +118,14 @@ xfs_Gqm_init(void)
118 */ 118 */
119 udqhash = kmem_zalloc_greedy(&hsize, 119 udqhash = kmem_zalloc_greedy(&hsize,
120 XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t), 120 XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
121 XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t), 121 XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
122 KM_SLEEP | KM_MAYFAIL | KM_LARGE); 122 if (!udqhash)
123 gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE); 123 goto out;
124
125 gdqhash = kmem_zalloc_large(hsize);
126 if (!gdqhash)
127 goto out_free_udqhash;
128
124 hsize /= sizeof(xfs_dqhash_t); 129 hsize /= sizeof(xfs_dqhash_t);
125 ndquot = hsize << 8; 130 ndquot = hsize << 8;
126 131
@@ -170,6 +175,11 @@ xfs_Gqm_init(void)
170 mutex_init(&qcheck_lock); 175 mutex_init(&qcheck_lock);
171#endif 176#endif
172 return xqm; 177 return xqm;
178
179 out_free_udqhash:
180 kmem_free_large(udqhash);
181 out:
182 return NULL;
173} 183}
174 184
175/* 185/*
@@ -189,8 +199,8 @@ xfs_qm_destroy(
189 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); 199 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
190 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); 200 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
191 } 201 }
192 kmem_free(xqm->qm_usr_dqhtable); 202 kmem_free_large(xqm->qm_usr_dqhtable);
193 kmem_free(xqm->qm_grp_dqhtable); 203 kmem_free_large(xqm->qm_grp_dqhtable);
194 xqm->qm_usr_dqhtable = NULL; 204 xqm->qm_usr_dqhtable = NULL;
195 xqm->qm_grp_dqhtable = NULL; 205 xqm->qm_grp_dqhtable = NULL;
196 xqm->qm_dqhashmask = 0; 206 xqm->qm_dqhashmask = 0;
@@ -219,8 +229,12 @@ xfs_qm_hold_quotafs_ref(
219 */ 229 */
220 mutex_lock(&xfs_Gqm_lock); 230 mutex_lock(&xfs_Gqm_lock);
221 231
222 if (xfs_Gqm == NULL) 232 if (!xfs_Gqm) {
223 xfs_Gqm = xfs_Gqm_init(); 233 xfs_Gqm = xfs_Gqm_init();
234 if (!xfs_Gqm)
235 return ENOMEM;
236 }
237
224 /* 238 /*
225 * We can keep a list of all filesystems with quotas mounted for 239 * We can keep a list of all filesystems with quotas mounted for
226 * debugging and statistical purposes, but ... 240 * debugging and statistical purposes, but ...
@@ -436,7 +450,7 @@ xfs_qm_unmount_quotas(
436STATIC int 450STATIC int
437xfs_qm_dqflush_all( 451xfs_qm_dqflush_all(
438 xfs_mount_t *mp, 452 xfs_mount_t *mp,
439 int flags) 453 int sync_mode)
440{ 454{
441 int recl; 455 int recl;
442 xfs_dquot_t *dqp; 456 xfs_dquot_t *dqp;
@@ -472,7 +486,7 @@ again:
472 * across a disk write. 486 * across a disk write.
473 */ 487 */
474 xfs_qm_mplist_unlock(mp); 488 xfs_qm_mplist_unlock(mp);
475 error = xfs_qm_dqflush(dqp, flags); 489 error = xfs_qm_dqflush(dqp, sync_mode);
476 xfs_dqunlock(dqp); 490 xfs_dqunlock(dqp);
477 if (error) 491 if (error)
478 return error; 492 return error;
@@ -912,13 +926,11 @@ xfs_qm_sync(
912{ 926{
913 int recl, restarts; 927 int recl, restarts;
914 xfs_dquot_t *dqp; 928 xfs_dquot_t *dqp;
915 uint flush_flags;
916 int error; 929 int error;
917 930
918 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) 931 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
919 return 0; 932 return 0;
920 933
921 flush_flags = (flags & SYNC_WAIT) ? XFS_QMOPT_SYNC : XFS_QMOPT_DELWRI;
922 restarts = 0; 934 restarts = 0;
923 935
924 again: 936 again:
@@ -978,7 +990,7 @@ xfs_qm_sync(
978 * across a disk write 990 * across a disk write
979 */ 991 */
980 xfs_qm_mplist_unlock(mp); 992 xfs_qm_mplist_unlock(mp);
981 error = xfs_qm_dqflush(dqp, flush_flags); 993 error = xfs_qm_dqflush(dqp, flags);
982 xfs_dqunlock(dqp); 994 xfs_dqunlock(dqp);
983 if (error && XFS_FORCED_SHUTDOWN(mp)) 995 if (error && XFS_FORCED_SHUTDOWN(mp))
984 return 0; /* Need to prevent umount failure */ 996 return 0; /* Need to prevent umount failure */
@@ -1782,7 +1794,7 @@ xfs_qm_quotacheck(
1782 * successfully. 1794 * successfully.
1783 */ 1795 */
1784 if (!error) 1796 if (!error)
1785 error = xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI); 1797 error = xfs_qm_dqflush_all(mp, 0);
1786 1798
1787 /* 1799 /*
1788 * We can get this error if we couldn't do a dquot allocation inside 1800 * We can get this error if we couldn't do a dquot allocation inside
@@ -2004,7 +2016,7 @@ xfs_qm_shake_freelist(
2004 * We flush it delayed write, so don't bother 2016 * We flush it delayed write, so don't bother
2005 * releasing the mplock. 2017 * releasing the mplock.
2006 */ 2018 */
2007 error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); 2019 error = xfs_qm_dqflush(dqp, 0);
2008 if (error) { 2020 if (error) {
2009 xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 2021 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2010 "xfs_qm_dqflush_all: dquot %p flush failed", dqp); 2022 "xfs_qm_dqflush_all: dquot %p flush failed", dqp);
@@ -2187,7 +2199,7 @@ xfs_qm_dqreclaim_one(void)
2187 * We flush it delayed write, so don't bother 2199 * We flush it delayed write, so don't bother
2188 * releasing the freelist lock. 2200 * releasing the freelist lock.
2189 */ 2201 */
2190 error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); 2202 error = xfs_qm_dqflush(dqp, 0);
2191 if (error) { 2203 if (error) {
2192 xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 2204 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2193 "xfs_qm_dqreclaim: dquot %p flush failed", dqp); 2205 "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index a5346630dfae..97b410c12794 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -59,7 +59,7 @@ xfs_fill_statvfs_from_dquot(
59 be64_to_cpu(dp->d_blk_hardlimit); 59 be64_to_cpu(dp->d_blk_hardlimit);
60 if (limit && statp->f_blocks > limit) { 60 if (limit && statp->f_blocks > limit) {
61 statp->f_blocks = limit; 61 statp->f_blocks = limit;
62 statp->f_bfree = 62 statp->f_bfree = statp->f_bavail =
63 (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ? 63 (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ?
64 (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0; 64 (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0;
65 } 65 }
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 873e07e29074..5d0ee8d492db 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -1192,9 +1192,9 @@ xfs_qm_internalqcheck(
1192 if (! XFS_IS_QUOTA_ON(mp)) 1192 if (! XFS_IS_QUOTA_ON(mp))
1193 return XFS_ERROR(ESRCH); 1193 return XFS_ERROR(ESRCH);
1194 1194
1195 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1195 xfs_log_force(mp, XFS_LOG_SYNC);
1196 XFS_bflush(mp->m_ddev_targp); 1196 XFS_bflush(mp->m_ddev_targp);
1197 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1197 xfs_log_force(mp, XFS_LOG_SYNC);
1198 XFS_bflush(mp->m_ddev_targp); 1198 XFS_bflush(mp->m_ddev_targp);
1199 1199
1200 mutex_lock(&qcheck_lock); 1200 mutex_lock(&qcheck_lock);
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 97ac9640be98..c3ab75cb1d9a 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -589,12 +589,18 @@ xfs_trans_unreserve_and_mod_dquots(
589 } 589 }
590} 590}
591 591
592STATIC int 592STATIC void
593xfs_quota_error(uint flags) 593xfs_quota_warn(
594 struct xfs_mount *mp,
595 struct xfs_dquot *dqp,
596 int type)
594{ 597{
595 if (flags & XFS_QMOPT_ENOSPC) 598 /* no warnings for project quotas - we just return ENOSPC later */
596 return ENOSPC; 599 if (dqp->dq_flags & XFS_DQ_PROJ)
597 return EDQUOT; 600 return;
601 quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA,
602 be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev,
603 type);
598} 604}
599 605
600/* 606/*
@@ -612,7 +618,6 @@ xfs_trans_dqresv(
612 long ninos, 618 long ninos,
613 uint flags) 619 uint flags)
614{ 620{
615 int error;
616 xfs_qcnt_t hardlimit; 621 xfs_qcnt_t hardlimit;
617 xfs_qcnt_t softlimit; 622 xfs_qcnt_t softlimit;
618 time_t timer; 623 time_t timer;
@@ -649,7 +654,6 @@ xfs_trans_dqresv(
649 warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount); 654 warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount);
650 resbcountp = &dqp->q_res_rtbcount; 655 resbcountp = &dqp->q_res_rtbcount;
651 } 656 }
652 error = 0;
653 657
654 if ((flags & XFS_QMOPT_FORCE_RES) == 0 && 658 if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
655 dqp->q_core.d_id && 659 dqp->q_core.d_id &&
@@ -667,18 +671,20 @@ xfs_trans_dqresv(
667 * nblks. 671 * nblks.
668 */ 672 */
669 if (hardlimit > 0ULL && 673 if (hardlimit > 0ULL &&
670 (hardlimit <= nblks + *resbcountp)) { 674 hardlimit <= nblks + *resbcountp) {
671 error = xfs_quota_error(flags); 675 xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
672 goto error_return; 676 goto error_return;
673 } 677 }
674
675 if (softlimit > 0ULL && 678 if (softlimit > 0ULL &&
676 (softlimit <= nblks + *resbcountp)) { 679 softlimit <= nblks + *resbcountp) {
677 if ((timer != 0 && get_seconds() > timer) || 680 if ((timer != 0 && get_seconds() > timer) ||
678 (warns != 0 && warns >= warnlimit)) { 681 (warns != 0 && warns >= warnlimit)) {
679 error = xfs_quota_error(flags); 682 xfs_quota_warn(mp, dqp,
683 QUOTA_NL_BSOFTLONGWARN);
680 goto error_return; 684 goto error_return;
681 } 685 }
686
687 xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN);
682 } 688 }
683 } 689 }
684 if (ninos > 0) { 690 if (ninos > 0) {
@@ -692,15 +698,19 @@ xfs_trans_dqresv(
692 softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); 698 softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
693 if (!softlimit) 699 if (!softlimit)
694 softlimit = q->qi_isoftlimit; 700 softlimit = q->qi_isoftlimit;
701
695 if (hardlimit > 0ULL && count >= hardlimit) { 702 if (hardlimit > 0ULL && count >= hardlimit) {
696 error = xfs_quota_error(flags); 703 xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
697 goto error_return; 704 goto error_return;
698 } else if (softlimit > 0ULL && count >= softlimit) { 705 }
699 if ((timer != 0 && get_seconds() > timer) || 706 if (softlimit > 0ULL && count >= softlimit) {
707 if ((timer != 0 && get_seconds() > timer) ||
700 (warns != 0 && warns >= warnlimit)) { 708 (warns != 0 && warns >= warnlimit)) {
701 error = xfs_quota_error(flags); 709 xfs_quota_warn(mp, dqp,
710 QUOTA_NL_ISOFTLONGWARN);
702 goto error_return; 711 goto error_return;
703 } 712 }
713 xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN);
704 } 714 }
705 } 715 }
706 } 716 }
@@ -736,9 +746,14 @@ xfs_trans_dqresv(
736 ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount)); 746 ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount));
737 ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount)); 747 ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
738 748
749 xfs_dqunlock(dqp);
750 return 0;
751
739error_return: 752error_return:
740 xfs_dqunlock(dqp); 753 xfs_dqunlock(dqp);
741 return error; 754 if (flags & XFS_QMOPT_ENOSPC)
755 return ENOSPC;
756 return EDQUOT;
742} 757}
743 758
744 759
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 00fd357c3e46..d13eeba2c8f8 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -36,8 +36,8 @@ struct xfs_acl {
36}; 36};
37 37
38/* On-disk XFS extended attribute names */ 38/* On-disk XFS extended attribute names */
39#define SGI_ACL_FILE "SGI_ACL_FILE" 39#define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE"
40#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT" 40#define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT"
41#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) 41#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) 42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
43 43
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 6702bd865811..b1a5a1ff88ea 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -187,17 +187,13 @@ typedef struct xfs_perag_busy {
187/* 187/*
188 * Per-ag incore structure, copies of information in agf and agi, 188 * Per-ag incore structure, copies of information in agf and agi,
189 * to improve the performance of allocation group selection. 189 * to improve the performance of allocation group selection.
190 *
191 * pick sizes which fit in allocation buckets well
192 */ 190 */
193#if (BITS_PER_LONG == 32)
194#define XFS_PAGB_NUM_SLOTS 84
195#elif (BITS_PER_LONG == 64)
196#define XFS_PAGB_NUM_SLOTS 128 191#define XFS_PAGB_NUM_SLOTS 128
197#endif
198 192
199typedef struct xfs_perag 193typedef struct xfs_perag {
200{ 194 struct xfs_mount *pag_mount; /* owner filesystem */
195 xfs_agnumber_t pag_agno; /* AG this structure belongs to */
196 atomic_t pag_ref; /* perag reference count */
201 char pagf_init; /* this agf's entry is initialized */ 197 char pagf_init; /* this agf's entry is initialized */
202 char pagi_init; /* this agi's entry is initialized */ 198 char pagi_init; /* this agi's entry is initialized */
203 char pagf_metadata; /* the agf is preferred to be metadata */ 199 char pagf_metadata; /* the agf is preferred to be metadata */
@@ -210,8 +206,6 @@ typedef struct xfs_perag
210 __uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */ 206 __uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */
211 xfs_agino_t pagi_freecount; /* number of free inodes */ 207 xfs_agino_t pagi_freecount; /* number of free inodes */
212 xfs_agino_t pagi_count; /* number of allocated inodes */ 208 xfs_agino_t pagi_count; /* number of allocated inodes */
213 int pagb_count; /* pagb slots in use */
214 xfs_perag_busy_t *pagb_list; /* unstable blocks */
215 209
216 /* 210 /*
217 * Inode allocation search lookup optimisation. 211 * Inode allocation search lookup optimisation.
@@ -230,6 +224,8 @@ typedef struct xfs_perag
230 rwlock_t pag_ici_lock; /* incore inode lock */ 224 rwlock_t pag_ici_lock; /* incore inode lock */
231 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 225 struct radix_tree_root pag_ici_root; /* incore inode cache root */
232#endif 226#endif
227 int pagb_count; /* pagb slots in use */
228 xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */
233} xfs_perag_t; 229} xfs_perag_t;
234 230
235/* 231/*
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 275b1f4f9430..94cddbfb2560 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1662,11 +1662,13 @@ xfs_free_ag_extent(
1662 xfs_agf_t *agf; 1662 xfs_agf_t *agf;
1663 xfs_perag_t *pag; /* per allocation group data */ 1663 xfs_perag_t *pag; /* per allocation group data */
1664 1664
1665 pag = xfs_perag_get(mp, agno);
1666 pag->pagf_freeblks += len;
1667 xfs_perag_put(pag);
1668
1665 agf = XFS_BUF_TO_AGF(agbp); 1669 agf = XFS_BUF_TO_AGF(agbp);
1666 pag = &mp->m_perag[agno];
1667 be32_add_cpu(&agf->agf_freeblks, len); 1670 be32_add_cpu(&agf->agf_freeblks, len);
1668 xfs_trans_agblocks_delta(tp, len); 1671 xfs_trans_agblocks_delta(tp, len);
1669 pag->pagf_freeblks += len;
1670 XFS_WANT_CORRUPTED_GOTO( 1672 XFS_WANT_CORRUPTED_GOTO(
1671 be32_to_cpu(agf->agf_freeblks) <= 1673 be32_to_cpu(agf->agf_freeblks) <=
1672 be32_to_cpu(agf->agf_length), 1674 be32_to_cpu(agf->agf_length),
@@ -1969,10 +1971,12 @@ xfs_alloc_get_freelist(
1969 xfs_trans_brelse(tp, agflbp); 1971 xfs_trans_brelse(tp, agflbp);
1970 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) 1972 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
1971 agf->agf_flfirst = 0; 1973 agf->agf_flfirst = 0;
1972 pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)]; 1974
1975 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
1973 be32_add_cpu(&agf->agf_flcount, -1); 1976 be32_add_cpu(&agf->agf_flcount, -1);
1974 xfs_trans_agflist_delta(tp, -1); 1977 xfs_trans_agflist_delta(tp, -1);
1975 pag->pagf_flcount--; 1978 pag->pagf_flcount--;
1979 xfs_perag_put(pag);
1976 1980
1977 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; 1981 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
1978 if (btreeblk) { 1982 if (btreeblk) {
@@ -2078,7 +2082,8 @@ xfs_alloc_put_freelist(
2078 be32_add_cpu(&agf->agf_fllast, 1); 2082 be32_add_cpu(&agf->agf_fllast, 1);
2079 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp)) 2083 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
2080 agf->agf_fllast = 0; 2084 agf->agf_fllast = 0;
2081 pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)]; 2085
2086 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2082 be32_add_cpu(&agf->agf_flcount, 1); 2087 be32_add_cpu(&agf->agf_flcount, 1);
2083 xfs_trans_agflist_delta(tp, 1); 2088 xfs_trans_agflist_delta(tp, 1);
2084 pag->pagf_flcount++; 2089 pag->pagf_flcount++;
@@ -2089,6 +2094,7 @@ xfs_alloc_put_freelist(
2089 pag->pagf_btreeblks--; 2094 pag->pagf_btreeblks--;
2090 logflags |= XFS_AGF_BTREEBLKS; 2095 logflags |= XFS_AGF_BTREEBLKS;
2091 } 2096 }
2097 xfs_perag_put(pag);
2092 2098
2093 xfs_alloc_log_agf(tp, agbp, logflags); 2099 xfs_alloc_log_agf(tp, agbp, logflags);
2094 2100
@@ -2152,7 +2158,6 @@ xfs_read_agf(
2152 xfs_trans_brelse(tp, *bpp); 2158 xfs_trans_brelse(tp, *bpp);
2153 return XFS_ERROR(EFSCORRUPTED); 2159 return XFS_ERROR(EFSCORRUPTED);
2154 } 2160 }
2155
2156 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF); 2161 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
2157 return 0; 2162 return 0;
2158} 2163}
@@ -2175,7 +2180,7 @@ xfs_alloc_read_agf(
2175 ASSERT(agno != NULLAGNUMBER); 2180 ASSERT(agno != NULLAGNUMBER);
2176 2181
2177 error = xfs_read_agf(mp, tp, agno, 2182 error = xfs_read_agf(mp, tp, agno,
2178 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0, 2183 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2179 bpp); 2184 bpp);
2180 if (error) 2185 if (error)
2181 return error; 2186 return error;
@@ -2184,7 +2189,7 @@ xfs_alloc_read_agf(
2184 ASSERT(!XFS_BUF_GETERROR(*bpp)); 2189 ASSERT(!XFS_BUF_GETERROR(*bpp));
2185 2190
2186 agf = XFS_BUF_TO_AGF(*bpp); 2191 agf = XFS_BUF_TO_AGF(*bpp);
2187 pag = &mp->m_perag[agno]; 2192 pag = xfs_perag_get(mp, agno);
2188 if (!pag->pagf_init) { 2193 if (!pag->pagf_init) {
2189 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); 2194 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2190 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); 2195 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
@@ -2195,8 +2200,8 @@ xfs_alloc_read_agf(
2195 pag->pagf_levels[XFS_BTNUM_CNTi] = 2200 pag->pagf_levels[XFS_BTNUM_CNTi] =
2196 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); 2201 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
2197 spin_lock_init(&pag->pagb_lock); 2202 spin_lock_init(&pag->pagb_lock);
2198 pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS * 2203 pag->pagb_count = 0;
2199 sizeof(xfs_perag_busy_t), KM_SLEEP); 2204 memset(pag->pagb_list, 0, sizeof(pag->pagb_list));
2200 pag->pagf_init = 1; 2205 pag->pagf_init = 1;
2201 } 2206 }
2202#ifdef DEBUG 2207#ifdef DEBUG
@@ -2211,6 +2216,7 @@ xfs_alloc_read_agf(
2211 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); 2216 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2212 } 2217 }
2213#endif 2218#endif
2219 xfs_perag_put(pag);
2214 return 0; 2220 return 0;
2215} 2221}
2216 2222
@@ -2270,8 +2276,7 @@ xfs_alloc_vextent(
2270 * These three force us into a single a.g. 2276 * These three force us into a single a.g.
2271 */ 2277 */
2272 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); 2278 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2273 down_read(&mp->m_peraglock); 2279 args->pag = xfs_perag_get(mp, args->agno);
2274 args->pag = &mp->m_perag[args->agno];
2275 args->minleft = 0; 2280 args->minleft = 0;
2276 error = xfs_alloc_fix_freelist(args, 0); 2281 error = xfs_alloc_fix_freelist(args, 0);
2277 args->minleft = minleft; 2282 args->minleft = minleft;
@@ -2280,14 +2285,12 @@ xfs_alloc_vextent(
2280 goto error0; 2285 goto error0;
2281 } 2286 }
2282 if (!args->agbp) { 2287 if (!args->agbp) {
2283 up_read(&mp->m_peraglock);
2284 trace_xfs_alloc_vextent_noagbp(args); 2288 trace_xfs_alloc_vextent_noagbp(args);
2285 break; 2289 break;
2286 } 2290 }
2287 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); 2291 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2288 if ((error = xfs_alloc_ag_vextent(args))) 2292 if ((error = xfs_alloc_ag_vextent(args)))
2289 goto error0; 2293 goto error0;
2290 up_read(&mp->m_peraglock);
2291 break; 2294 break;
2292 case XFS_ALLOCTYPE_START_BNO: 2295 case XFS_ALLOCTYPE_START_BNO:
2293 /* 2296 /*
@@ -2339,9 +2342,8 @@ xfs_alloc_vextent(
2339 * Loop over allocation groups twice; first time with 2342 * Loop over allocation groups twice; first time with
2340 * trylock set, second time without. 2343 * trylock set, second time without.
2341 */ 2344 */
2342 down_read(&mp->m_peraglock);
2343 for (;;) { 2345 for (;;) {
2344 args->pag = &mp->m_perag[args->agno]; 2346 args->pag = xfs_perag_get(mp, args->agno);
2345 if (no_min) args->minleft = 0; 2347 if (no_min) args->minleft = 0;
2346 error = xfs_alloc_fix_freelist(args, flags); 2348 error = xfs_alloc_fix_freelist(args, flags);
2347 args->minleft = minleft; 2349 args->minleft = minleft;
@@ -2400,8 +2402,8 @@ xfs_alloc_vextent(
2400 } 2402 }
2401 } 2403 }
2402 } 2404 }
2405 xfs_perag_put(args->pag);
2403 } 2406 }
2404 up_read(&mp->m_peraglock);
2405 if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) { 2407 if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2406 if (args->agno == sagno) 2408 if (args->agno == sagno)
2407 mp->m_agfrotor = (mp->m_agfrotor + 1) % 2409 mp->m_agfrotor = (mp->m_agfrotor + 1) %
@@ -2427,9 +2429,10 @@ xfs_alloc_vextent(
2427 args->len); 2429 args->len);
2428#endif 2430#endif
2429 } 2431 }
2432 xfs_perag_put(args->pag);
2430 return 0; 2433 return 0;
2431error0: 2434error0:
2432 up_read(&mp->m_peraglock); 2435 xfs_perag_put(args->pag);
2433 return error; 2436 return error;
2434} 2437}
2435 2438
@@ -2454,8 +2457,7 @@ xfs_free_extent(
2454 args.agno = XFS_FSB_TO_AGNO(args.mp, bno); 2457 args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2455 ASSERT(args.agno < args.mp->m_sb.sb_agcount); 2458 ASSERT(args.agno < args.mp->m_sb.sb_agcount);
2456 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); 2459 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2457 down_read(&args.mp->m_peraglock); 2460 args.pag = xfs_perag_get(args.mp, args.agno);
2458 args.pag = &args.mp->m_perag[args.agno];
2459 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) 2461 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
2460 goto error0; 2462 goto error0;
2461#ifdef DEBUG 2463#ifdef DEBUG
@@ -2465,7 +2467,7 @@ xfs_free_extent(
2465#endif 2467#endif
2466 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); 2468 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2467error0: 2469error0:
2468 up_read(&args.mp->m_peraglock); 2470 xfs_perag_put(args.pag);
2469 return error; 2471 return error;
2470} 2472}
2471 2473
@@ -2486,15 +2488,15 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
2486 xfs_agblock_t bno, 2488 xfs_agblock_t bno,
2487 xfs_extlen_t len) 2489 xfs_extlen_t len)
2488{ 2490{
2489 xfs_mount_t *mp;
2490 xfs_perag_busy_t *bsy; 2491 xfs_perag_busy_t *bsy;
2492 struct xfs_perag *pag;
2491 int n; 2493 int n;
2492 2494
2493 mp = tp->t_mountp; 2495 pag = xfs_perag_get(tp->t_mountp, agno);
2494 spin_lock(&mp->m_perag[agno].pagb_lock); 2496 spin_lock(&pag->pagb_lock);
2495 2497
2496 /* search pagb_list for an open slot */ 2498 /* search pagb_list for an open slot */
2497 for (bsy = mp->m_perag[agno].pagb_list, n = 0; 2499 for (bsy = pag->pagb_list, n = 0;
2498 n < XFS_PAGB_NUM_SLOTS; 2500 n < XFS_PAGB_NUM_SLOTS;
2499 bsy++, n++) { 2501 bsy++, n++) {
2500 if (bsy->busy_tp == NULL) { 2502 if (bsy->busy_tp == NULL) {
@@ -2502,11 +2504,11 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
2502 } 2504 }
2503 } 2505 }
2504 2506
2505 trace_xfs_alloc_busy(mp, agno, bno, len, n); 2507 trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len, n);
2506 2508
2507 if (n < XFS_PAGB_NUM_SLOTS) { 2509 if (n < XFS_PAGB_NUM_SLOTS) {
2508 bsy = &mp->m_perag[agno].pagb_list[n]; 2510 bsy = &pag->pagb_list[n];
2509 mp->m_perag[agno].pagb_count++; 2511 pag->pagb_count++;
2510 bsy->busy_start = bno; 2512 bsy->busy_start = bno;
2511 bsy->busy_length = len; 2513 bsy->busy_length = len;
2512 bsy->busy_tp = tp; 2514 bsy->busy_tp = tp;
@@ -2521,7 +2523,8 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
2521 xfs_trans_set_sync(tp); 2523 xfs_trans_set_sync(tp);
2522 } 2524 }
2523 2525
2524 spin_unlock(&mp->m_perag[agno].pagb_lock); 2526 spin_unlock(&pag->pagb_lock);
2527 xfs_perag_put(pag);
2525} 2528}
2526 2529
2527void 2530void
@@ -2529,24 +2532,23 @@ xfs_alloc_clear_busy(xfs_trans_t *tp,
2529 xfs_agnumber_t agno, 2532 xfs_agnumber_t agno,
2530 int idx) 2533 int idx)
2531{ 2534{
2532 xfs_mount_t *mp; 2535 struct xfs_perag *pag;
2533 xfs_perag_busy_t *list; 2536 xfs_perag_busy_t *list;
2534 2537
2535 mp = tp->t_mountp;
2536
2537 spin_lock(&mp->m_perag[agno].pagb_lock);
2538 list = mp->m_perag[agno].pagb_list;
2539
2540 ASSERT(idx < XFS_PAGB_NUM_SLOTS); 2538 ASSERT(idx < XFS_PAGB_NUM_SLOTS);
2539 pag = xfs_perag_get(tp->t_mountp, agno);
2540 spin_lock(&pag->pagb_lock);
2541 list = pag->pagb_list;
2541 2542
2542 trace_xfs_alloc_unbusy(mp, agno, idx, list[idx].busy_tp == tp); 2543 trace_xfs_alloc_unbusy(tp->t_mountp, agno, idx, list[idx].busy_tp == tp);
2543 2544
2544 if (list[idx].busy_tp == tp) { 2545 if (list[idx].busy_tp == tp) {
2545 list[idx].busy_tp = NULL; 2546 list[idx].busy_tp = NULL;
2546 mp->m_perag[agno].pagb_count--; 2547 pag->pagb_count--;
2547 } 2548 }
2548 2549
2549 spin_unlock(&mp->m_perag[agno].pagb_lock); 2550 spin_unlock(&pag->pagb_lock);
2551 xfs_perag_put(pag);
2550} 2552}
2551 2553
2552 2554
@@ -2560,17 +2562,15 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2560 xfs_agblock_t bno, 2562 xfs_agblock_t bno,
2561 xfs_extlen_t len) 2563 xfs_extlen_t len)
2562{ 2564{
2563 xfs_mount_t *mp; 2565 struct xfs_perag *pag;
2564 xfs_perag_busy_t *bsy; 2566 xfs_perag_busy_t *bsy;
2565 xfs_agblock_t uend, bend; 2567 xfs_agblock_t uend, bend;
2566 xfs_lsn_t lsn = 0; 2568 xfs_lsn_t lsn = 0;
2567 int cnt; 2569 int cnt;
2568 2570
2569 mp = tp->t_mountp; 2571 pag = xfs_perag_get(tp->t_mountp, agno);
2570 2572 spin_lock(&pag->pagb_lock);
2571 spin_lock(&mp->m_perag[agno].pagb_lock); 2573 cnt = pag->pagb_count;
2572
2573 uend = bno + len - 1;
2574 2574
2575 /* 2575 /*
2576 * search pagb_list for this slot, skipping open slots. We have to 2576 * search pagb_list for this slot, skipping open slots. We have to
@@ -2578,8 +2578,9 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2578 * we have to get the most recent LSN for the log force to push out 2578 * we have to get the most recent LSN for the log force to push out
2579 * all the transactions that span the range. 2579 * all the transactions that span the range.
2580 */ 2580 */
2581 for (cnt = 0; cnt < mp->m_perag[agno].pagb_count; cnt++) { 2581 uend = bno + len - 1;
2582 bsy = &mp->m_perag[agno].pagb_list[cnt]; 2582 for (cnt = 0; cnt < pag->pagb_count; cnt++) {
2583 bsy = &pag->pagb_list[cnt];
2583 if (!bsy->busy_tp) 2584 if (!bsy->busy_tp)
2584 continue; 2585 continue;
2585 2586
@@ -2591,7 +2592,8 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2591 if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0) 2592 if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0)
2592 lsn = bsy->busy_tp->t_commit_lsn; 2593 lsn = bsy->busy_tp->t_commit_lsn;
2593 } 2594 }
2594 spin_unlock(&mp->m_perag[agno].pagb_lock); 2595 spin_unlock(&pag->pagb_lock);
2596 xfs_perag_put(pag);
2595 trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn); 2597 trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn);
2596 2598
2597 /* 2599 /*
@@ -2599,5 +2601,5 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2599 * transaction that freed the block 2601 * transaction that freed the block
2600 */ 2602 */
2601 if (lsn) 2603 if (lsn)
2602 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); 2604 xfs_log_force_lsn(tp->t_mountp, lsn, XFS_LOG_SYNC);
2603} 2605}
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index adbd9141aea1..b726e10d2c1c 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -61,12 +61,14 @@ xfs_allocbt_set_root(
61 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); 61 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
62 xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); 62 xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
63 int btnum = cur->bc_btnum; 63 int btnum = cur->bc_btnum;
64 struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno);
64 65
65 ASSERT(ptr->s != 0); 66 ASSERT(ptr->s != 0);
66 67
67 agf->agf_roots[btnum] = ptr->s; 68 agf->agf_roots[btnum] = ptr->s;
68 be32_add_cpu(&agf->agf_levels[btnum], inc); 69 be32_add_cpu(&agf->agf_levels[btnum], inc);
69 cur->bc_mp->m_perag[seqno].pagf_levels[btnum] += inc; 70 pag->pagf_levels[btnum] += inc;
71 xfs_perag_put(pag);
70 72
71 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); 73 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
72} 74}
@@ -150,6 +152,7 @@ xfs_allocbt_update_lastrec(
150{ 152{
151 struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); 153 struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
152 xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); 154 xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
155 struct xfs_perag *pag;
153 __be32 len; 156 __be32 len;
154 int numrecs; 157 int numrecs;
155 158
@@ -193,7 +196,9 @@ xfs_allocbt_update_lastrec(
193 } 196 }
194 197
195 agf->agf_longest = len; 198 agf->agf_longest = len;
196 cur->bc_mp->m_perag[seqno].pagf_longest = be32_to_cpu(len); 199 pag = xfs_perag_get(cur->bc_mp, seqno);
200 pag->pagf_longest = be32_to_cpu(len);
201 xfs_perag_put(pag);
197 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST); 202 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST);
198} 203}
199 204
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index e953b6cfb2a8..b9c196a53c42 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -93,12 +93,12 @@ STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
93STATIC int 93STATIC int
94xfs_attr_name_to_xname( 94xfs_attr_name_to_xname(
95 struct xfs_name *xname, 95 struct xfs_name *xname,
96 const char *aname) 96 const unsigned char *aname)
97{ 97{
98 if (!aname) 98 if (!aname)
99 return EINVAL; 99 return EINVAL;
100 xname->name = aname; 100 xname->name = aname;
101 xname->len = strlen(aname); 101 xname->len = strlen((char *)aname);
102 if (xname->len >= MAXNAMELEN) 102 if (xname->len >= MAXNAMELEN)
103 return EFAULT; /* match IRIX behaviour */ 103 return EFAULT; /* match IRIX behaviour */
104 104
@@ -124,7 +124,7 @@ STATIC int
124xfs_attr_get_int( 124xfs_attr_get_int(
125 struct xfs_inode *ip, 125 struct xfs_inode *ip,
126 struct xfs_name *name, 126 struct xfs_name *name,
127 char *value, 127 unsigned char *value,
128 int *valuelenp, 128 int *valuelenp,
129 int flags) 129 int flags)
130{ 130{
@@ -171,8 +171,8 @@ xfs_attr_get_int(
171int 171int
172xfs_attr_get( 172xfs_attr_get(
173 xfs_inode_t *ip, 173 xfs_inode_t *ip,
174 const char *name, 174 const unsigned char *name,
175 char *value, 175 unsigned char *value,
176 int *valuelenp, 176 int *valuelenp,
177 int flags) 177 int flags)
178{ 178{
@@ -197,7 +197,7 @@ xfs_attr_get(
197/* 197/*
198 * Calculate how many blocks we need for the new attribute, 198 * Calculate how many blocks we need for the new attribute,
199 */ 199 */
200int 200STATIC int
201xfs_attr_calc_size( 201xfs_attr_calc_size(
202 struct xfs_inode *ip, 202 struct xfs_inode *ip,
203 int namelen, 203 int namelen,
@@ -235,8 +235,12 @@ xfs_attr_calc_size(
235} 235}
236 236
237STATIC int 237STATIC int
238xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, 238xfs_attr_set_int(
239 char *value, int valuelen, int flags) 239 struct xfs_inode *dp,
240 struct xfs_name *name,
241 unsigned char *value,
242 int valuelen,
243 int flags)
240{ 244{
241 xfs_da_args_t args; 245 xfs_da_args_t args;
242 xfs_fsblock_t firstblock; 246 xfs_fsblock_t firstblock;
@@ -452,8 +456,8 @@ out:
452int 456int
453xfs_attr_set( 457xfs_attr_set(
454 xfs_inode_t *dp, 458 xfs_inode_t *dp,
455 const char *name, 459 const unsigned char *name,
456 char *value, 460 unsigned char *value,
457 int valuelen, 461 int valuelen,
458 int flags) 462 int flags)
459{ 463{
@@ -600,7 +604,7 @@ out:
600int 604int
601xfs_attr_remove( 605xfs_attr_remove(
602 xfs_inode_t *dp, 606 xfs_inode_t *dp,
603 const char *name, 607 const unsigned char *name,
604 int flags) 608 int flags)
605{ 609{
606 int error; 610 int error;
@@ -669,9 +673,13 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
669 */ 673 */
670/*ARGSUSED*/ 674/*ARGSUSED*/
671STATIC int 675STATIC int
672xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags, 676xfs_attr_put_listent(
673 char *name, int namelen, 677 xfs_attr_list_context_t *context,
674 int valuelen, char *value) 678 int flags,
679 unsigned char *name,
680 int namelen,
681 int valuelen,
682 unsigned char *value)
675{ 683{
676 struct attrlist *alist = (struct attrlist *)context->alist; 684 struct attrlist *alist = (struct attrlist *)context->alist;
677 attrlist_ent_t *aep; 685 attrlist_ent_t *aep;
@@ -1980,7 +1988,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
1980 xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE]; 1988 xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE];
1981 xfs_mount_t *mp; 1989 xfs_mount_t *mp;
1982 xfs_daddr_t dblkno; 1990 xfs_daddr_t dblkno;
1983 xfs_caddr_t dst; 1991 void *dst;
1984 xfs_buf_t *bp; 1992 xfs_buf_t *bp;
1985 int nmap, error, tmp, valuelen, blkcnt, i; 1993 int nmap, error, tmp, valuelen, blkcnt, i;
1986 xfs_dablk_t lblkno; 1994 xfs_dablk_t lblkno;
@@ -2007,15 +2015,14 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
2007 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 2015 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
2008 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 2016 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
2009 error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno, 2017 error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
2010 blkcnt, 2018 blkcnt, XBF_LOCK | XBF_DONT_BLOCK,
2011 XFS_BUF_LOCK | XBF_DONT_BLOCK,
2012 &bp); 2019 &bp);
2013 if (error) 2020 if (error)
2014 return(error); 2021 return(error);
2015 2022
2016 tmp = (valuelen < XFS_BUF_SIZE(bp)) 2023 tmp = (valuelen < XFS_BUF_SIZE(bp))
2017 ? valuelen : XFS_BUF_SIZE(bp); 2024 ? valuelen : XFS_BUF_SIZE(bp);
2018 xfs_biomove(bp, 0, tmp, dst, XFS_B_READ); 2025 xfs_biomove(bp, 0, tmp, dst, XBF_READ);
2019 xfs_buf_relse(bp); 2026 xfs_buf_relse(bp);
2020 dst += tmp; 2027 dst += tmp;
2021 valuelen -= tmp; 2028 valuelen -= tmp;
@@ -2039,7 +2046,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2039 xfs_inode_t *dp; 2046 xfs_inode_t *dp;
2040 xfs_bmbt_irec_t map; 2047 xfs_bmbt_irec_t map;
2041 xfs_daddr_t dblkno; 2048 xfs_daddr_t dblkno;
2042 xfs_caddr_t src; 2049 void *src;
2043 xfs_buf_t *bp; 2050 xfs_buf_t *bp;
2044 xfs_dablk_t lblkno; 2051 xfs_dablk_t lblkno;
2045 int blkcnt, valuelen, nmap, error, tmp, committed; 2052 int blkcnt, valuelen, nmap, error, tmp, committed;
@@ -2141,13 +2148,13 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2141 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 2148 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2142 2149
2143 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 2150 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt,
2144 XFS_BUF_LOCK | XBF_DONT_BLOCK); 2151 XBF_LOCK | XBF_DONT_BLOCK);
2145 ASSERT(bp); 2152 ASSERT(bp);
2146 ASSERT(!XFS_BUF_GETERROR(bp)); 2153 ASSERT(!XFS_BUF_GETERROR(bp));
2147 2154
2148 tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : 2155 tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
2149 XFS_BUF_SIZE(bp); 2156 XFS_BUF_SIZE(bp);
2150 xfs_biomove(bp, 0, tmp, src, XFS_B_WRITE); 2157 xfs_biomove(bp, 0, tmp, src, XBF_WRITE);
2151 if (tmp < XFS_BUF_SIZE(bp)) 2158 if (tmp < XFS_BUF_SIZE(bp))
2152 xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); 2159 xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
2153 if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */ 2160 if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */
@@ -2208,8 +2215,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2208 /* 2215 /*
2209 * If the "remote" value is in the cache, remove it. 2216 * If the "remote" value is in the cache, remove it.
2210 */ 2217 */
2211 bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, 2218 bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK);
2212 XFS_INCORE_TRYLOCK);
2213 if (bp) { 2219 if (bp) {
2214 XFS_BUF_STALE(bp); 2220 XFS_BUF_STALE(bp);
2215 XFS_BUF_UNDELAYWRITE(bp); 2221 XFS_BUF_UNDELAYWRITE(bp);
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 59b410ce69a1..e920d68ef509 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -113,7 +113,7 @@ typedef struct attrlist_cursor_kern {
113 113
114 114
115typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int, 115typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
116 char *, int, int, char *); 116 unsigned char *, int, int, unsigned char *);
117 117
118typedef struct xfs_attr_list_context { 118typedef struct xfs_attr_list_context {
119 struct xfs_inode *dp; /* inode */ 119 struct xfs_inode *dp; /* inode */
@@ -139,7 +139,6 @@ typedef struct xfs_attr_list_context {
139/* 139/*
140 * Overall external interface routines. 140 * Overall external interface routines.
141 */ 141 */
142int xfs_attr_calc_size(struct xfs_inode *, int, int, int *);
143int xfs_attr_inactive(struct xfs_inode *dp); 142int xfs_attr_inactive(struct xfs_inode *dp);
144int xfs_attr_rmtval_get(struct xfs_da_args *args); 143int xfs_attr_rmtval_get(struct xfs_da_args *args);
145int xfs_attr_list_int(struct xfs_attr_list_context *); 144int xfs_attr_list_int(struct xfs_attr_list_context *);
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index baf41b5af756..a90ce74fc256 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -521,11 +521,11 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
521 521
522 sfe = &sf->list[0]; 522 sfe = &sf->list[0];
523 for (i = 0; i < sf->hdr.count; i++) { 523 for (i = 0; i < sf->hdr.count; i++) {
524 nargs.name = (char *)sfe->nameval; 524 nargs.name = sfe->nameval;
525 nargs.namelen = sfe->namelen; 525 nargs.namelen = sfe->namelen;
526 nargs.value = (char *)&sfe->nameval[nargs.namelen]; 526 nargs.value = &sfe->nameval[nargs.namelen];
527 nargs.valuelen = sfe->valuelen; 527 nargs.valuelen = sfe->valuelen;
528 nargs.hashval = xfs_da_hashname((char *)sfe->nameval, 528 nargs.hashval = xfs_da_hashname(sfe->nameval,
529 sfe->namelen); 529 sfe->namelen);
530 nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags); 530 nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags);
531 error = xfs_attr_leaf_lookup_int(bp, &nargs); /* set a->index */ 531 error = xfs_attr_leaf_lookup_int(bp, &nargs); /* set a->index */
@@ -612,10 +612,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
612 for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { 612 for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
613 error = context->put_listent(context, 613 error = context->put_listent(context,
614 sfe->flags, 614 sfe->flags,
615 (char *)sfe->nameval, 615 sfe->nameval,
616 (int)sfe->namelen, 616 (int)sfe->namelen,
617 (int)sfe->valuelen, 617 (int)sfe->valuelen,
618 (char*)&sfe->nameval[sfe->namelen]); 618 &sfe->nameval[sfe->namelen]);
619 619
620 /* 620 /*
621 * Either search callback finished early or 621 * Either search callback finished early or
@@ -659,8 +659,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
659 } 659 }
660 660
661 sbp->entno = i; 661 sbp->entno = i;
662 sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen); 662 sbp->hash = xfs_da_hashname(sfe->nameval, sfe->namelen);
663 sbp->name = (char *)sfe->nameval; 663 sbp->name = sfe->nameval;
664 sbp->namelen = sfe->namelen; 664 sbp->namelen = sfe->namelen;
665 /* These are bytes, and both on-disk, don't endian-flip */ 665 /* These are bytes, and both on-disk, don't endian-flip */
666 sbp->valuelen = sfe->valuelen; 666 sbp->valuelen = sfe->valuelen;
@@ -818,9 +818,9 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
818 continue; 818 continue;
819 ASSERT(entry->flags & XFS_ATTR_LOCAL); 819 ASSERT(entry->flags & XFS_ATTR_LOCAL);
820 name_loc = xfs_attr_leaf_name_local(leaf, i); 820 name_loc = xfs_attr_leaf_name_local(leaf, i);
821 nargs.name = (char *)name_loc->nameval; 821 nargs.name = name_loc->nameval;
822 nargs.namelen = name_loc->namelen; 822 nargs.namelen = name_loc->namelen;
823 nargs.value = (char *)&name_loc->nameval[nargs.namelen]; 823 nargs.value = &name_loc->nameval[nargs.namelen];
824 nargs.valuelen = be16_to_cpu(name_loc->valuelen); 824 nargs.valuelen = be16_to_cpu(name_loc->valuelen);
825 nargs.hashval = be32_to_cpu(entry->hashval); 825 nargs.hashval = be32_to_cpu(entry->hashval);
826 nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags); 826 nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags);
@@ -2370,10 +2370,10 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2370 2370
2371 retval = context->put_listent(context, 2371 retval = context->put_listent(context,
2372 entry->flags, 2372 entry->flags,
2373 (char *)name_loc->nameval, 2373 name_loc->nameval,
2374 (int)name_loc->namelen, 2374 (int)name_loc->namelen,
2375 be16_to_cpu(name_loc->valuelen), 2375 be16_to_cpu(name_loc->valuelen),
2376 (char *)&name_loc->nameval[name_loc->namelen]); 2376 &name_loc->nameval[name_loc->namelen]);
2377 if (retval) 2377 if (retval)
2378 return retval; 2378 return retval;
2379 } else { 2379 } else {
@@ -2397,15 +2397,15 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2397 return retval; 2397 return retval;
2398 retval = context->put_listent(context, 2398 retval = context->put_listent(context,
2399 entry->flags, 2399 entry->flags,
2400 (char *)name_rmt->name, 2400 name_rmt->name,
2401 (int)name_rmt->namelen, 2401 (int)name_rmt->namelen,
2402 valuelen, 2402 valuelen,
2403 (char*)args.value); 2403 args.value);
2404 kmem_free(args.value); 2404 kmem_free(args.value);
2405 } else { 2405 } else {
2406 retval = context->put_listent(context, 2406 retval = context->put_listent(context,
2407 entry->flags, 2407 entry->flags,
2408 (char *)name_rmt->name, 2408 name_rmt->name,
2409 (int)name_rmt->namelen, 2409 (int)name_rmt->namelen,
2410 valuelen, 2410 valuelen,
2411 NULL); 2411 NULL);
@@ -2950,7 +2950,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
2950 map.br_blockcount); 2950 map.br_blockcount);
2951 bp = xfs_trans_get_buf(*trans, 2951 bp = xfs_trans_get_buf(*trans,
2952 dp->i_mount->m_ddev_targp, 2952 dp->i_mount->m_ddev_targp,
2953 dblkno, dblkcnt, XFS_BUF_LOCK); 2953 dblkno, dblkcnt, XBF_LOCK);
2954 xfs_trans_binval(*trans, bp); 2954 xfs_trans_binval(*trans, bp);
2955 /* 2955 /*
2956 * Roll to next transaction. 2956 * Roll to next transaction.
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h
index 76ab7b0cbb3a..919756e3ba53 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/xfs_attr_sf.h
@@ -52,7 +52,7 @@ typedef struct xfs_attr_sf_sort {
52 __uint8_t valuelen; /* length of value */ 52 __uint8_t valuelen; /* length of value */
53 __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ 53 __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
54 xfs_dahash_t hash; /* this entry's hash value */ 54 xfs_dahash_t hash; /* this entry's hash value */
55 char *name; /* name value, pointer into buffer */ 55 unsigned char *name; /* name value, pointer into buffer */
56} xfs_attr_sf_sort_t; 56} xfs_attr_sf_sort_t;
57 57
58#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen) /* space name/value uses */ \ 58#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen) /* space name/value uses */ \
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 98251cdc52aa..1869fb973819 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2629,13 +2629,12 @@ xfs_bmap_btalloc(
2629 if (startag == NULLAGNUMBER) 2629 if (startag == NULLAGNUMBER)
2630 startag = ag = 0; 2630 startag = ag = 0;
2631 notinit = 0; 2631 notinit = 0;
2632 down_read(&mp->m_peraglock); 2632 pag = xfs_perag_get(mp, ag);
2633 while (blen < ap->alen) { 2633 while (blen < ap->alen) {
2634 pag = &mp->m_perag[ag];
2635 if (!pag->pagf_init && 2634 if (!pag->pagf_init &&
2636 (error = xfs_alloc_pagf_init(mp, args.tp, 2635 (error = xfs_alloc_pagf_init(mp, args.tp,
2637 ag, XFS_ALLOC_FLAG_TRYLOCK))) { 2636 ag, XFS_ALLOC_FLAG_TRYLOCK))) {
2638 up_read(&mp->m_peraglock); 2637 xfs_perag_put(pag);
2639 return error; 2638 return error;
2640 } 2639 }
2641 /* 2640 /*
@@ -2667,13 +2666,13 @@ xfs_bmap_btalloc(
2667 break; 2666 break;
2668 2667
2669 error = xfs_filestream_new_ag(ap, &ag); 2668 error = xfs_filestream_new_ag(ap, &ag);
2670 if (error) { 2669 xfs_perag_put(pag);
2671 up_read(&mp->m_peraglock); 2670 if (error)
2672 return error; 2671 return error;
2673 }
2674 2672
2675 /* loop again to set 'blen'*/ 2673 /* loop again to set 'blen'*/
2676 startag = NULLAGNUMBER; 2674 startag = NULLAGNUMBER;
2675 pag = xfs_perag_get(mp, ag);
2677 continue; 2676 continue;
2678 } 2677 }
2679 } 2678 }
@@ -2681,8 +2680,10 @@ xfs_bmap_btalloc(
2681 ag = 0; 2680 ag = 0;
2682 if (ag == startag) 2681 if (ag == startag)
2683 break; 2682 break;
2683 xfs_perag_put(pag);
2684 pag = xfs_perag_get(mp, ag);
2684 } 2685 }
2685 up_read(&mp->m_peraglock); 2686 xfs_perag_put(pag);
2686 /* 2687 /*
2687 * Since the above loop did a BUF_TRYLOCK, it is 2688 * Since the above loop did a BUF_TRYLOCK, it is
2688 * possible that there is space for this request. 2689 * possible that there is space for this request.
@@ -4470,7 +4471,7 @@ xfs_bmapi(
4470 xfs_fsblock_t abno; /* allocated block number */ 4471 xfs_fsblock_t abno; /* allocated block number */
4471 xfs_extlen_t alen; /* allocated extent length */ 4472 xfs_extlen_t alen; /* allocated extent length */
4472 xfs_fileoff_t aoff; /* allocated file offset */ 4473 xfs_fileoff_t aoff; /* allocated file offset */
4473 xfs_bmalloca_t bma; /* args for xfs_bmap_alloc */ 4474 xfs_bmalloca_t bma = { 0 }; /* args for xfs_bmap_alloc */
4474 xfs_btree_cur_t *cur; /* bmap btree cursor */ 4475 xfs_btree_cur_t *cur; /* bmap btree cursor */
4475 xfs_fileoff_t end; /* end of mapped file region */ 4476 xfs_fileoff_t end; /* end of mapped file region */
4476 int eof; /* we've hit the end of extents */ 4477 int eof; /* we've hit the end of extents */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 38751d5fac6f..416e47e54b83 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -334,7 +334,7 @@ xfs_bmbt_disk_set_allf(
334/* 334/*
335 * Set all the fields in a bmap extent record from the uncompressed form. 335 * Set all the fields in a bmap extent record from the uncompressed form.
336 */ 336 */
337void 337STATIC void
338xfs_bmbt_disk_set_all( 338xfs_bmbt_disk_set_all(
339 xfs_bmbt_rec_t *r, 339 xfs_bmbt_rec_t *r,
340 xfs_bmbt_irec_t *s) 340 xfs_bmbt_irec_t *s)
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index cf07ca7c22e7..0e66c4ea0f85 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -223,7 +223,6 @@ extern void xfs_bmbt_set_startblock(xfs_bmbt_rec_host_t *r, xfs_fsblock_t v);
223extern void xfs_bmbt_set_startoff(xfs_bmbt_rec_host_t *r, xfs_fileoff_t v); 223extern void xfs_bmbt_set_startoff(xfs_bmbt_rec_host_t *r, xfs_fileoff_t v);
224extern void xfs_bmbt_set_state(xfs_bmbt_rec_host_t *r, xfs_exntst_t v); 224extern void xfs_bmbt_set_state(xfs_bmbt_rec_host_t *r, xfs_exntst_t v);
225 225
226extern void xfs_bmbt_disk_set_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
227extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, 226extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o,
228 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); 227 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
229 228
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 36a0992dd669..96be4b0f2496 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -977,7 +977,7 @@ xfs_btree_get_buf_block(
977 xfs_daddr_t d; 977 xfs_daddr_t d;
978 978
979 /* need to sort out how callers deal with failures first */ 979 /* need to sort out how callers deal with failures first */
980 ASSERT(!(flags & XFS_BUF_TRYLOCK)); 980 ASSERT(!(flags & XBF_TRYLOCK));
981 981
982 d = xfs_btree_ptr_to_daddr(cur, ptr); 982 d = xfs_btree_ptr_to_daddr(cur, ptr);
983 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, 983 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
@@ -1008,7 +1008,7 @@ xfs_btree_read_buf_block(
1008 int error; 1008 int error;
1009 1009
1010 /* need to sort out how callers deal with failures first */ 1010 /* need to sort out how callers deal with failures first */
1011 ASSERT(!(flags & XFS_BUF_TRYLOCK)); 1011 ASSERT(!(flags & XBF_TRYLOCK));
1012 1012
1013 d = xfs_btree_ptr_to_daddr(cur, ptr); 1013 d = xfs_btree_ptr_to_daddr(cur, ptr);
1014 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, 1014 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a30f7e9eb2b9..f3c49e69eab9 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -250,7 +250,7 @@ xfs_buf_item_format(
250 ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); 250 ((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
251 vecp->i_addr = (xfs_caddr_t)&bip->bli_format; 251 vecp->i_addr = (xfs_caddr_t)&bip->bli_format;
252 vecp->i_len = base_size; 252 vecp->i_len = base_size;
253 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BFORMAT); 253 vecp->i_type = XLOG_REG_TYPE_BFORMAT;
254 vecp++; 254 vecp++;
255 nvecs = 1; 255 nvecs = 1;
256 256
@@ -297,14 +297,14 @@ xfs_buf_item_format(
297 buffer_offset = first_bit * XFS_BLI_CHUNK; 297 buffer_offset = first_bit * XFS_BLI_CHUNK;
298 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 298 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
299 vecp->i_len = nbits * XFS_BLI_CHUNK; 299 vecp->i_len = nbits * XFS_BLI_CHUNK;
300 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); 300 vecp->i_type = XLOG_REG_TYPE_BCHUNK;
301 nvecs++; 301 nvecs++;
302 break; 302 break;
303 } else if (next_bit != last_bit + 1) { 303 } else if (next_bit != last_bit + 1) {
304 buffer_offset = first_bit * XFS_BLI_CHUNK; 304 buffer_offset = first_bit * XFS_BLI_CHUNK;
305 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 305 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
306 vecp->i_len = nbits * XFS_BLI_CHUNK; 306 vecp->i_len = nbits * XFS_BLI_CHUNK;
307 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); 307 vecp->i_type = XLOG_REG_TYPE_BCHUNK;
308 nvecs++; 308 nvecs++;
309 vecp++; 309 vecp++;
310 first_bit = next_bit; 310 first_bit = next_bit;
@@ -316,7 +316,7 @@ xfs_buf_item_format(
316 buffer_offset = first_bit * XFS_BLI_CHUNK; 316 buffer_offset = first_bit * XFS_BLI_CHUNK;
317 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 317 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
318 vecp->i_len = nbits * XFS_BLI_CHUNK; 318 vecp->i_len = nbits * XFS_BLI_CHUNK;
319 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); 319 vecp->i_type = XLOG_REG_TYPE_BCHUNK;
320/* You would think we need to bump the nvecs here too, but we do not 320/* You would think we need to bump the nvecs here too, but we do not
321 * this number is used by recovery, and it gets confused by the boundary 321 * this number is used by recovery, and it gets confused by the boundary
322 * split here 322 * split here
@@ -467,8 +467,10 @@ xfs_buf_item_unpin_remove(
467/* 467/*
468 * This is called to attempt to lock the buffer associated with this 468 * This is called to attempt to lock the buffer associated with this
469 * buf log item. Don't sleep on the buffer lock. If we can't get 469 * buf log item. Don't sleep on the buffer lock. If we can't get
470 * the lock right away, return 0. If we can get the lock, pull the 470 * the lock right away, return 0. If we can get the lock, take a
471 * buffer from the free list, mark it busy, and return 1. 471 * reference to the buffer. If this is a delayed write buffer that
472 * needs AIL help to be written back, invoke the pushbuf routine
473 * rather than the normal success path.
472 */ 474 */
473STATIC uint 475STATIC uint
474xfs_buf_item_trylock( 476xfs_buf_item_trylock(
@@ -477,24 +479,18 @@ xfs_buf_item_trylock(
477 xfs_buf_t *bp; 479 xfs_buf_t *bp;
478 480
479 bp = bip->bli_buf; 481 bp = bip->bli_buf;
480 482 if (XFS_BUF_ISPINNED(bp))
481 if (XFS_BUF_ISPINNED(bp)) {
482 return XFS_ITEM_PINNED; 483 return XFS_ITEM_PINNED;
483 } 484 if (!XFS_BUF_CPSEMA(bp))
484
485 if (!XFS_BUF_CPSEMA(bp)) {
486 return XFS_ITEM_LOCKED; 485 return XFS_ITEM_LOCKED;
487 }
488 486
489 /* 487 /* take a reference to the buffer. */
490 * Remove the buffer from the free list. Only do this
491 * if it's on the free list. Private buffers like the
492 * superblock buffer are not.
493 */
494 XFS_BUF_HOLD(bp); 488 XFS_BUF_HOLD(bp);
495 489
496 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 490 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
497 trace_xfs_buf_item_trylock(bip); 491 trace_xfs_buf_item_trylock(bip);
492 if (XFS_BUF_ISDELAYWRITE(bp))
493 return XFS_ITEM_PUSHBUF;
498 return XFS_ITEM_SUCCESS; 494 return XFS_ITEM_SUCCESS;
499} 495}
500 496
@@ -626,11 +622,9 @@ xfs_buf_item_committed(
626} 622}
627 623
628/* 624/*
629 * This is called to asynchronously write the buffer associated with this 625 * The buffer is locked, but is not a delayed write buffer. This happens
630 * buf log item out to disk. The buffer will already have been locked by 626 * if we race with IO completion and hence we don't want to try to write it
631 * a successful call to xfs_buf_item_trylock(). If the buffer still has 627 * again. Just release the buffer.
632 * B_DELWRI set, then get it going out to disk with a call to bawrite().
633 * If not, then just release the buffer.
634 */ 628 */
635STATIC void 629STATIC void
636xfs_buf_item_push( 630xfs_buf_item_push(
@@ -642,17 +636,29 @@ xfs_buf_item_push(
642 trace_xfs_buf_item_push(bip); 636 trace_xfs_buf_item_push(bip);
643 637
644 bp = bip->bli_buf; 638 bp = bip->bli_buf;
639 ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
640 xfs_buf_relse(bp);
641}
645 642
646 if (XFS_BUF_ISDELAYWRITE(bp)) { 643/*
647 int error; 644 * The buffer is locked and is a delayed write buffer. Promote the buffer
648 error = xfs_bawrite(bip->bli_item.li_mountp, bp); 645 * in the delayed write queue as the caller knows that they must invoke
649 if (error) 646 * the xfsbufd to get this buffer written. We have to unlock the buffer
650 xfs_fs_cmn_err(CE_WARN, bip->bli_item.li_mountp, 647 * to allow the xfsbufd to write it, too.
651 "xfs_buf_item_push: pushbuf error %d on bip %p, bp %p", 648 */
652 error, bip, bp); 649STATIC void
653 } else { 650xfs_buf_item_pushbuf(
654 xfs_buf_relse(bp); 651 xfs_buf_log_item_t *bip)
655 } 652{
653 xfs_buf_t *bp;
654
655 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
656 trace_xfs_buf_item_pushbuf(bip);
657
658 bp = bip->bli_buf;
659 ASSERT(XFS_BUF_ISDELAYWRITE(bp));
660 xfs_buf_delwri_promote(bp);
661 xfs_buf_relse(bp);
656} 662}
657 663
658/* ARGSUSED */ 664/* ARGSUSED */
@@ -677,7 +683,7 @@ static struct xfs_item_ops xfs_buf_item_ops = {
677 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 683 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
678 xfs_buf_item_committed, 684 xfs_buf_item_committed,
679 .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push, 685 .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push,
680 .iop_pushbuf = NULL, 686 .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_buf_item_pushbuf,
681 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) 687 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
682 xfs_buf_item_committing 688 xfs_buf_item_committing
683}; 689};
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index c0c8869115b1..0ca556b4bf31 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1534,8 +1534,8 @@ xfs_da_hashname(const __uint8_t *name, int namelen)
1534enum xfs_dacmp 1534enum xfs_dacmp
1535xfs_da_compname( 1535xfs_da_compname(
1536 struct xfs_da_args *args, 1536 struct xfs_da_args *args,
1537 const char *name, 1537 const unsigned char *name,
1538 int len) 1538 int len)
1539{ 1539{
1540 return (args->namelen == len && memcmp(args->name, name, len) == 0) ? 1540 return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
1541 XFS_CMP_EXACT : XFS_CMP_DIFFERENT; 1541 XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 30cd08f56a3a..fe9f5a8c1d2a 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -209,7 +209,8 @@ typedef struct xfs_da_state {
209 */ 209 */
210struct xfs_nameops { 210struct xfs_nameops {
211 xfs_dahash_t (*hashname)(struct xfs_name *); 211 xfs_dahash_t (*hashname)(struct xfs_name *);
212 enum xfs_dacmp (*compname)(struct xfs_da_args *, const char *, int); 212 enum xfs_dacmp (*compname)(struct xfs_da_args *,
213 const unsigned char *, int);
213}; 214};
214 215
215 216
@@ -260,7 +261,7 @@ int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
260 261
261uint xfs_da_hashname(const __uint8_t *name_string, int name_length); 262uint xfs_da_hashname(const __uint8_t *name_string, int name_length);
262enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, 263enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
263 const char *name, int len); 264 const unsigned char *name, int len);
264 265
265 266
266xfs_da_state_t *xfs_da_state_alloc(void); 267xfs_da_state_t *xfs_da_state_alloc(void);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 84ca1cf16a1e..cd27c9d6c71f 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -45,15 +45,21 @@
45#include "xfs_vnodeops.h" 45#include "xfs_vnodeops.h"
46#include "xfs_trace.h" 46#include "xfs_trace.h"
47 47
48
49static int xfs_swap_extents(
50 xfs_inode_t *ip, /* target inode */
51 xfs_inode_t *tip, /* tmp inode */
52 xfs_swapext_t *sxp);
53
48/* 54/*
49 * Syssgi interface for swapext 55 * ioctl interface for swapext
50 */ 56 */
51int 57int
52xfs_swapext( 58xfs_swapext(
53 xfs_swapext_t *sxp) 59 xfs_swapext_t *sxp)
54{ 60{
55 xfs_inode_t *ip, *tip; 61 xfs_inode_t *ip, *tip;
56 struct file *file, *target_file; 62 struct file *file, *tmp_file;
57 int error = 0; 63 int error = 0;
58 64
59 /* Pull information for the target fd */ 65 /* Pull information for the target fd */
@@ -68,46 +74,46 @@ xfs_swapext(
68 goto out_put_file; 74 goto out_put_file;
69 } 75 }
70 76
71 target_file = fget((int)sxp->sx_fdtmp); 77 tmp_file = fget((int)sxp->sx_fdtmp);
72 if (!target_file) { 78 if (!tmp_file) {
73 error = XFS_ERROR(EINVAL); 79 error = XFS_ERROR(EINVAL);
74 goto out_put_file; 80 goto out_put_file;
75 } 81 }
76 82
77 if (!(target_file->f_mode & FMODE_WRITE) || 83 if (!(tmp_file->f_mode & FMODE_WRITE) ||
78 (target_file->f_flags & O_APPEND)) { 84 (tmp_file->f_flags & O_APPEND)) {
79 error = XFS_ERROR(EBADF); 85 error = XFS_ERROR(EBADF);
80 goto out_put_target_file; 86 goto out_put_tmp_file;
81 } 87 }
82 88
83 if (IS_SWAPFILE(file->f_path.dentry->d_inode) || 89 if (IS_SWAPFILE(file->f_path.dentry->d_inode) ||
84 IS_SWAPFILE(target_file->f_path.dentry->d_inode)) { 90 IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) {
85 error = XFS_ERROR(EINVAL); 91 error = XFS_ERROR(EINVAL);
86 goto out_put_target_file; 92 goto out_put_tmp_file;
87 } 93 }
88 94
89 ip = XFS_I(file->f_path.dentry->d_inode); 95 ip = XFS_I(file->f_path.dentry->d_inode);
90 tip = XFS_I(target_file->f_path.dentry->d_inode); 96 tip = XFS_I(tmp_file->f_path.dentry->d_inode);
91 97
92 if (ip->i_mount != tip->i_mount) { 98 if (ip->i_mount != tip->i_mount) {
93 error = XFS_ERROR(EINVAL); 99 error = XFS_ERROR(EINVAL);
94 goto out_put_target_file; 100 goto out_put_tmp_file;
95 } 101 }
96 102
97 if (ip->i_ino == tip->i_ino) { 103 if (ip->i_ino == tip->i_ino) {
98 error = XFS_ERROR(EINVAL); 104 error = XFS_ERROR(EINVAL);
99 goto out_put_target_file; 105 goto out_put_tmp_file;
100 } 106 }
101 107
102 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 108 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
103 error = XFS_ERROR(EIO); 109 error = XFS_ERROR(EIO);
104 goto out_put_target_file; 110 goto out_put_tmp_file;
105 } 111 }
106 112
107 error = xfs_swap_extents(ip, tip, sxp); 113 error = xfs_swap_extents(ip, tip, sxp);
108 114
109 out_put_target_file: 115 out_put_tmp_file:
110 fput(target_file); 116 fput(tmp_file);
111 out_put_file: 117 out_put_file:
112 fput(file); 118 fput(file);
113 out: 119 out:
@@ -186,7 +192,7 @@ xfs_swap_extents_check_format(
186 return 0; 192 return 0;
187} 193}
188 194
189int 195static int
190xfs_swap_extents( 196xfs_swap_extents(
191 xfs_inode_t *ip, /* target inode */ 197 xfs_inode_t *ip, /* target inode */
192 xfs_inode_t *tip, /* tmp inode */ 198 xfs_inode_t *tip, /* tmp inode */
@@ -254,6 +260,9 @@ xfs_swap_extents(
254 goto out_unlock; 260 goto out_unlock;
255 } 261 }
256 262
263 trace_xfs_swap_extent_before(ip, 0);
264 trace_xfs_swap_extent_before(tip, 1);
265
257 /* check inode formats now that data is flushed */ 266 /* check inode formats now that data is flushed */
258 error = xfs_swap_extents_check_format(ip, tip); 267 error = xfs_swap_extents_check_format(ip, tip);
259 if (error) { 268 if (error) {
@@ -421,6 +430,8 @@ xfs_swap_extents(
421 430
422 error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); 431 error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT);
423 432
433 trace_xfs_swap_extent_after(ip, 0);
434 trace_xfs_swap_extent_after(tip, 1);
424out: 435out:
425 kmem_free(tempifp); 436 kmem_free(tempifp);
426 return error; 437 return error;
diff --git a/fs/xfs/xfs_dfrag.h b/fs/xfs/xfs_dfrag.h
index 4f55a6306558..20bdd935c121 100644
--- a/fs/xfs/xfs_dfrag.h
+++ b/fs/xfs/xfs_dfrag.h
@@ -48,9 +48,6 @@ typedef struct xfs_swapext
48 */ 48 */
49int xfs_swapext(struct xfs_swapext *sx); 49int xfs_swapext(struct xfs_swapext *sx);
50 50
51int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
52 struct xfs_swapext *sxp);
53
54#endif /* __KERNEL__ */ 51#endif /* __KERNEL__ */
55 52
56#endif /* __XFS_DFRAG_H__ */ 53#endif /* __XFS_DFRAG_H__ */
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 93634a7e90e9..42520f041265 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -44,7 +44,7 @@
44#include "xfs_vnodeops.h" 44#include "xfs_vnodeops.h"
45#include "xfs_trace.h" 45#include "xfs_trace.h"
46 46
47struct xfs_name xfs_name_dotdot = {"..", 2}; 47struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2};
48 48
49/* 49/*
50 * ASCII case-insensitive (ie. A-Z) support for directories that was 50 * ASCII case-insensitive (ie. A-Z) support for directories that was
@@ -66,8 +66,8 @@ xfs_ascii_ci_hashname(
66STATIC enum xfs_dacmp 66STATIC enum xfs_dacmp
67xfs_ascii_ci_compname( 67xfs_ascii_ci_compname(
68 struct xfs_da_args *args, 68 struct xfs_da_args *args,
69 const char *name, 69 const unsigned char *name,
70 int len) 70 int len)
71{ 71{
72 enum xfs_dacmp result; 72 enum xfs_dacmp result;
73 int i; 73 int i;
@@ -247,7 +247,7 @@ xfs_dir_createname(
247int 247int
248xfs_dir_cilookup_result( 248xfs_dir_cilookup_result(
249 struct xfs_da_args *args, 249 struct xfs_da_args *args,
250 const char *name, 250 const unsigned char *name,
251 int len) 251 int len)
252{ 252{
253 if (args->cmpresult == XFS_CMP_DIFFERENT) 253 if (args->cmpresult == XFS_CMP_DIFFERENT)
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 1d9ef96f33aa..74a3b1057685 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -100,7 +100,7 @@ extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
100extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, 100extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
101 struct xfs_dabuf *bp); 101 struct xfs_dabuf *bp);
102 102
103extern int xfs_dir_cilookup_result(struct xfs_da_args *args, const char *name, 103extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
104 int len); 104 const unsigned char *name, int len);
105 105
106#endif /* __XFS_DIR2_H__ */ 106#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index ddc4ecc7807f..779a267b0a84 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -57,8 +57,8 @@ static xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot;
57void 57void
58xfs_dir_startup(void) 58xfs_dir_startup(void)
59{ 59{
60 xfs_dir_hash_dot = xfs_da_hashname(".", 1); 60 xfs_dir_hash_dot = xfs_da_hashname((unsigned char *)".", 1);
61 xfs_dir_hash_dotdot = xfs_da_hashname("..", 2); 61 xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2);
62} 62}
63 63
64/* 64/*
@@ -513,8 +513,9 @@ xfs_dir2_block_getdents(
513 /* 513 /*
514 * If it didn't fit, set the final offset to here & return. 514 * If it didn't fit, set the final offset to here & return.
515 */ 515 */
516 if (filldir(dirent, dep->name, dep->namelen, cook & 0x7fffffff, 516 if (filldir(dirent, (char *)dep->name, dep->namelen,
517 be64_to_cpu(dep->inumber), DT_UNKNOWN)) { 517 cook & 0x7fffffff, be64_to_cpu(dep->inumber),
518 DT_UNKNOWN)) {
518 *offset = cook & 0x7fffffff; 519 *offset = cook & 0x7fffffff;
519 xfs_da_brelse(NULL, bp); 520 xfs_da_brelse(NULL, bp);
520 return 0; 521 return 0;
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 29f484c11b3a..e2d89854ec9e 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -1081,7 +1081,7 @@ xfs_dir2_leaf_getdents(
1081 dep = (xfs_dir2_data_entry_t *)ptr; 1081 dep = (xfs_dir2_data_entry_t *)ptr;
1082 length = xfs_dir2_data_entsize(dep->namelen); 1082 length = xfs_dir2_data_entsize(dep->namelen);
1083 1083
1084 if (filldir(dirent, dep->name, dep->namelen, 1084 if (filldir(dirent, (char *)dep->name, dep->namelen,
1085 xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff, 1085 xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff,
1086 be64_to_cpu(dep->inumber), DT_UNKNOWN)) 1086 be64_to_cpu(dep->inumber), DT_UNKNOWN))
1087 break; 1087 break;
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index ce6e355199b5..78fc4d9ae756 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -65,7 +65,7 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
65/* 65/*
66 * Log entries from a freespace block. 66 * Log entries from a freespace block.
67 */ 67 */
68void 68STATIC void
69xfs_dir2_free_log_bests( 69xfs_dir2_free_log_bests(
70 xfs_trans_t *tp, /* transaction pointer */ 70 xfs_trans_t *tp, /* transaction pointer */
71 xfs_dabuf_t *bp, /* freespace buffer */ 71 xfs_dabuf_t *bp, /* freespace buffer */
diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h
index dde72db3d695..82dfe7147195 100644
--- a/fs/xfs/xfs_dir2_node.h
+++ b/fs/xfs/xfs_dir2_node.h
@@ -75,8 +75,6 @@ xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
75 return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp)); 75 return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp));
76} 76}
77 77
78extern void xfs_dir2_free_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
79 int first, int last);
80extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, 78extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
81 struct xfs_dabuf *lbp); 79 struct xfs_dabuf *lbp);
82extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count); 80extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 9d4f17a69676..c1a5945d463a 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -782,7 +782,7 @@ xfs_dir2_sf_getdents(
782 } 782 }
783 783
784 ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); 784 ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
785 if (filldir(dirent, sfep->name, sfep->namelen, 785 if (filldir(dirent, (char *)sfep->name, sfep->namelen,
786 off & 0x7fffffff, ino, DT_UNKNOWN)) { 786 off & 0x7fffffff, ino, DT_UNKNOWN)) {
787 *offset = off & 0x7fffffff; 787 *offset = off & 0x7fffffff;
788 return 0; 788 return 0;
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 05a4bdd4be39..6f35ed1b39b9 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -82,7 +82,7 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip,
82 82
83 log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format); 83 log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format);
84 log_vector->i_len = size; 84 log_vector->i_len = size;
85 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFI_FORMAT); 85 log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT;
86 ASSERT(size >= sizeof(xfs_efi_log_format_t)); 86 ASSERT(size >= sizeof(xfs_efi_log_format_t));
87} 87}
88 88
@@ -406,7 +406,7 @@ xfs_efd_item_format(xfs_efd_log_item_t *efdp,
406 406
407 log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format); 407 log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format);
408 log_vector->i_len = size; 408 log_vector->i_len = size;
409 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFD_FORMAT); 409 log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT;
410 ASSERT(size >= sizeof(xfs_efd_log_format_t)); 410 ASSERT(size >= sizeof(xfs_efd_log_format_t));
411} 411}
412 412
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index a631e1451abb..390850ee6603 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -140,6 +140,7 @@ _xfs_filestream_pick_ag(
140 int flags, 140 int flags,
141 xfs_extlen_t minlen) 141 xfs_extlen_t minlen)
142{ 142{
143 int streams, max_streams;
143 int err, trylock, nscan; 144 int err, trylock, nscan;
144 xfs_extlen_t longest, free, minfree, maxfree = 0; 145 xfs_extlen_t longest, free, minfree, maxfree = 0;
145 xfs_agnumber_t ag, max_ag = NULLAGNUMBER; 146 xfs_agnumber_t ag, max_ag = NULLAGNUMBER;
@@ -155,15 +156,15 @@ _xfs_filestream_pick_ag(
155 trylock = XFS_ALLOC_FLAG_TRYLOCK; 156 trylock = XFS_ALLOC_FLAG_TRYLOCK;
156 157
157 for (nscan = 0; 1; nscan++) { 158 for (nscan = 0; 1; nscan++) {
158 159 pag = xfs_perag_get(mp, ag);
159 TRACE_AG_SCAN(mp, ag, xfs_filestream_peek_ag(mp, ag)); 160 TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms));
160
161 pag = mp->m_perag + ag;
162 161
163 if (!pag->pagf_init) { 162 if (!pag->pagf_init) {
164 err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); 163 err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
165 if (err && !trylock) 164 if (err && !trylock) {
165 xfs_perag_put(pag);
166 return err; 166 return err;
167 }
167 } 168 }
168 169
169 /* Might fail sometimes during the 1st pass with trylock set. */ 170 /* Might fail sometimes during the 1st pass with trylock set. */
@@ -173,6 +174,7 @@ _xfs_filestream_pick_ag(
173 /* Keep track of the AG with the most free blocks. */ 174 /* Keep track of the AG with the most free blocks. */
174 if (pag->pagf_freeblks > maxfree) { 175 if (pag->pagf_freeblks > maxfree) {
175 maxfree = pag->pagf_freeblks; 176 maxfree = pag->pagf_freeblks;
177 max_streams = atomic_read(&pag->pagf_fstrms);
176 max_ag = ag; 178 max_ag = ag;
177 } 179 }
178 180
@@ -195,6 +197,8 @@ _xfs_filestream_pick_ag(
195 197
196 /* Break out, retaining the reference on the AG. */ 198 /* Break out, retaining the reference on the AG. */
197 free = pag->pagf_freeblks; 199 free = pag->pagf_freeblks;
200 streams = atomic_read(&pag->pagf_fstrms);
201 xfs_perag_put(pag);
198 *agp = ag; 202 *agp = ag;
199 break; 203 break;
200 } 204 }
@@ -202,6 +206,7 @@ _xfs_filestream_pick_ag(
202 /* Drop the reference on this AG, it's not usable. */ 206 /* Drop the reference on this AG, it's not usable. */
203 xfs_filestream_put_ag(mp, ag); 207 xfs_filestream_put_ag(mp, ag);
204next_ag: 208next_ag:
209 xfs_perag_put(pag);
205 /* Move to the next AG, wrapping to AG 0 if necessary. */ 210 /* Move to the next AG, wrapping to AG 0 if necessary. */
206 if (++ag >= mp->m_sb.sb_agcount) 211 if (++ag >= mp->m_sb.sb_agcount)
207 ag = 0; 212 ag = 0;
@@ -229,6 +234,7 @@ next_ag:
229 if (max_ag != NULLAGNUMBER) { 234 if (max_ag != NULLAGNUMBER) {
230 xfs_filestream_get_ag(mp, max_ag); 235 xfs_filestream_get_ag(mp, max_ag);
231 TRACE_AG_PICK1(mp, max_ag, maxfree); 236 TRACE_AG_PICK1(mp, max_ag, maxfree);
237 streams = max_streams;
232 free = maxfree; 238 free = maxfree;
233 *agp = max_ag; 239 *agp = max_ag;
234 break; 240 break;
@@ -240,16 +246,14 @@ next_ag:
240 return 0; 246 return 0;
241 } 247 }
242 248
243 TRACE_AG_PICK2(mp, startag, *agp, xfs_filestream_peek_ag(mp, *agp), 249 TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags);
244 free, nscan, flags);
245 250
246 return 0; 251 return 0;
247} 252}
248 253
249/* 254/*
250 * Set the allocation group number for a file or a directory, updating inode 255 * Set the allocation group number for a file or a directory, updating inode
251 * references and per-AG references as appropriate. Must be called with the 256 * references and per-AG references as appropriate.
252 * m_peraglock held in read mode.
253 */ 257 */
254static int 258static int
255_xfs_filestream_update_ag( 259_xfs_filestream_update_ag(
@@ -451,20 +455,6 @@ xfs_filestream_unmount(
451} 455}
452 456
453/* 457/*
454 * If the mount point's m_perag array is going to be reallocated, all
455 * outstanding cache entries must be flushed to avoid accessing reference count
456 * addresses that have been freed. The call to xfs_filestream_flush() must be
457 * made inside the block that holds the m_peraglock in write mode to do the
458 * reallocation.
459 */
460void
461xfs_filestream_flush(
462 xfs_mount_t *mp)
463{
464 xfs_mru_cache_flush(mp->m_filestream);
465}
466
467/*
468 * Return the AG of the filestream the file or directory belongs to, or 458 * Return the AG of the filestream the file or directory belongs to, or
469 * NULLAGNUMBER otherwise. 459 * NULLAGNUMBER otherwise.
470 */ 460 */
@@ -526,7 +516,6 @@ xfs_filestream_associate(
526 516
527 mp = pip->i_mount; 517 mp = pip->i_mount;
528 cache = mp->m_filestream; 518 cache = mp->m_filestream;
529 down_read(&mp->m_peraglock);
530 519
531 /* 520 /*
532 * We have a problem, Houston. 521 * We have a problem, Houston.
@@ -543,10 +532,8 @@ xfs_filestream_associate(
543 * 532 *
544 * So, if we can't get the iolock without sleeping then just give up 533 * So, if we can't get the iolock without sleeping then just give up
545 */ 534 */
546 if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) { 535 if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL))
547 up_read(&mp->m_peraglock);
548 return 1; 536 return 1;
549 }
550 537
551 /* If the parent directory is already in the cache, use its AG. */ 538 /* If the parent directory is already in the cache, use its AG. */
552 item = xfs_mru_cache_lookup(cache, pip->i_ino); 539 item = xfs_mru_cache_lookup(cache, pip->i_ino);
@@ -601,7 +588,6 @@ exit_did_pick:
601 588
602exit: 589exit:
603 xfs_iunlock(pip, XFS_IOLOCK_EXCL); 590 xfs_iunlock(pip, XFS_IOLOCK_EXCL);
604 up_read(&mp->m_peraglock);
605 return -err; 591 return -err;
606} 592}
607 593
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index 4aba67c5f64f..260f757bbc5d 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -79,12 +79,21 @@ extern ktrace_t *xfs_filestreams_trace_buf;
79 * the cache that reference per-ag array elements that have since been 79 * the cache that reference per-ag array elements that have since been
80 * reallocated. 80 * reallocated.
81 */ 81 */
82/*
83 * xfs_filestream_peek_ag is only used in tracing code
84 */
82static inline int 85static inline int
83xfs_filestream_peek_ag( 86xfs_filestream_peek_ag(
84 xfs_mount_t *mp, 87 xfs_mount_t *mp,
85 xfs_agnumber_t agno) 88 xfs_agnumber_t agno)
86{ 89{
87 return atomic_read(&mp->m_perag[agno].pagf_fstrms); 90 struct xfs_perag *pag;
91 int ret;
92
93 pag = xfs_perag_get(mp, agno);
94 ret = atomic_read(&pag->pagf_fstrms);
95 xfs_perag_put(pag);
96 return ret;
88} 97}
89 98
90static inline int 99static inline int
@@ -92,7 +101,13 @@ xfs_filestream_get_ag(
92 xfs_mount_t *mp, 101 xfs_mount_t *mp,
93 xfs_agnumber_t agno) 102 xfs_agnumber_t agno)
94{ 103{
95 return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms); 104 struct xfs_perag *pag;
105 int ret;
106
107 pag = xfs_perag_get(mp, agno);
108 ret = atomic_inc_return(&pag->pagf_fstrms);
109 xfs_perag_put(pag);
110 return ret;
96} 111}
97 112
98static inline int 113static inline int
@@ -100,7 +115,13 @@ xfs_filestream_put_ag(
100 xfs_mount_t *mp, 115 xfs_mount_t *mp,
101 xfs_agnumber_t agno) 116 xfs_agnumber_t agno)
102{ 117{
103 return atomic_dec_return(&mp->m_perag[agno].pagf_fstrms); 118 struct xfs_perag *pag;
119 int ret;
120
121 pag = xfs_perag_get(mp, agno);
122 ret = atomic_dec_return(&pag->pagf_fstrms);
123 xfs_perag_put(pag);
124 return ret;
104} 125}
105 126
106/* allocation selection flags */ 127/* allocation selection flags */
@@ -114,7 +135,6 @@ int xfs_filestream_init(void);
114void xfs_filestream_uninit(void); 135void xfs_filestream_uninit(void);
115int xfs_filestream_mount(struct xfs_mount *mp); 136int xfs_filestream_mount(struct xfs_mount *mp);
116void xfs_filestream_unmount(struct xfs_mount *mp); 137void xfs_filestream_unmount(struct xfs_mount *mp);
117void xfs_filestream_flush(struct xfs_mount *mp);
118xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); 138xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
119int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip); 139int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip);
120void xfs_filestream_deassociate(struct xfs_inode *ip); 140void xfs_filestream_deassociate(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index a13919a6a364..37a6f62c57b6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -167,27 +167,14 @@ xfs_growfs_data_private(
167 } 167 }
168 new = nb - mp->m_sb.sb_dblocks; 168 new = nb - mp->m_sb.sb_dblocks;
169 oagcount = mp->m_sb.sb_agcount; 169 oagcount = mp->m_sb.sb_agcount;
170 if (nagcount > oagcount) {
171 void *new_perag, *old_perag;
172
173 xfs_filestream_flush(mp);
174
175 new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount,
176 KM_MAYFAIL);
177 if (!new_perag)
178 return XFS_ERROR(ENOMEM);
179
180 down_write(&mp->m_peraglock);
181 memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount);
182 old_perag = mp->m_perag;
183 mp->m_perag = new_perag;
184
185 mp->m_flags |= XFS_MOUNT_32BITINODES;
186 nagimax = xfs_initialize_perag(mp, nagcount);
187 up_write(&mp->m_peraglock);
188 170
189 kmem_free(old_perag); 171 /* allocate the new per-ag structures */
172 if (nagcount > oagcount) {
173 error = xfs_initialize_perag(mp, nagcount, &nagimax);
174 if (error)
175 return error;
190 } 176 }
177
191 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); 178 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
192 tp->t_flags |= XFS_TRANS_RESERVE; 179 tp->t_flags |= XFS_TRANS_RESERVE;
193 if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp), 180 if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
@@ -196,6 +183,11 @@ xfs_growfs_data_private(
196 return error; 183 return error;
197 } 184 }
198 185
186 /*
187 * Write new AG headers to disk. Non-transactional, but written
188 * synchronously so they are completed prior to the growfs transaction
189 * being logged.
190 */
199 nfree = 0; 191 nfree = 0;
200 for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) { 192 for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {
201 /* 193 /*
@@ -359,6 +351,12 @@ xfs_growfs_data_private(
359 goto error0; 351 goto error0;
360 } 352 }
361 } 353 }
354
355 /*
356 * Update changed superblock fields transactionally. These are not
357 * seen by the rest of the world until the transaction commit applies
358 * them atomically to the superblock.
359 */
362 if (nagcount > oagcount) 360 if (nagcount > oagcount)
363 xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount); 361 xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount);
364 if (nb > mp->m_sb.sb_dblocks) 362 if (nb > mp->m_sb.sb_dblocks)
@@ -369,9 +367,9 @@ xfs_growfs_data_private(
369 if (dpct) 367 if (dpct)
370 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); 368 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
371 error = xfs_trans_commit(tp, 0); 369 error = xfs_trans_commit(tp, 0);
372 if (error) { 370 if (error)
373 return error; 371 return error;
374 } 372
375 /* New allocation groups fully initialized, so update mount struct */ 373 /* New allocation groups fully initialized, so update mount struct */
376 if (nagimax) 374 if (nagimax)
377 mp->m_maxagi = nagimax; 375 mp->m_maxagi = nagimax;
@@ -381,6 +379,8 @@ xfs_growfs_data_private(
381 mp->m_maxicount = icount << mp->m_sb.sb_inopblog; 379 mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
382 } else 380 } else
383 mp->m_maxicount = 0; 381 mp->m_maxicount = 0;
382
383 /* update secondary superblocks. */
384 for (agno = 1; agno < nagcount; agno++) { 384 for (agno = 1; agno < nagcount; agno++) {
385 error = xfs_read_buf(mp, mp->m_ddev_targp, 385 error = xfs_read_buf(mp, mp->m_ddev_targp,
386 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), 386 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index cb907ba69c4c..9d884c127bb9 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -205,7 +205,7 @@ xfs_ialloc_inode_init(
205 d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); 205 d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
206 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 206 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
207 mp->m_bsize * blks_per_cluster, 207 mp->m_bsize * blks_per_cluster,
208 XFS_BUF_LOCK); 208 XBF_LOCK);
209 ASSERT(fbuf); 209 ASSERT(fbuf);
210 ASSERT(!XFS_BUF_GETERROR(fbuf)); 210 ASSERT(!XFS_BUF_GETERROR(fbuf));
211 211
@@ -253,6 +253,7 @@ xfs_ialloc_ag_alloc(
253 xfs_agino_t thisino; /* current inode number, for loop */ 253 xfs_agino_t thisino; /* current inode number, for loop */
254 int isaligned = 0; /* inode allocation at stripe unit */ 254 int isaligned = 0; /* inode allocation at stripe unit */
255 /* boundary */ 255 /* boundary */
256 struct xfs_perag *pag;
256 257
257 args.tp = tp; 258 args.tp = tp;
258 args.mp = tp->t_mountp; 259 args.mp = tp->t_mountp;
@@ -382,9 +383,9 @@ xfs_ialloc_ag_alloc(
382 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); 383 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
383 be32_add_cpu(&agi->agi_count, newlen); 384 be32_add_cpu(&agi->agi_count, newlen);
384 be32_add_cpu(&agi->agi_freecount, newlen); 385 be32_add_cpu(&agi->agi_freecount, newlen);
385 down_read(&args.mp->m_peraglock); 386 pag = xfs_perag_get(args.mp, agno);
386 args.mp->m_perag[agno].pagi_freecount += newlen; 387 pag->pagi_freecount += newlen;
387 up_read(&args.mp->m_peraglock); 388 xfs_perag_put(pag);
388 agi->agi_newino = cpu_to_be32(newino); 389 agi->agi_newino = cpu_to_be32(newino);
389 390
390 /* 391 /*
@@ -486,9 +487,8 @@ xfs_ialloc_ag_select(
486 */ 487 */
487 agno = pagno; 488 agno = pagno;
488 flags = XFS_ALLOC_FLAG_TRYLOCK; 489 flags = XFS_ALLOC_FLAG_TRYLOCK;
489 down_read(&mp->m_peraglock);
490 for (;;) { 490 for (;;) {
491 pag = &mp->m_perag[agno]; 491 pag = xfs_perag_get(mp, agno);
492 if (!pag->pagi_init) { 492 if (!pag->pagi_init) {
493 if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { 493 if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
494 agbp = NULL; 494 agbp = NULL;
@@ -527,7 +527,7 @@ xfs_ialloc_ag_select(
527 agbp = NULL; 527 agbp = NULL;
528 goto nextag; 528 goto nextag;
529 } 529 }
530 up_read(&mp->m_peraglock); 530 xfs_perag_put(pag);
531 return agbp; 531 return agbp;
532 } 532 }
533 } 533 }
@@ -535,22 +535,19 @@ unlock_nextag:
535 if (agbp) 535 if (agbp)
536 xfs_trans_brelse(tp, agbp); 536 xfs_trans_brelse(tp, agbp);
537nextag: 537nextag:
538 xfs_perag_put(pag);
538 /* 539 /*
539 * No point in iterating over the rest, if we're shutting 540 * No point in iterating over the rest, if we're shutting
540 * down. 541 * down.
541 */ 542 */
542 if (XFS_FORCED_SHUTDOWN(mp)) { 543 if (XFS_FORCED_SHUTDOWN(mp))
543 up_read(&mp->m_peraglock);
544 return NULL; 544 return NULL;
545 }
546 agno++; 545 agno++;
547 if (agno >= agcount) 546 if (agno >= agcount)
548 agno = 0; 547 agno = 0;
549 if (agno == pagno) { 548 if (agno == pagno) {
550 if (flags == 0) { 549 if (flags == 0)
551 up_read(&mp->m_peraglock);
552 return NULL; 550 return NULL;
553 }
554 flags = 0; 551 flags = 0;
555 } 552 }
556 } 553 }
@@ -672,6 +669,7 @@ xfs_dialloc(
672 xfs_agnumber_t tagno; /* testing allocation group number */ 669 xfs_agnumber_t tagno; /* testing allocation group number */
673 xfs_btree_cur_t *tcur; /* temp cursor */ 670 xfs_btree_cur_t *tcur; /* temp cursor */
674 xfs_inobt_rec_incore_t trec; /* temp inode allocation record */ 671 xfs_inobt_rec_incore_t trec; /* temp inode allocation record */
672 struct xfs_perag *pag;
675 673
676 674
677 if (*IO_agbp == NULL) { 675 if (*IO_agbp == NULL) {
@@ -771,13 +769,13 @@ nextag:
771 *inop = NULLFSINO; 769 *inop = NULLFSINO;
772 return noroom ? ENOSPC : 0; 770 return noroom ? ENOSPC : 0;
773 } 771 }
774 down_read(&mp->m_peraglock); 772 pag = xfs_perag_get(mp, tagno);
775 if (mp->m_perag[tagno].pagi_inodeok == 0) { 773 if (pag->pagi_inodeok == 0) {
776 up_read(&mp->m_peraglock); 774 xfs_perag_put(pag);
777 goto nextag; 775 goto nextag;
778 } 776 }
779 error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); 777 error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
780 up_read(&mp->m_peraglock); 778 xfs_perag_put(pag);
781 if (error) 779 if (error)
782 goto nextag; 780 goto nextag;
783 agi = XFS_BUF_TO_AGI(agbp); 781 agi = XFS_BUF_TO_AGI(agbp);
@@ -790,6 +788,7 @@ nextag:
790 */ 788 */
791 agno = tagno; 789 agno = tagno;
792 *IO_agbp = NULL; 790 *IO_agbp = NULL;
791 pag = xfs_perag_get(mp, agno);
793 792
794 restart_pagno: 793 restart_pagno:
795 cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); 794 cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
@@ -808,7 +807,6 @@ nextag:
808 * If in the same AG as the parent, try to get near the parent. 807 * If in the same AG as the parent, try to get near the parent.
809 */ 808 */
810 if (pagno == agno) { 809 if (pagno == agno) {
811 xfs_perag_t *pag = &mp->m_perag[agno];
812 int doneleft; /* done, to the left */ 810 int doneleft; /* done, to the left */
813 int doneright; /* done, to the right */ 811 int doneright; /* done, to the right */
814 int searchdistance = 10; 812 int searchdistance = 10;
@@ -1006,9 +1004,7 @@ alloc_inode:
1006 goto error0; 1004 goto error0;
1007 be32_add_cpu(&agi->agi_freecount, -1); 1005 be32_add_cpu(&agi->agi_freecount, -1);
1008 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 1006 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1009 down_read(&mp->m_peraglock); 1007 pag->pagi_freecount--;
1010 mp->m_perag[tagno].pagi_freecount--;
1011 up_read(&mp->m_peraglock);
1012 1008
1013 error = xfs_check_agi_freecount(cur, agi); 1009 error = xfs_check_agi_freecount(cur, agi);
1014 if (error) 1010 if (error)
@@ -1016,12 +1012,14 @@ alloc_inode:
1016 1012
1017 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1013 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1018 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); 1014 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
1015 xfs_perag_put(pag);
1019 *inop = ino; 1016 *inop = ino;
1020 return 0; 1017 return 0;
1021error1: 1018error1:
1022 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 1019 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1023error0: 1020error0:
1024 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 1021 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1022 xfs_perag_put(pag);
1025 return error; 1023 return error;
1026} 1024}
1027 1025
@@ -1052,6 +1050,7 @@ xfs_difree(
1052 xfs_mount_t *mp; /* mount structure for filesystem */ 1050 xfs_mount_t *mp; /* mount structure for filesystem */
1053 int off; /* offset of inode in inode chunk */ 1051 int off; /* offset of inode in inode chunk */
1054 xfs_inobt_rec_incore_t rec; /* btree record */ 1052 xfs_inobt_rec_incore_t rec; /* btree record */
1053 struct xfs_perag *pag;
1055 1054
1056 mp = tp->t_mountp; 1055 mp = tp->t_mountp;
1057 1056
@@ -1088,9 +1087,7 @@ xfs_difree(
1088 /* 1087 /*
1089 * Get the allocation group header. 1088 * Get the allocation group header.
1090 */ 1089 */
1091 down_read(&mp->m_peraglock);
1092 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1090 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1093 up_read(&mp->m_peraglock);
1094 if (error) { 1091 if (error) {
1095 cmn_err(CE_WARN, 1092 cmn_err(CE_WARN,
1096 "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", 1093 "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.",
@@ -1157,9 +1154,9 @@ xfs_difree(
1157 be32_add_cpu(&agi->agi_count, -ilen); 1154 be32_add_cpu(&agi->agi_count, -ilen);
1158 be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); 1155 be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
1159 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); 1156 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
1160 down_read(&mp->m_peraglock); 1157 pag = xfs_perag_get(mp, agno);
1161 mp->m_perag[agno].pagi_freecount -= ilen - 1; 1158 pag->pagi_freecount -= ilen - 1;
1162 up_read(&mp->m_peraglock); 1159 xfs_perag_put(pag);
1163 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); 1160 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
1164 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); 1161 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
1165 1162
@@ -1188,9 +1185,9 @@ xfs_difree(
1188 */ 1185 */
1189 be32_add_cpu(&agi->agi_freecount, 1); 1186 be32_add_cpu(&agi->agi_freecount, 1);
1190 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 1187 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1191 down_read(&mp->m_peraglock); 1188 pag = xfs_perag_get(mp, agno);
1192 mp->m_perag[agno].pagi_freecount++; 1189 pag->pagi_freecount++;
1193 up_read(&mp->m_peraglock); 1190 xfs_perag_put(pag);
1194 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); 1191 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
1195 } 1192 }
1196 1193
@@ -1312,9 +1309,7 @@ xfs_imap(
1312 xfs_buf_t *agbp; /* agi buffer */ 1309 xfs_buf_t *agbp; /* agi buffer */
1313 int i; /* temp state */ 1310 int i; /* temp state */
1314 1311
1315 down_read(&mp->m_peraglock);
1316 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1312 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1317 up_read(&mp->m_peraglock);
1318 if (error) { 1313 if (error) {
1319 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1314 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1320 "xfs_ialloc_read_agi() returned " 1315 "xfs_ialloc_read_agi() returned "
@@ -1379,7 +1374,6 @@ xfs_imap(
1379 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); 1374 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
1380 return XFS_ERROR(EINVAL); 1375 return XFS_ERROR(EINVAL);
1381 } 1376 }
1382
1383 return 0; 1377 return 0;
1384} 1378}
1385 1379
@@ -1523,8 +1517,7 @@ xfs_ialloc_read_agi(
1523 return error; 1517 return error;
1524 1518
1525 agi = XFS_BUF_TO_AGI(*bpp); 1519 agi = XFS_BUF_TO_AGI(*bpp);
1526 pag = &mp->m_perag[agno]; 1520 pag = xfs_perag_get(mp, agno);
1527
1528 if (!pag->pagi_init) { 1521 if (!pag->pagi_init) {
1529 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); 1522 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
1530 pag->pagi_count = be32_to_cpu(agi->agi_count); 1523 pag->pagi_count = be32_to_cpu(agi->agi_count);
@@ -1537,6 +1530,7 @@ xfs_ialloc_read_agi(
1537 */ 1530 */
1538 ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || 1531 ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
1539 XFS_FORCED_SHUTDOWN(mp)); 1532 XFS_FORCED_SHUTDOWN(mp));
1533 xfs_perag_put(pag);
1540 return 0; 1534 return 0;
1541} 1535}
1542 1536
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 155e798f30a1..e281eb4a1c49 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -374,7 +374,7 @@ xfs_iget(
374 return EINVAL; 374 return EINVAL;
375 375
376 /* get the perag structure and ensure that it's inode capable */ 376 /* get the perag structure and ensure that it's inode capable */
377 pag = xfs_get_perag(mp, ino); 377 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
378 if (!pag->pagi_inodeok) 378 if (!pag->pagi_inodeok)
379 return EINVAL; 379 return EINVAL;
380 ASSERT(pag->pag_ici_init); 380 ASSERT(pag->pag_ici_init);
@@ -398,7 +398,7 @@ again:
398 if (error) 398 if (error)
399 goto out_error_or_again; 399 goto out_error_or_again;
400 } 400 }
401 xfs_put_perag(mp, pag); 401 xfs_perag_put(pag);
402 402
403 *ipp = ip; 403 *ipp = ip;
404 404
@@ -417,7 +417,7 @@ out_error_or_again:
417 delay(1); 417 delay(1);
418 goto again; 418 goto again;
419 } 419 }
420 xfs_put_perag(mp, pag); 420 xfs_perag_put(pag);
421 return error; 421 return error;
422} 422}
423 423
@@ -488,12 +488,12 @@ xfs_ireclaim(
488 * added to the tree assert that it's been there before to catch 488 * added to the tree assert that it's been there before to catch
489 * problems with the inode life time early on. 489 * problems with the inode life time early on.
490 */ 490 */
491 pag = xfs_get_perag(mp, ip->i_ino); 491 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
492 write_lock(&pag->pag_ici_lock); 492 write_lock(&pag->pag_ici_lock);
493 if (!radix_tree_delete(&pag->pag_ici_root, agino)) 493 if (!radix_tree_delete(&pag->pag_ici_root, agino))
494 ASSERT(0); 494 ASSERT(0);
495 write_unlock(&pag->pag_ici_lock); 495 write_unlock(&pag->pag_ici_lock);
496 xfs_put_perag(mp, pag); 496 xfs_perag_put(pag);
497 497
498 /* 498 /*
499 * Here we do an (almost) spurious inode lock in order to coordinate 499 * Here we do an (almost) spurious inode lock in order to coordinate
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ef77fd88c8e3..fa31360046d4 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -151,7 +151,7 @@ xfs_imap_to_bp(
151 "an error %d on %s. Returning error.", 151 "an error %d on %s. Returning error.",
152 error, mp->m_fsname); 152 error, mp->m_fsname);
153 } else { 153 } else {
154 ASSERT(buf_flags & XFS_BUF_TRYLOCK); 154 ASSERT(buf_flags & XBF_TRYLOCK);
155 } 155 }
156 return error; 156 return error;
157 } 157 }
@@ -239,7 +239,7 @@ xfs_inotobp(
239 if (error) 239 if (error)
240 return error; 240 return error;
241 241
242 error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); 242 error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags);
243 if (error) 243 if (error)
244 return error; 244 return error;
245 245
@@ -285,7 +285,7 @@ xfs_itobp(
285 return error; 285 return error;
286 286
287 if (!bp) { 287 if (!bp) {
288 ASSERT(buf_flags & XFS_BUF_TRYLOCK); 288 ASSERT(buf_flags & XBF_TRYLOCK);
289 ASSERT(tp == NULL); 289 ASSERT(tp == NULL);
290 *bpp = NULL; 290 *bpp = NULL;
291 return EAGAIN; 291 return EAGAIN;
@@ -807,7 +807,7 @@ xfs_iread(
807 * Get pointers to the on-disk inode and the buffer containing it. 807 * Get pointers to the on-disk inode and the buffer containing it.
808 */ 808 */
809 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, 809 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp,
810 XFS_BUF_LOCK, iget_flags); 810 XBF_LOCK, iget_flags);
811 if (error) 811 if (error)
812 return error; 812 return error;
813 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 813 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
@@ -1751,7 +1751,7 @@ xfs_iunlink(
1751 * Here we put the head pointer into our next pointer, 1751 * Here we put the head pointer into our next pointer,
1752 * and then we fall through to point the head at us. 1752 * and then we fall through to point the head at us.
1753 */ 1753 */
1754 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); 1754 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
1755 if (error) 1755 if (error)
1756 return error; 1756 return error;
1757 1757
@@ -1833,7 +1833,7 @@ xfs_iunlink_remove(
1833 * of dealing with the buffer when there is no need to 1833 * of dealing with the buffer when there is no need to
1834 * change it. 1834 * change it.
1835 */ 1835 */
1836 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); 1836 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
1837 if (error) { 1837 if (error) {
1838 cmn_err(CE_WARN, 1838 cmn_err(CE_WARN,
1839 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 1839 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.",
@@ -1895,7 +1895,7 @@ xfs_iunlink_remove(
1895 * Now last_ibp points to the buffer previous to us on 1895 * Now last_ibp points to the buffer previous to us on
1896 * the unlinked list. Pull us from the list. 1896 * the unlinked list. Pull us from the list.
1897 */ 1897 */
1898 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); 1898 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
1899 if (error) { 1899 if (error) {
1900 cmn_err(CE_WARN, 1900 cmn_err(CE_WARN,
1901 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 1901 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.",
@@ -1946,8 +1946,9 @@ xfs_ifree_cluster(
1946 xfs_inode_t *ip, **ip_found; 1946 xfs_inode_t *ip, **ip_found;
1947 xfs_inode_log_item_t *iip; 1947 xfs_inode_log_item_t *iip;
1948 xfs_log_item_t *lip; 1948 xfs_log_item_t *lip;
1949 xfs_perag_t *pag = xfs_get_perag(mp, inum); 1949 struct xfs_perag *pag;
1950 1950
1951 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
1951 if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { 1952 if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
1952 blks_per_cluster = 1; 1953 blks_per_cluster = 1;
1953 ninodes = mp->m_sb.sb_inopblock; 1954 ninodes = mp->m_sb.sb_inopblock;
@@ -2039,7 +2040,7 @@ xfs_ifree_cluster(
2039 2040
2040 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 2041 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
2041 mp->m_bsize * blks_per_cluster, 2042 mp->m_bsize * blks_per_cluster,
2042 XFS_BUF_LOCK); 2043 XBF_LOCK);
2043 2044
2044 pre_flushed = 0; 2045 pre_flushed = 0;
2045 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 2046 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
@@ -2088,7 +2089,7 @@ xfs_ifree_cluster(
2088 } 2089 }
2089 2090
2090 kmem_free(ip_found); 2091 kmem_free(ip_found);
2091 xfs_put_perag(mp, pag); 2092 xfs_perag_put(pag);
2092} 2093}
2093 2094
2094/* 2095/*
@@ -2150,7 +2151,7 @@ xfs_ifree(
2150 2151
2151 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2152 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2152 2153
2153 error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XFS_BUF_LOCK); 2154 error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XBF_LOCK);
2154 if (error) 2155 if (error)
2155 return error; 2156 return error;
2156 2157
@@ -2483,13 +2484,16 @@ __xfs_iunpin_wait(
2483 return; 2484 return;
2484 2485
2485 /* Give the log a push to start the unpinning I/O */ 2486 /* Give the log a push to start the unpinning I/O */
2486 xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ? 2487 if (iip && iip->ili_last_lsn)
2487 iip->ili_last_lsn : 0, XFS_LOG_FORCE); 2488 xfs_log_force_lsn(ip->i_mount, iip->ili_last_lsn, 0);
2489 else
2490 xfs_log_force(ip->i_mount, 0);
2491
2488 if (wait) 2492 if (wait)
2489 wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); 2493 wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
2490} 2494}
2491 2495
2492static inline void 2496void
2493xfs_iunpin_wait( 2497xfs_iunpin_wait(
2494 xfs_inode_t *ip) 2498 xfs_inode_t *ip)
2495{ 2499{
@@ -2675,7 +2679,7 @@ xfs_iflush_cluster(
2675 xfs_buf_t *bp) 2679 xfs_buf_t *bp)
2676{ 2680{
2677 xfs_mount_t *mp = ip->i_mount; 2681 xfs_mount_t *mp = ip->i_mount;
2678 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); 2682 struct xfs_perag *pag;
2679 unsigned long first_index, mask; 2683 unsigned long first_index, mask;
2680 unsigned long inodes_per_cluster; 2684 unsigned long inodes_per_cluster;
2681 int ilist_size; 2685 int ilist_size;
@@ -2686,6 +2690,7 @@ xfs_iflush_cluster(
2686 int bufwasdelwri; 2690 int bufwasdelwri;
2687 int i; 2691 int i;
2688 2692
2693 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
2689 ASSERT(pag->pagi_inodeok); 2694 ASSERT(pag->pagi_inodeok);
2690 ASSERT(pag->pag_ici_init); 2695 ASSERT(pag->pag_ici_init);
2691 2696
@@ -2693,7 +2698,7 @@ xfs_iflush_cluster(
2693 ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); 2698 ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
2694 ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); 2699 ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
2695 if (!ilist) 2700 if (!ilist)
2696 return 0; 2701 goto out_put;
2697 2702
2698 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 2703 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
2699 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 2704 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
@@ -2762,6 +2767,8 @@ xfs_iflush_cluster(
2762out_free: 2767out_free:
2763 read_unlock(&pag->pag_ici_lock); 2768 read_unlock(&pag->pag_ici_lock);
2764 kmem_free(ilist); 2769 kmem_free(ilist);
2770out_put:
2771 xfs_perag_put(pag);
2765 return 0; 2772 return 0;
2766 2773
2767 2774
@@ -2805,6 +2812,7 @@ cluster_corrupt_out:
2805 */ 2812 */
2806 xfs_iflush_abort(iq); 2813 xfs_iflush_abort(iq);
2807 kmem_free(ilist); 2814 kmem_free(ilist);
2815 xfs_perag_put(pag);
2808 return XFS_ERROR(EFSCORRUPTED); 2816 return XFS_ERROR(EFSCORRUPTED);
2809} 2817}
2810 2818
@@ -2827,8 +2835,6 @@ xfs_iflush(
2827 xfs_dinode_t *dip; 2835 xfs_dinode_t *dip;
2828 xfs_mount_t *mp; 2836 xfs_mount_t *mp;
2829 int error; 2837 int error;
2830 int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK);
2831 enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
2832 2838
2833 XFS_STATS_INC(xs_iflush_count); 2839 XFS_STATS_INC(xs_iflush_count);
2834 2840
@@ -2841,15 +2847,6 @@ xfs_iflush(
2841 mp = ip->i_mount; 2847 mp = ip->i_mount;
2842 2848
2843 /* 2849 /*
2844 * If the inode isn't dirty, then just release the inode flush lock and
2845 * do nothing.
2846 */
2847 if (xfs_inode_clean(ip)) {
2848 xfs_ifunlock(ip);
2849 return 0;
2850 }
2851
2852 /*
2853 * We can't flush the inode until it is unpinned, so wait for it if we 2850 * We can't flush the inode until it is unpinned, so wait for it if we
2854 * are allowed to block. We know noone new can pin it, because we are 2851 * are allowed to block. We know noone new can pin it, because we are
2855 * holding the inode lock shared and you need to hold it exclusively to 2852 * holding the inode lock shared and you need to hold it exclusively to
@@ -2860,7 +2857,7 @@ xfs_iflush(
2860 * in the same cluster are dirty, they will probably write the inode 2857 * in the same cluster are dirty, they will probably write the inode
2861 * out for us if they occur after the log force completes. 2858 * out for us if they occur after the log force completes.
2862 */ 2859 */
2863 if (noblock && xfs_ipincount(ip)) { 2860 if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
2864 xfs_iunpin_nowait(ip); 2861 xfs_iunpin_nowait(ip);
2865 xfs_ifunlock(ip); 2862 xfs_ifunlock(ip);
2866 return EAGAIN; 2863 return EAGAIN;
@@ -2894,60 +2891,10 @@ xfs_iflush(
2894 } 2891 }
2895 2892
2896 /* 2893 /*
2897 * Decide how buffer will be flushed out. This is done before
2898 * the call to xfs_iflush_int because this field is zeroed by it.
2899 */
2900 if (iip != NULL && iip->ili_format.ilf_fields != 0) {
2901 /*
2902 * Flush out the inode buffer according to the directions
2903 * of the caller. In the cases where the caller has given
2904 * us a choice choose the non-delwri case. This is because
2905 * the inode is in the AIL and we need to get it out soon.
2906 */
2907 switch (flags) {
2908 case XFS_IFLUSH_SYNC:
2909 case XFS_IFLUSH_DELWRI_ELSE_SYNC:
2910 flags = 0;
2911 break;
2912 case XFS_IFLUSH_ASYNC_NOBLOCK:
2913 case XFS_IFLUSH_ASYNC:
2914 case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
2915 flags = INT_ASYNC;
2916 break;
2917 case XFS_IFLUSH_DELWRI:
2918 flags = INT_DELWRI;
2919 break;
2920 default:
2921 ASSERT(0);
2922 flags = 0;
2923 break;
2924 }
2925 } else {
2926 switch (flags) {
2927 case XFS_IFLUSH_DELWRI_ELSE_SYNC:
2928 case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
2929 case XFS_IFLUSH_DELWRI:
2930 flags = INT_DELWRI;
2931 break;
2932 case XFS_IFLUSH_ASYNC_NOBLOCK:
2933 case XFS_IFLUSH_ASYNC:
2934 flags = INT_ASYNC;
2935 break;
2936 case XFS_IFLUSH_SYNC:
2937 flags = 0;
2938 break;
2939 default:
2940 ASSERT(0);
2941 flags = 0;
2942 break;
2943 }
2944 }
2945
2946 /*
2947 * Get the buffer containing the on-disk inode. 2894 * Get the buffer containing the on-disk inode.
2948 */ 2895 */
2949 error = xfs_itobp(mp, NULL, ip, &dip, &bp, 2896 error = xfs_itobp(mp, NULL, ip, &dip, &bp,
2950 noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); 2897 (flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK);
2951 if (error || !bp) { 2898 if (error || !bp) {
2952 xfs_ifunlock(ip); 2899 xfs_ifunlock(ip);
2953 return error; 2900 return error;
@@ -2965,7 +2912,7 @@ xfs_iflush(
2965 * get stuck waiting in the write for too long. 2912 * get stuck waiting in the write for too long.
2966 */ 2913 */
2967 if (XFS_BUF_ISPINNED(bp)) 2914 if (XFS_BUF_ISPINNED(bp))
2968 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 2915 xfs_log_force(mp, 0);
2969 2916
2970 /* 2917 /*
2971 * inode clustering: 2918 * inode clustering:
@@ -2975,13 +2922,10 @@ xfs_iflush(
2975 if (error) 2922 if (error)
2976 goto cluster_corrupt_out; 2923 goto cluster_corrupt_out;
2977 2924
2978 if (flags & INT_DELWRI) { 2925 if (flags & SYNC_WAIT)
2979 xfs_bdwrite(mp, bp);
2980 } else if (flags & INT_ASYNC) {
2981 error = xfs_bawrite(mp, bp);
2982 } else {
2983 error = xfs_bwrite(mp, bp); 2926 error = xfs_bwrite(mp, bp);
2984 } 2927 else
2928 xfs_bdwrite(mp, bp);
2985 return error; 2929 return error;
2986 2930
2987corrupt_out: 2931corrupt_out:
@@ -3016,16 +2960,6 @@ xfs_iflush_int(
3016 iip = ip->i_itemp; 2960 iip = ip->i_itemp;
3017 mp = ip->i_mount; 2961 mp = ip->i_mount;
3018 2962
3019
3020 /*
3021 * If the inode isn't dirty, then just release the inode
3022 * flush lock and do nothing.
3023 */
3024 if (xfs_inode_clean(ip)) {
3025 xfs_ifunlock(ip);
3026 return 0;
3027 }
3028
3029 /* set *dip = inode's place in the buffer */ 2963 /* set *dip = inode's place in the buffer */
3030 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 2964 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
3031 2965
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index ec1f28c4fc4f..6c912b027596 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -420,16 +420,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
420#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) 420#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
421 421
422/* 422/*
423 * Flags for xfs_iflush()
424 */
425#define XFS_IFLUSH_DELWRI_ELSE_SYNC 1
426#define XFS_IFLUSH_DELWRI_ELSE_ASYNC 2
427#define XFS_IFLUSH_SYNC 3
428#define XFS_IFLUSH_ASYNC 4
429#define XFS_IFLUSH_DELWRI 5
430#define XFS_IFLUSH_ASYNC_NOBLOCK 6
431
432/*
433 * Flags for xfs_itruncate_start(). 423 * Flags for xfs_itruncate_start().
434 */ 424 */
435#define XFS_ITRUNC_DEFINITE 0x1 425#define XFS_ITRUNC_DEFINITE 0x1
@@ -483,6 +473,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
483void xfs_iext_realloc(xfs_inode_t *, int, int); 473void xfs_iext_realloc(xfs_inode_t *, int, int);
484void xfs_ipin(xfs_inode_t *); 474void xfs_ipin(xfs_inode_t *);
485void xfs_iunpin(xfs_inode_t *); 475void xfs_iunpin(xfs_inode_t *);
476void xfs_iunpin_wait(xfs_inode_t *);
486int xfs_iflush(xfs_inode_t *, uint); 477int xfs_iflush(xfs_inode_t *, uint);
487void xfs_ichgtime(xfs_inode_t *, int); 478void xfs_ichgtime(xfs_inode_t *, int);
488void xfs_lock_inodes(xfs_inode_t **, int, uint); 479void xfs_lock_inodes(xfs_inode_t **, int, uint);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index f38855d21ea5..d4dc063111f8 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -228,7 +228,7 @@ xfs_inode_item_format(
228 228
229 vecp->i_addr = (xfs_caddr_t)&iip->ili_format; 229 vecp->i_addr = (xfs_caddr_t)&iip->ili_format;
230 vecp->i_len = sizeof(xfs_inode_log_format_t); 230 vecp->i_len = sizeof(xfs_inode_log_format_t);
231 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT); 231 vecp->i_type = XLOG_REG_TYPE_IFORMAT;
232 vecp++; 232 vecp++;
233 nvecs = 1; 233 nvecs = 1;
234 234
@@ -279,7 +279,7 @@ xfs_inode_item_format(
279 279
280 vecp->i_addr = (xfs_caddr_t)&ip->i_d; 280 vecp->i_addr = (xfs_caddr_t)&ip->i_d;
281 vecp->i_len = sizeof(struct xfs_icdinode); 281 vecp->i_len = sizeof(struct xfs_icdinode);
282 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); 282 vecp->i_type = XLOG_REG_TYPE_ICORE;
283 vecp++; 283 vecp++;
284 nvecs++; 284 nvecs++;
285 iip->ili_format.ilf_fields |= XFS_ILOG_CORE; 285 iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
@@ -336,7 +336,7 @@ xfs_inode_item_format(
336 vecp->i_addr = 336 vecp->i_addr =
337 (char *)(ip->i_df.if_u1.if_extents); 337 (char *)(ip->i_df.if_u1.if_extents);
338 vecp->i_len = ip->i_df.if_bytes; 338 vecp->i_len = ip->i_df.if_bytes;
339 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); 339 vecp->i_type = XLOG_REG_TYPE_IEXT;
340 } else 340 } else
341#endif 341#endif
342 { 342 {
@@ -355,7 +355,7 @@ xfs_inode_item_format(
355 vecp->i_addr = (xfs_caddr_t)ext_buffer; 355 vecp->i_addr = (xfs_caddr_t)ext_buffer;
356 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 356 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
357 XFS_DATA_FORK); 357 XFS_DATA_FORK);
358 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); 358 vecp->i_type = XLOG_REG_TYPE_IEXT;
359 } 359 }
360 ASSERT(vecp->i_len <= ip->i_df.if_bytes); 360 ASSERT(vecp->i_len <= ip->i_df.if_bytes);
361 iip->ili_format.ilf_dsize = vecp->i_len; 361 iip->ili_format.ilf_dsize = vecp->i_len;
@@ -373,7 +373,7 @@ xfs_inode_item_format(
373 ASSERT(ip->i_df.if_broot != NULL); 373 ASSERT(ip->i_df.if_broot != NULL);
374 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; 374 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot;
375 vecp->i_len = ip->i_df.if_broot_bytes; 375 vecp->i_len = ip->i_df.if_broot_bytes;
376 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT); 376 vecp->i_type = XLOG_REG_TYPE_IBROOT;
377 vecp++; 377 vecp++;
378 nvecs++; 378 nvecs++;
379 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; 379 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
@@ -399,7 +399,7 @@ xfs_inode_item_format(
399 ASSERT((ip->i_df.if_real_bytes == 0) || 399 ASSERT((ip->i_df.if_real_bytes == 0) ||
400 (ip->i_df.if_real_bytes == data_bytes)); 400 (ip->i_df.if_real_bytes == data_bytes));
401 vecp->i_len = (int)data_bytes; 401 vecp->i_len = (int)data_bytes;
402 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL); 402 vecp->i_type = XLOG_REG_TYPE_ILOCAL;
403 vecp++; 403 vecp++;
404 nvecs++; 404 nvecs++;
405 iip->ili_format.ilf_dsize = (unsigned)data_bytes; 405 iip->ili_format.ilf_dsize = (unsigned)data_bytes;
@@ -477,7 +477,7 @@ xfs_inode_item_format(
477 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 477 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
478 XFS_ATTR_FORK); 478 XFS_ATTR_FORK);
479#endif 479#endif
480 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT); 480 vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
481 iip->ili_format.ilf_asize = vecp->i_len; 481 iip->ili_format.ilf_asize = vecp->i_len;
482 vecp++; 482 vecp++;
483 nvecs++; 483 nvecs++;
@@ -492,7 +492,7 @@ xfs_inode_item_format(
492 ASSERT(ip->i_afp->if_broot != NULL); 492 ASSERT(ip->i_afp->if_broot != NULL);
493 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; 493 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot;
494 vecp->i_len = ip->i_afp->if_broot_bytes; 494 vecp->i_len = ip->i_afp->if_broot_bytes;
495 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT); 495 vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT;
496 vecp++; 496 vecp++;
497 nvecs++; 497 nvecs++;
498 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; 498 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
@@ -516,7 +516,7 @@ xfs_inode_item_format(
516 ASSERT((ip->i_afp->if_real_bytes == 0) || 516 ASSERT((ip->i_afp->if_real_bytes == 0) ||
517 (ip->i_afp->if_real_bytes == data_bytes)); 517 (ip->i_afp->if_real_bytes == data_bytes));
518 vecp->i_len = (int)data_bytes; 518 vecp->i_len = (int)data_bytes;
519 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL); 519 vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL;
520 vecp++; 520 vecp++;
521 nvecs++; 521 nvecs++;
522 iip->ili_format.ilf_asize = (unsigned)data_bytes; 522 iip->ili_format.ilf_asize = (unsigned)data_bytes;
@@ -602,33 +602,20 @@ xfs_inode_item_trylock(
602 602
603 if (!xfs_iflock_nowait(ip)) { 603 if (!xfs_iflock_nowait(ip)) {
604 /* 604 /*
605 * If someone else isn't already trying to push the inode 605 * inode has already been flushed to the backing buffer,
606 * buffer, we get to do it. 606 * leave it locked in shared mode, pushbuf routine will
607 * unlock it.
607 */ 608 */
608 if (iip->ili_pushbuf_flag == 0) { 609 return XFS_ITEM_PUSHBUF;
609 iip->ili_pushbuf_flag = 1;
610#ifdef DEBUG
611 iip->ili_push_owner = current_pid();
612#endif
613 /*
614 * Inode is left locked in shared mode.
615 * Pushbuf routine gets to unlock it.
616 */
617 return XFS_ITEM_PUSHBUF;
618 } else {
619 /*
620 * We hold the AIL lock, so we must specify the
621 * NONOTIFY flag so that we won't double trip.
622 */
623 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
624 return XFS_ITEM_FLUSHING;
625 }
626 /* NOTREACHED */
627 } 610 }
628 611
629 /* Stale items should force out the iclog */ 612 /* Stale items should force out the iclog */
630 if (ip->i_flags & XFS_ISTALE) { 613 if (ip->i_flags & XFS_ISTALE) {
631 xfs_ifunlock(ip); 614 xfs_ifunlock(ip);
615 /*
616 * we hold the AIL lock - notify the unlock routine of this
617 * so it doesn't try to get the lock again.
618 */
632 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); 619 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
633 return XFS_ITEM_PINNED; 620 return XFS_ITEM_PINNED;
634 } 621 }
@@ -746,11 +733,8 @@ xfs_inode_item_committed(
746 * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK 733 * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK
747 * failed to get the inode flush lock but did get the inode locked SHARED. 734 * failed to get the inode flush lock but did get the inode locked SHARED.
748 * Here we're trying to see if the inode buffer is incore, and if so whether it's 735 * Here we're trying to see if the inode buffer is incore, and if so whether it's
749 * marked delayed write. If that's the case, we'll initiate a bawrite on that 736 * marked delayed write. If that's the case, we'll promote it and that will
750 * buffer to expedite the process. 737 * allow the caller to write the buffer by triggering the xfsbufd to run.
751 *
752 * We aren't holding the AIL lock (or the flush lock) when this gets called,
753 * so it is inherently race-y.
754 */ 738 */
755STATIC void 739STATIC void
756xfs_inode_item_pushbuf( 740xfs_inode_item_pushbuf(
@@ -759,82 +743,30 @@ xfs_inode_item_pushbuf(
759 xfs_inode_t *ip; 743 xfs_inode_t *ip;
760 xfs_mount_t *mp; 744 xfs_mount_t *mp;
761 xfs_buf_t *bp; 745 xfs_buf_t *bp;
762 uint dopush;
763 746
764 ip = iip->ili_inode; 747 ip = iip->ili_inode;
765
766 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 748 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
767 749
768 /* 750 /*
769 * The ili_pushbuf_flag keeps others from
770 * trying to duplicate our effort.
771 */
772 ASSERT(iip->ili_pushbuf_flag != 0);
773 ASSERT(iip->ili_push_owner == current_pid());
774
775 /*
776 * If a flush is not in progress anymore, chances are that the 751 * If a flush is not in progress anymore, chances are that the
777 * inode was taken off the AIL. So, just get out. 752 * inode was taken off the AIL. So, just get out.
778 */ 753 */
779 if (completion_done(&ip->i_flush) || 754 if (completion_done(&ip->i_flush) ||
780 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { 755 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
781 iip->ili_pushbuf_flag = 0;
782 xfs_iunlock(ip, XFS_ILOCK_SHARED); 756 xfs_iunlock(ip, XFS_ILOCK_SHARED);
783 return; 757 return;
784 } 758 }
785 759
786 mp = ip->i_mount; 760 mp = ip->i_mount;
787 bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, 761 bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno,
788 iip->ili_format.ilf_len, XFS_INCORE_TRYLOCK); 762 iip->ili_format.ilf_len, XBF_TRYLOCK);
789 763
790 if (bp != NULL) {
791 if (XFS_BUF_ISDELAYWRITE(bp)) {
792 /*
793 * We were racing with iflush because we don't hold
794 * the AIL lock or the flush lock. However, at this point,
795 * we have the buffer, and we know that it's dirty.
796 * So, it's possible that iflush raced with us, and
797 * this item is already taken off the AIL.
798 * If not, we can flush it async.
799 */
800 dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
801 !completion_done(&ip->i_flush));
802 iip->ili_pushbuf_flag = 0;
803 xfs_iunlock(ip, XFS_ILOCK_SHARED);
804
805 trace_xfs_inode_item_push(bp, _RET_IP_);
806
807 if (XFS_BUF_ISPINNED(bp)) {
808 xfs_log_force(mp, (xfs_lsn_t)0,
809 XFS_LOG_FORCE);
810 }
811 if (dopush) {
812 int error;
813 error = xfs_bawrite(mp, bp);
814 if (error)
815 xfs_fs_cmn_err(CE_WARN, mp,
816 "xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p",
817 error, iip, bp);
818 } else {
819 xfs_buf_relse(bp);
820 }
821 } else {
822 iip->ili_pushbuf_flag = 0;
823 xfs_iunlock(ip, XFS_ILOCK_SHARED);
824 xfs_buf_relse(bp);
825 }
826 return;
827 }
828 /*
829 * We have to be careful about resetting pushbuf flag too early (above).
830 * Even though in theory we can do it as soon as we have the buflock,
831 * we don't want others to be doing work needlessly. They'll come to
832 * this function thinking that pushing the buffer is their
833 * responsibility only to find that the buffer is still locked by
834 * another doing the same thing
835 */
836 iip->ili_pushbuf_flag = 0;
837 xfs_iunlock(ip, XFS_ILOCK_SHARED); 764 xfs_iunlock(ip, XFS_ILOCK_SHARED);
765 if (!bp)
766 return;
767 if (XFS_BUF_ISDELAYWRITE(bp))
768 xfs_buf_delwri_promote(bp);
769 xfs_buf_relse(bp);
838 return; 770 return;
839} 771}
840 772
@@ -867,10 +799,14 @@ xfs_inode_item_push(
867 iip->ili_format.ilf_fields != 0); 799 iip->ili_format.ilf_fields != 0);
868 800
869 /* 801 /*
870 * Write out the inode. The completion routine ('iflush_done') will 802 * Push the inode to it's backing buffer. This will not remove the
871 * pull it from the AIL, mark it clean, unlock the flush lock. 803 * inode from the AIL - a further push will be required to trigger a
804 * buffer push. However, this allows all the dirty inodes to be pushed
805 * to the buffer before it is pushed to disk. THe buffer IO completion
806 * will pull th einode from the AIL, mark it clean and unlock the flush
807 * lock.
872 */ 808 */
873 (void) xfs_iflush(ip, XFS_IFLUSH_ASYNC); 809 (void) xfs_iflush(ip, 0);
874 xfs_iunlock(ip, XFS_ILOCK_SHARED); 810 xfs_iunlock(ip, XFS_ILOCK_SHARED);
875 811
876 return; 812 return;
@@ -934,7 +870,6 @@ xfs_inode_item_init(
934 /* 870 /*
935 We have zeroed memory. No need ... 871 We have zeroed memory. No need ...
936 iip->ili_extents_buf = NULL; 872 iip->ili_extents_buf = NULL;
937 iip->ili_pushbuf_flag = 0;
938 */ 873 */
939 874
940 iip->ili_format.ilf_type = XFS_LI_INODE; 875 iip->ili_format.ilf_type = XFS_LI_INODE;
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index cc8df1ac7783..9a467958ecdd 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -144,12 +144,6 @@ typedef struct xfs_inode_log_item {
144 data exts */ 144 data exts */
145 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged 145 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
146 attr exts */ 146 attr exts */
147 unsigned int ili_pushbuf_flag; /* one bit used in push_ail */
148
149#ifdef DEBUG
150 uint64_t ili_push_owner; /* one who sets pushbuf_flag
151 above gets to push the buf */
152#endif
153#ifdef XFS_TRANS_DEBUG 147#ifdef XFS_TRANS_DEBUG
154 int ili_root_size; 148 int ili_root_size;
155 char *ili_orig_root; 149 char *ili_orig_root;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 62efab2f3839..3af02314c605 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -408,8 +408,10 @@ xfs_bulkstat(
408 (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); 408 (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
409 nimask = ~(nicluster - 1); 409 nimask = ~(nicluster - 1);
410 nbcluster = nicluster >> mp->m_sb.sb_inopblog; 410 nbcluster = nicluster >> mp->m_sb.sb_inopblog;
411 irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4, 411 irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
412 KM_SLEEP | KM_MAYFAIL | KM_LARGE); 412 if (!irbuf)
413 return ENOMEM;
414
413 nirbuf = irbsize / sizeof(*irbuf); 415 nirbuf = irbsize / sizeof(*irbuf);
414 416
415 /* 417 /*
@@ -420,9 +422,7 @@ xfs_bulkstat(
420 while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { 422 while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
421 cond_resched(); 423 cond_resched();
422 bp = NULL; 424 bp = NULL;
423 down_read(&mp->m_peraglock);
424 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 425 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
425 up_read(&mp->m_peraglock);
426 if (error) { 426 if (error) {
427 /* 427 /*
428 * Skip this allocation group and go to the next one. 428 * Skip this allocation group and go to the next one.
@@ -729,7 +729,7 @@ xfs_bulkstat(
729 /* 729 /*
730 * Done, we're either out of filesystem or space to put the data. 730 * Done, we're either out of filesystem or space to put the data.
731 */ 731 */
732 kmem_free(irbuf); 732 kmem_free_large(irbuf);
733 *ubcountp = ubelem; 733 *ubcountp = ubelem;
734 /* 734 /*
735 * Found some inodes, return them now and return the error next time. 735 * Found some inodes, return them now and return the error next time.
@@ -849,9 +849,7 @@ xfs_inumbers(
849 agbp = NULL; 849 agbp = NULL;
850 while (left > 0 && agno < mp->m_sb.sb_agcount) { 850 while (left > 0 && agno < mp->m_sb.sb_agcount) {
851 if (agbp == NULL) { 851 if (agbp == NULL) {
852 down_read(&mp->m_peraglock);
853 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 852 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
854 up_read(&mp->m_peraglock);
855 if (error) { 853 if (error) {
856 /* 854 /*
857 * If we can't read the AGI of this ag, 855 * If we can't read the AGI of this ag,
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 600b5b06aaeb..4f16be4b6ee5 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -50,7 +50,6 @@ kmem_zone_t *xfs_log_ticket_zone;
50 (off) += (bytes);} 50 (off) += (bytes);}
51 51
52/* Local miscellaneous function prototypes */ 52/* Local miscellaneous function prototypes */
53STATIC int xlog_bdstrat_cb(struct xfs_buf *);
54STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket, 53STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket,
55 xlog_in_core_t **, xfs_lsn_t *); 54 xlog_in_core_t **, xfs_lsn_t *);
56STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, 55STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
@@ -80,11 +79,6 @@ STATIC int xlog_state_release_iclog(xlog_t *log,
80STATIC void xlog_state_switch_iclogs(xlog_t *log, 79STATIC void xlog_state_switch_iclogs(xlog_t *log,
81 xlog_in_core_t *iclog, 80 xlog_in_core_t *iclog,
82 int eventual_size); 81 int eventual_size);
83STATIC int xlog_state_sync(xlog_t *log,
84 xfs_lsn_t lsn,
85 uint flags,
86 int *log_flushed);
87STATIC int xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed);
88STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); 82STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
89 83
90/* local functions to manipulate grant head */ 84/* local functions to manipulate grant head */
@@ -297,65 +291,6 @@ xfs_log_done(xfs_mount_t *mp,
297 return lsn; 291 return lsn;
298} /* xfs_log_done */ 292} /* xfs_log_done */
299 293
300
301/*
302 * Force the in-core log to disk. If flags == XFS_LOG_SYNC,
303 * the force is done synchronously.
304 *
305 * Asynchronous forces are implemented by setting the WANT_SYNC
306 * bit in the appropriate in-core log and then returning.
307 *
308 * Synchronous forces are implemented with a signal variable. All callers
309 * to force a given lsn to disk will wait on a the sv attached to the
310 * specific in-core log. When given in-core log finally completes its
311 * write to disk, that thread will wake up all threads waiting on the
312 * sv.
313 */
314int
315_xfs_log_force(
316 xfs_mount_t *mp,
317 xfs_lsn_t lsn,
318 uint flags,
319 int *log_flushed)
320{
321 xlog_t *log = mp->m_log;
322 int dummy;
323
324 if (!log_flushed)
325 log_flushed = &dummy;
326
327 ASSERT(flags & XFS_LOG_FORCE);
328
329 XFS_STATS_INC(xs_log_force);
330
331 if (log->l_flags & XLOG_IO_ERROR)
332 return XFS_ERROR(EIO);
333 if (lsn == 0)
334 return xlog_state_sync_all(log, flags, log_flushed);
335 else
336 return xlog_state_sync(log, lsn, flags, log_flushed);
337} /* _xfs_log_force */
338
339/*
340 * Wrapper for _xfs_log_force(), to be used when caller doesn't care
341 * about errors or whether the log was flushed or not. This is the normal
342 * interface to use when trying to unpin items or move the log forward.
343 */
344void
345xfs_log_force(
346 xfs_mount_t *mp,
347 xfs_lsn_t lsn,
348 uint flags)
349{
350 int error;
351 error = _xfs_log_force(mp, lsn, flags, NULL);
352 if (error) {
353 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
354 "error %d returned.", error);
355 }
356}
357
358
359/* 294/*
360 * Attaches a new iclog I/O completion callback routine during 295 * Attaches a new iclog I/O completion callback routine during
361 * transaction commit. If the log is in error state, a non-zero 296 * transaction commit. If the log is in error state, a non-zero
@@ -602,7 +537,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
602 if (mp->m_flags & XFS_MOUNT_RDONLY) 537 if (mp->m_flags & XFS_MOUNT_RDONLY)
603 return 0; 538 return 0;
604 539
605 error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL); 540 error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
606 ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); 541 ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
607 542
608#ifdef DEBUG 543#ifdef DEBUG
@@ -618,7 +553,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
618 if (! (XLOG_FORCED_SHUTDOWN(log))) { 553 if (! (XLOG_FORCED_SHUTDOWN(log))) {
619 reg[0].i_addr = (void*)&magic; 554 reg[0].i_addr = (void*)&magic;
620 reg[0].i_len = sizeof(magic); 555 reg[0].i_len = sizeof(magic);
621 XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT); 556 reg[0].i_type = XLOG_REG_TYPE_UNMOUNT;
622 557
623 error = xfs_log_reserve(mp, 600, 1, &tic, 558 error = xfs_log_reserve(mp, 600, 1, &tic,
624 XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE); 559 XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
@@ -988,35 +923,6 @@ xlog_iodone(xfs_buf_t *bp)
988} /* xlog_iodone */ 923} /* xlog_iodone */
989 924
990/* 925/*
991 * The bdstrat callback function for log bufs. This gives us a central
992 * place to trap bufs in case we get hit by a log I/O error and need to
993 * shutdown. Actually, in practice, even when we didn't get a log error,
994 * we transition the iclogs to IOERROR state *after* flushing all existing
995 * iclogs to disk. This is because we don't want anymore new transactions to be
996 * started or completed afterwards.
997 */
998STATIC int
999xlog_bdstrat_cb(struct xfs_buf *bp)
1000{
1001 xlog_in_core_t *iclog;
1002
1003 iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
1004
1005 if ((iclog->ic_state & XLOG_STATE_IOERROR) == 0) {
1006 /* note for irix bstrat will need struct bdevsw passed
1007 * Fix the following macro if the code ever is merged
1008 */
1009 XFS_bdstrat(bp);
1010 return 0;
1011 }
1012
1013 XFS_BUF_ERROR(bp, EIO);
1014 XFS_BUF_STALE(bp);
1015 xfs_biodone(bp);
1016 return XFS_ERROR(EIO);
1017}
1018
1019/*
1020 * Return size of each in-core log record buffer. 926 * Return size of each in-core log record buffer.
1021 * 927 *
1022 * All machines get 8 x 32kB buffers by default, unless tuned otherwise. 928 * All machines get 8 x 32kB buffers by default, unless tuned otherwise.
@@ -1158,7 +1064,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1158 if (!bp) 1064 if (!bp)
1159 goto out_free_log; 1065 goto out_free_log;
1160 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); 1066 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1161 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1162 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); 1067 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1163 ASSERT(XFS_BUF_ISBUSY(bp)); 1068 ASSERT(XFS_BUF_ISBUSY(bp));
1164 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 1069 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
@@ -1196,7 +1101,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1196 if (!XFS_BUF_CPSEMA(bp)) 1101 if (!XFS_BUF_CPSEMA(bp))
1197 ASSERT(0); 1102 ASSERT(0);
1198 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); 1103 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1199 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1200 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); 1104 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1201 iclog->ic_bp = bp; 1105 iclog->ic_bp = bp;
1202 iclog->ic_data = bp->b_addr; 1106 iclog->ic_data = bp->b_addr;
@@ -1268,7 +1172,7 @@ xlog_commit_record(xfs_mount_t *mp,
1268 1172
1269 reg[0].i_addr = NULL; 1173 reg[0].i_addr = NULL;
1270 reg[0].i_len = 0; 1174 reg[0].i_len = 0;
1271 XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT); 1175 reg[0].i_type = XLOG_REG_TYPE_COMMIT;
1272 1176
1273 ASSERT_ALWAYS(iclog); 1177 ASSERT_ALWAYS(iclog);
1274 if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, 1178 if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
@@ -1343,6 +1247,37 @@ xlog_grant_push_ail(xfs_mount_t *mp,
1343 xfs_trans_ail_push(log->l_ailp, threshold_lsn); 1247 xfs_trans_ail_push(log->l_ailp, threshold_lsn);
1344} /* xlog_grant_push_ail */ 1248} /* xlog_grant_push_ail */
1345 1249
1250/*
1251 * The bdstrat callback function for log bufs. This gives us a central
1252 * place to trap bufs in case we get hit by a log I/O error and need to
1253 * shutdown. Actually, in practice, even when we didn't get a log error,
1254 * we transition the iclogs to IOERROR state *after* flushing all existing
1255 * iclogs to disk. This is because we don't want anymore new transactions to be
1256 * started or completed afterwards.
1257 */
1258STATIC int
1259xlog_bdstrat(
1260 struct xfs_buf *bp)
1261{
1262 struct xlog_in_core *iclog;
1263
1264 iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
1265 if (iclog->ic_state & XLOG_STATE_IOERROR) {
1266 XFS_BUF_ERROR(bp, EIO);
1267 XFS_BUF_STALE(bp);
1268 xfs_biodone(bp);
1269 /*
1270 * It would seem logical to return EIO here, but we rely on
1271 * the log state machine to propagate I/O errors instead of
1272 * doing it here.
1273 */
1274 return 0;
1275 }
1276
1277 bp->b_flags |= _XBF_RUN_QUEUES;
1278 xfs_buf_iorequest(bp);
1279 return 0;
1280}
1346 1281
1347/* 1282/*
1348 * Flush out the in-core log (iclog) to the on-disk log in an asynchronous 1283 * Flush out the in-core log (iclog) to the on-disk log in an asynchronous
@@ -1462,7 +1397,7 @@ xlog_sync(xlog_t *log,
1462 */ 1397 */
1463 XFS_BUF_WRITE(bp); 1398 XFS_BUF_WRITE(bp);
1464 1399
1465 if ((error = XFS_bwrite(bp))) { 1400 if ((error = xlog_bdstrat(bp))) {
1466 xfs_ioerror_alert("xlog_sync", log->l_mp, bp, 1401 xfs_ioerror_alert("xlog_sync", log->l_mp, bp,
1467 XFS_BUF_ADDR(bp)); 1402 XFS_BUF_ADDR(bp));
1468 return error; 1403 return error;
@@ -1502,7 +1437,7 @@ xlog_sync(xlog_t *log,
1502 /* account for internal log which doesn't start at block #0 */ 1437 /* account for internal log which doesn't start at block #0 */
1503 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); 1438 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
1504 XFS_BUF_WRITE(bp); 1439 XFS_BUF_WRITE(bp);
1505 if ((error = XFS_bwrite(bp))) { 1440 if ((error = xlog_bdstrat(bp))) {
1506 xfs_ioerror_alert("xlog_sync (split)", log->l_mp, 1441 xfs_ioerror_alert("xlog_sync (split)", log->l_mp,
1507 bp, XFS_BUF_ADDR(bp)); 1442 bp, XFS_BUF_ADDR(bp));
1508 return error; 1443 return error;
@@ -2854,7 +2789,6 @@ xlog_state_switch_iclogs(xlog_t *log,
2854 log->l_iclog = iclog->ic_next; 2789 log->l_iclog = iclog->ic_next;
2855} /* xlog_state_switch_iclogs */ 2790} /* xlog_state_switch_iclogs */
2856 2791
2857
2858/* 2792/*
2859 * Write out all data in the in-core log as of this exact moment in time. 2793 * Write out all data in the in-core log as of this exact moment in time.
2860 * 2794 *
@@ -2882,11 +2816,17 @@ xlog_state_switch_iclogs(xlog_t *log,
2882 * b) when we return from flushing out this iclog, it is still 2816 * b) when we return from flushing out this iclog, it is still
2883 * not in the active nor dirty state. 2817 * not in the active nor dirty state.
2884 */ 2818 */
2885STATIC int 2819int
2886xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) 2820_xfs_log_force(
2821 struct xfs_mount *mp,
2822 uint flags,
2823 int *log_flushed)
2887{ 2824{
2888 xlog_in_core_t *iclog; 2825 struct log *log = mp->m_log;
2889 xfs_lsn_t lsn; 2826 struct xlog_in_core *iclog;
2827 xfs_lsn_t lsn;
2828
2829 XFS_STATS_INC(xs_log_force);
2890 2830
2891 spin_lock(&log->l_icloglock); 2831 spin_lock(&log->l_icloglock);
2892 2832
@@ -2932,7 +2872,9 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
2932 2872
2933 if (xlog_state_release_iclog(log, iclog)) 2873 if (xlog_state_release_iclog(log, iclog))
2934 return XFS_ERROR(EIO); 2874 return XFS_ERROR(EIO);
2935 *log_flushed = 1; 2875
2876 if (log_flushed)
2877 *log_flushed = 1;
2936 spin_lock(&log->l_icloglock); 2878 spin_lock(&log->l_icloglock);
2937 if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn && 2879 if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn &&
2938 iclog->ic_state != XLOG_STATE_DIRTY) 2880 iclog->ic_state != XLOG_STATE_DIRTY)
@@ -2976,19 +2918,37 @@ maybe_sleep:
2976 */ 2918 */
2977 if (iclog->ic_state & XLOG_STATE_IOERROR) 2919 if (iclog->ic_state & XLOG_STATE_IOERROR)
2978 return XFS_ERROR(EIO); 2920 return XFS_ERROR(EIO);
2979 *log_flushed = 1; 2921 if (log_flushed)
2980 2922 *log_flushed = 1;
2981 } else { 2923 } else {
2982 2924
2983no_sleep: 2925no_sleep:
2984 spin_unlock(&log->l_icloglock); 2926 spin_unlock(&log->l_icloglock);
2985 } 2927 }
2986 return 0; 2928 return 0;
2987} /* xlog_state_sync_all */ 2929}
2988 2930
2931/*
2932 * Wrapper for _xfs_log_force(), to be used when caller doesn't care
2933 * about errors or whether the log was flushed or not. This is the normal
2934 * interface to use when trying to unpin items or move the log forward.
2935 */
2936void
2937xfs_log_force(
2938 xfs_mount_t *mp,
2939 uint flags)
2940{
2941 int error;
2942
2943 error = _xfs_log_force(mp, flags, NULL);
2944 if (error) {
2945 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
2946 "error %d returned.", error);
2947 }
2948}
2989 2949
2990/* 2950/*
2991 * Used by code which implements synchronous log forces. 2951 * Force the in-core log to disk for a specific LSN.
2992 * 2952 *
2993 * Find in-core log with lsn. 2953 * Find in-core log with lsn.
2994 * If it is in the DIRTY state, just return. 2954 * If it is in the DIRTY state, just return.
@@ -2996,109 +2956,142 @@ no_sleep:
2996 * state and go to sleep or return. 2956 * state and go to sleep or return.
2997 * If it is in any other state, go to sleep or return. 2957 * If it is in any other state, go to sleep or return.
2998 * 2958 *
2999 * If filesystem activity goes to zero, the iclog will get flushed only by 2959 * Synchronous forces are implemented with a signal variable. All callers
3000 * bdflush(). 2960 * to force a given lsn to disk will wait on a the sv attached to the
2961 * specific in-core log. When given in-core log finally completes its
2962 * write to disk, that thread will wake up all threads waiting on the
2963 * sv.
3001 */ 2964 */
3002STATIC int 2965int
3003xlog_state_sync(xlog_t *log, 2966_xfs_log_force_lsn(
3004 xfs_lsn_t lsn, 2967 struct xfs_mount *mp,
3005 uint flags, 2968 xfs_lsn_t lsn,
3006 int *log_flushed) 2969 uint flags,
2970 int *log_flushed)
3007{ 2971{
3008 xlog_in_core_t *iclog; 2972 struct log *log = mp->m_log;
3009 int already_slept = 0; 2973 struct xlog_in_core *iclog;
2974 int already_slept = 0;
3010 2975
3011try_again: 2976 ASSERT(lsn != 0);
3012 spin_lock(&log->l_icloglock);
3013 iclog = log->l_iclog;
3014 2977
3015 if (iclog->ic_state & XLOG_STATE_IOERROR) { 2978 XFS_STATS_INC(xs_log_force);
3016 spin_unlock(&log->l_icloglock);
3017 return XFS_ERROR(EIO);
3018 }
3019
3020 do {
3021 if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
3022 iclog = iclog->ic_next;
3023 continue;
3024 }
3025 2979
3026 if (iclog->ic_state == XLOG_STATE_DIRTY) { 2980try_again:
2981 spin_lock(&log->l_icloglock);
2982 iclog = log->l_iclog;
2983 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3027 spin_unlock(&log->l_icloglock); 2984 spin_unlock(&log->l_icloglock);
3028 return 0; 2985 return XFS_ERROR(EIO);
3029 } 2986 }
3030 2987
3031 if (iclog->ic_state == XLOG_STATE_ACTIVE) { 2988 do {
3032 /* 2989 if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
3033 * We sleep here if we haven't already slept (e.g. 2990 iclog = iclog->ic_next;
3034 * this is the first time we've looked at the correct 2991 continue;
3035 * iclog buf) and the buffer before us is going to 2992 }
3036 * be sync'ed. The reason for this is that if we 2993
3037 * are doing sync transactions here, by waiting for 2994 if (iclog->ic_state == XLOG_STATE_DIRTY) {
3038 * the previous I/O to complete, we can allow a few 2995 spin_unlock(&log->l_icloglock);
3039 * more transactions into this iclog before we close 2996 return 0;
3040 * it down. 2997 }
3041 * 2998
3042 * Otherwise, we mark the buffer WANT_SYNC, and bump 2999 if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3043 * up the refcnt so we can release the log (which drops 3000 /*
3044 * the ref count). The state switch keeps new transaction 3001 * We sleep here if we haven't already slept (e.g.
3045 * commits from using this buffer. When the current commits 3002 * this is the first time we've looked at the correct
3046 * finish writing into the buffer, the refcount will drop to 3003 * iclog buf) and the buffer before us is going to
3047 * zero and the buffer will go out then. 3004 * be sync'ed. The reason for this is that if we
3048 */ 3005 * are doing sync transactions here, by waiting for
3049 if (!already_slept && 3006 * the previous I/O to complete, we can allow a few
3050 (iclog->ic_prev->ic_state & (XLOG_STATE_WANT_SYNC | 3007 * more transactions into this iclog before we close
3051 XLOG_STATE_SYNCING))) { 3008 * it down.
3052 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); 3009 *
3053 XFS_STATS_INC(xs_log_force_sleep); 3010 * Otherwise, we mark the buffer WANT_SYNC, and bump
3054 sv_wait(&iclog->ic_prev->ic_write_wait, PSWP, 3011 * up the refcnt so we can release the log (which
3055 &log->l_icloglock, s); 3012 * drops the ref count). The state switch keeps new
3056 *log_flushed = 1; 3013 * transaction commits from using this buffer. When
3057 already_slept = 1; 3014 * the current commits finish writing into the buffer,
3058 goto try_again; 3015 * the refcount will drop to zero and the buffer will
3059 } else { 3016 * go out then.
3017 */
3018 if (!already_slept &&
3019 (iclog->ic_prev->ic_state &
3020 (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
3021 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
3022
3023 XFS_STATS_INC(xs_log_force_sleep);
3024
3025 sv_wait(&iclog->ic_prev->ic_write_wait,
3026 PSWP, &log->l_icloglock, s);
3027 if (log_flushed)
3028 *log_flushed = 1;
3029 already_slept = 1;
3030 goto try_again;
3031 }
3060 atomic_inc(&iclog->ic_refcnt); 3032 atomic_inc(&iclog->ic_refcnt);
3061 xlog_state_switch_iclogs(log, iclog, 0); 3033 xlog_state_switch_iclogs(log, iclog, 0);
3062 spin_unlock(&log->l_icloglock); 3034 spin_unlock(&log->l_icloglock);
3063 if (xlog_state_release_iclog(log, iclog)) 3035 if (xlog_state_release_iclog(log, iclog))
3064 return XFS_ERROR(EIO); 3036 return XFS_ERROR(EIO);
3065 *log_flushed = 1; 3037 if (log_flushed)
3038 *log_flushed = 1;
3066 spin_lock(&log->l_icloglock); 3039 spin_lock(&log->l_icloglock);
3067 } 3040 }
3068 }
3069 3041
3070 if ((flags & XFS_LOG_SYNC) && /* sleep */ 3042 if ((flags & XFS_LOG_SYNC) && /* sleep */
3071 !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { 3043 !(iclog->ic_state &
3044 (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
3045 /*
3046 * Don't wait on completion if we know that we've
3047 * gotten a log write error.
3048 */
3049 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3050 spin_unlock(&log->l_icloglock);
3051 return XFS_ERROR(EIO);
3052 }
3053 XFS_STATS_INC(xs_log_force_sleep);
3054 sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
3055 /*
3056 * No need to grab the log lock here since we're
3057 * only deciding whether or not to return EIO
3058 * and the memory read should be atomic.
3059 */
3060 if (iclog->ic_state & XLOG_STATE_IOERROR)
3061 return XFS_ERROR(EIO);
3072 3062
3073 /* 3063 if (log_flushed)
3074 * Don't wait on completion if we know that we've 3064 *log_flushed = 1;
3075 * gotten a log write error. 3065 } else { /* just return */
3076 */
3077 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3078 spin_unlock(&log->l_icloglock); 3066 spin_unlock(&log->l_icloglock);
3079 return XFS_ERROR(EIO);
3080 } 3067 }
3081 XFS_STATS_INC(xs_log_force_sleep);
3082 sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
3083 /*
3084 * No need to grab the log lock here since we're
3085 * only deciding whether or not to return EIO
3086 * and the memory read should be atomic.
3087 */
3088 if (iclog->ic_state & XLOG_STATE_IOERROR)
3089 return XFS_ERROR(EIO);
3090 *log_flushed = 1;
3091 } else { /* just return */
3092 spin_unlock(&log->l_icloglock);
3093 }
3094 return 0;
3095 3068
3096 } while (iclog != log->l_iclog); 3069 return 0;
3070 } while (iclog != log->l_iclog);
3097 3071
3098 spin_unlock(&log->l_icloglock); 3072 spin_unlock(&log->l_icloglock);
3099 return 0; 3073 return 0;
3100} /* xlog_state_sync */ 3074}
3101 3075
3076/*
3077 * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care
3078 * about errors or whether the log was flushed or not. This is the normal
3079 * interface to use when trying to unpin items or move the log forward.
3080 */
3081void
3082xfs_log_force_lsn(
3083 xfs_mount_t *mp,
3084 xfs_lsn_t lsn,
3085 uint flags)
3086{
3087 int error;
3088
3089 error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
3090 if (error) {
3091 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
3092 "error %d returned.", error);
3093 }
3094}
3102 3095
3103/* 3096/*
3104 * Called when we want to mark the current iclog as being ready to sync to 3097 * Called when we want to mark the current iclog as being ready to sync to
@@ -3463,7 +3456,6 @@ xfs_log_force_umount(
3463 xlog_ticket_t *tic; 3456 xlog_ticket_t *tic;
3464 xlog_t *log; 3457 xlog_t *log;
3465 int retval; 3458 int retval;
3466 int dummy;
3467 3459
3468 log = mp->m_log; 3460 log = mp->m_log;
3469 3461
@@ -3537,13 +3529,14 @@ xfs_log_force_umount(
3537 } 3529 }
3538 spin_unlock(&log->l_grant_lock); 3530 spin_unlock(&log->l_grant_lock);
3539 3531
3540 if (! (log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { 3532 if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
3541 ASSERT(!logerror); 3533 ASSERT(!logerror);
3542 /* 3534 /*
3543 * Force the incore logs to disk before shutting the 3535 * Force the incore logs to disk before shutting the
3544 * log down completely. 3536 * log down completely.
3545 */ 3537 */
3546 xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy); 3538 _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
3539
3547 spin_lock(&log->l_icloglock); 3540 spin_lock(&log->l_icloglock);
3548 retval = xlog_state_ioerror(log); 3541 retval = xlog_state_ioerror(log);
3549 spin_unlock(&log->l_icloglock); 3542 spin_unlock(&log->l_icloglock);
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index d0c9baa50b1a..7074be9d13e9 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -70,14 +70,8 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
70 * Flags to xfs_log_force() 70 * Flags to xfs_log_force()
71 * 71 *
72 * XFS_LOG_SYNC: Synchronous force in-core log to disk 72 * XFS_LOG_SYNC: Synchronous force in-core log to disk
73 * XFS_LOG_FORCE: Start in-core log write now.
74 * XFS_LOG_URGE: Start write within some window of time.
75 *
76 * Note: Either XFS_LOG_FORCE or XFS_LOG_URGE must be set.
77 */ 73 */
78#define XFS_LOG_SYNC 0x1 74#define XFS_LOG_SYNC 0x1
79#define XFS_LOG_FORCE 0x2
80#define XFS_LOG_URGE 0x4
81 75
82#endif /* __KERNEL__ */ 76#endif /* __KERNEL__ */
83 77
@@ -110,10 +104,8 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
110#define XLOG_REG_TYPE_TRANSHDR 19 104#define XLOG_REG_TYPE_TRANSHDR 19
111#define XLOG_REG_TYPE_MAX 19 105#define XLOG_REG_TYPE_MAX 19
112 106
113#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t))
114
115typedef struct xfs_log_iovec { 107typedef struct xfs_log_iovec {
116 xfs_caddr_t i_addr; /* beginning address of region */ 108 xfs_caddr_t i_addr; /* beginning address of region */
117 int i_len; /* length in bytes of region */ 109 int i_len; /* length in bytes of region */
118 uint i_type; /* type of region */ 110 uint i_type; /* type of region */
119} xfs_log_iovec_t; 111} xfs_log_iovec_t;
@@ -140,12 +132,17 @@ xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
140 void **iclog, 132 void **iclog,
141 uint flags); 133 uint flags);
142int _xfs_log_force(struct xfs_mount *mp, 134int _xfs_log_force(struct xfs_mount *mp,
143 xfs_lsn_t lsn,
144 uint flags, 135 uint flags,
145 int *log_forced); 136 int *log_forced);
146void xfs_log_force(struct xfs_mount *mp, 137void xfs_log_force(struct xfs_mount *mp,
147 xfs_lsn_t lsn,
148 uint flags); 138 uint flags);
139int _xfs_log_force_lsn(struct xfs_mount *mp,
140 xfs_lsn_t lsn,
141 uint flags,
142 int *log_forced);
143void xfs_log_force_lsn(struct xfs_mount *mp,
144 xfs_lsn_t lsn,
145 uint flags);
149int xfs_log_mount(struct xfs_mount *mp, 146int xfs_log_mount(struct xfs_mount *mp,
150 struct xfs_buftarg *log_target, 147 struct xfs_buftarg *log_target,
151 xfs_daddr_t start_block, 148 xfs_daddr_t start_block,
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index d55662db7077..fd02a18facd5 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -443,14 +443,9 @@ typedef struct log {
443 443
444/* common routines */ 444/* common routines */
445extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); 445extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
446extern int xlog_find_tail(xlog_t *log,
447 xfs_daddr_t *head_blk,
448 xfs_daddr_t *tail_blk);
449extern int xlog_recover(xlog_t *log); 446extern int xlog_recover(xlog_t *log);
450extern int xlog_recover_finish(xlog_t *log); 447extern int xlog_recover_finish(xlog_t *log);
451extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); 448extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
452extern struct xfs_buf *xlog_get_bp(xlog_t *, int);
453extern void xlog_put_bp(struct xfs_buf *);
454 449
455extern kmem_zone_t *xfs_log_ticket_zone; 450extern kmem_zone_t *xfs_log_ticket_zone;
456 451
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 69ac2e5ef20c..22e6efdc17ea 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -50,8 +50,6 @@
50 50
51STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); 51STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *);
52STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); 52STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t);
53STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q,
54 xlog_recover_item_t *item);
55#if defined(DEBUG) 53#if defined(DEBUG)
56STATIC void xlog_recover_check_summary(xlog_t *); 54STATIC void xlog_recover_check_summary(xlog_t *);
57#else 55#else
@@ -68,7 +66,7 @@ STATIC void xlog_recover_check_summary(xlog_t *);
68 ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) 66 ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) )
69#define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) 67#define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask)
70 68
71xfs_buf_t * 69STATIC xfs_buf_t *
72xlog_get_bp( 70xlog_get_bp(
73 xlog_t *log, 71 xlog_t *log,
74 int nbblks) 72 int nbblks)
@@ -88,7 +86,7 @@ xlog_get_bp(
88 return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); 86 return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp);
89} 87}
90 88
91void 89STATIC void
92xlog_put_bp( 90xlog_put_bp(
93 xfs_buf_t *bp) 91 xfs_buf_t *bp)
94{ 92{
@@ -805,7 +803,7 @@ xlog_find_head(
805 * We could speed up search by using current head_blk buffer, but it is not 803 * We could speed up search by using current head_blk buffer, but it is not
806 * available. 804 * available.
807 */ 805 */
808int 806STATIC int
809xlog_find_tail( 807xlog_find_tail(
810 xlog_t *log, 808 xlog_t *log,
811 xfs_daddr_t *head_blk, 809 xfs_daddr_t *head_blk,
@@ -1367,36 +1365,45 @@ xlog_clear_stale_blocks(
1367 1365
1368STATIC xlog_recover_t * 1366STATIC xlog_recover_t *
1369xlog_recover_find_tid( 1367xlog_recover_find_tid(
1370 xlog_recover_t *q, 1368 struct hlist_head *head,
1371 xlog_tid_t tid) 1369 xlog_tid_t tid)
1372{ 1370{
1373 xlog_recover_t *p = q; 1371 xlog_recover_t *trans;
1372 struct hlist_node *n;
1374 1373
1375 while (p != NULL) { 1374 hlist_for_each_entry(trans, n, head, r_list) {
1376 if (p->r_log_tid == tid) 1375 if (trans->r_log_tid == tid)
1377 break; 1376 return trans;
1378 p = p->r_next;
1379 } 1377 }
1380 return p; 1378 return NULL;
1381} 1379}
1382 1380
1383STATIC void 1381STATIC void
1384xlog_recover_put_hashq( 1382xlog_recover_new_tid(
1385 xlog_recover_t **q, 1383 struct hlist_head *head,
1386 xlog_recover_t *trans) 1384 xlog_tid_t tid,
1385 xfs_lsn_t lsn)
1387{ 1386{
1388 trans->r_next = *q; 1387 xlog_recover_t *trans;
1389 *q = trans; 1388
1389 trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
1390 trans->r_log_tid = tid;
1391 trans->r_lsn = lsn;
1392 INIT_LIST_HEAD(&trans->r_itemq);
1393
1394 INIT_HLIST_NODE(&trans->r_list);
1395 hlist_add_head(&trans->r_list, head);
1390} 1396}
1391 1397
1392STATIC void 1398STATIC void
1393xlog_recover_add_item( 1399xlog_recover_add_item(
1394 xlog_recover_item_t **itemq) 1400 struct list_head *head)
1395{ 1401{
1396 xlog_recover_item_t *item; 1402 xlog_recover_item_t *item;
1397 1403
1398 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); 1404 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
1399 xlog_recover_insert_item_backq(itemq, item); 1405 INIT_LIST_HEAD(&item->ri_list);
1406 list_add_tail(&item->ri_list, head);
1400} 1407}
1401 1408
1402STATIC int 1409STATIC int
@@ -1409,8 +1416,7 @@ xlog_recover_add_to_cont_trans(
1409 xfs_caddr_t ptr, old_ptr; 1416 xfs_caddr_t ptr, old_ptr;
1410 int old_len; 1417 int old_len;
1411 1418
1412 item = trans->r_itemq; 1419 if (list_empty(&trans->r_itemq)) {
1413 if (item == NULL) {
1414 /* finish copying rest of trans header */ 1420 /* finish copying rest of trans header */
1415 xlog_recover_add_item(&trans->r_itemq); 1421 xlog_recover_add_item(&trans->r_itemq);
1416 ptr = (xfs_caddr_t) &trans->r_theader + 1422 ptr = (xfs_caddr_t) &trans->r_theader +
@@ -1418,7 +1424,8 @@ xlog_recover_add_to_cont_trans(
1418 memcpy(ptr, dp, len); /* d, s, l */ 1424 memcpy(ptr, dp, len); /* d, s, l */
1419 return 0; 1425 return 0;
1420 } 1426 }
1421 item = item->ri_prev; 1427 /* take the tail entry */
1428 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
1422 1429
1423 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; 1430 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
1424 old_len = item->ri_buf[item->ri_cnt-1].i_len; 1431 old_len = item->ri_buf[item->ri_cnt-1].i_len;
@@ -1455,8 +1462,7 @@ xlog_recover_add_to_trans(
1455 1462
1456 if (!len) 1463 if (!len)
1457 return 0; 1464 return 0;
1458 item = trans->r_itemq; 1465 if (list_empty(&trans->r_itemq)) {
1459 if (item == NULL) {
1460 /* we need to catch log corruptions here */ 1466 /* we need to catch log corruptions here */
1461 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { 1467 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
1462 xlog_warn("XFS: xlog_recover_add_to_trans: " 1468 xlog_warn("XFS: xlog_recover_add_to_trans: "
@@ -1474,12 +1480,15 @@ xlog_recover_add_to_trans(
1474 memcpy(ptr, dp, len); 1480 memcpy(ptr, dp, len);
1475 in_f = (xfs_inode_log_format_t *)ptr; 1481 in_f = (xfs_inode_log_format_t *)ptr;
1476 1482
1477 if (item->ri_prev->ri_total != 0 && 1483 /* take the tail entry */
1478 item->ri_prev->ri_total == item->ri_prev->ri_cnt) { 1484 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
1485 if (item->ri_total != 0 &&
1486 item->ri_total == item->ri_cnt) {
1487 /* tail item is in use, get a new one */
1479 xlog_recover_add_item(&trans->r_itemq); 1488 xlog_recover_add_item(&trans->r_itemq);
1489 item = list_entry(trans->r_itemq.prev,
1490 xlog_recover_item_t, ri_list);
1480 } 1491 }
1481 item = trans->r_itemq;
1482 item = item->ri_prev;
1483 1492
1484 if (item->ri_total == 0) { /* first region to be added */ 1493 if (item->ri_total == 0) { /* first region to be added */
1485 if (in_f->ilf_size == 0 || 1494 if (in_f->ilf_size == 0 ||
@@ -1504,96 +1513,29 @@ xlog_recover_add_to_trans(
1504 return 0; 1513 return 0;
1505} 1514}
1506 1515
1507STATIC void 1516/*
1508xlog_recover_new_tid( 1517 * Sort the log items in the transaction. Cancelled buffers need
1509 xlog_recover_t **q, 1518 * to be put first so they are processed before any items that might
1510 xlog_tid_t tid, 1519 * modify the buffers. If they are cancelled, then the modifications
1511 xfs_lsn_t lsn) 1520 * don't need to be replayed.
1512{ 1521 */
1513 xlog_recover_t *trans;
1514
1515 trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
1516 trans->r_log_tid = tid;
1517 trans->r_lsn = lsn;
1518 xlog_recover_put_hashq(q, trans);
1519}
1520
1521STATIC int
1522xlog_recover_unlink_tid(
1523 xlog_recover_t **q,
1524 xlog_recover_t *trans)
1525{
1526 xlog_recover_t *tp;
1527 int found = 0;
1528
1529 ASSERT(trans != NULL);
1530 if (trans == *q) {
1531 *q = (*q)->r_next;
1532 } else {
1533 tp = *q;
1534 while (tp) {
1535 if (tp->r_next == trans) {
1536 found = 1;
1537 break;
1538 }
1539 tp = tp->r_next;
1540 }
1541 if (!found) {
1542 xlog_warn(
1543 "XFS: xlog_recover_unlink_tid: trans not found");
1544 ASSERT(0);
1545 return XFS_ERROR(EIO);
1546 }
1547 tp->r_next = tp->r_next->r_next;
1548 }
1549 return 0;
1550}
1551
1552STATIC void
1553xlog_recover_insert_item_backq(
1554 xlog_recover_item_t **q,
1555 xlog_recover_item_t *item)
1556{
1557 if (*q == NULL) {
1558 item->ri_prev = item->ri_next = item;
1559 *q = item;
1560 } else {
1561 item->ri_next = *q;
1562 item->ri_prev = (*q)->ri_prev;
1563 (*q)->ri_prev = item;
1564 item->ri_prev->ri_next = item;
1565 }
1566}
1567
1568STATIC void
1569xlog_recover_insert_item_frontq(
1570 xlog_recover_item_t **q,
1571 xlog_recover_item_t *item)
1572{
1573 xlog_recover_insert_item_backq(q, item);
1574 *q = item;
1575}
1576
1577STATIC int 1522STATIC int
1578xlog_recover_reorder_trans( 1523xlog_recover_reorder_trans(
1579 xlog_recover_t *trans) 1524 xlog_recover_t *trans)
1580{ 1525{
1581 xlog_recover_item_t *first_item, *itemq, *itemq_next; 1526 xlog_recover_item_t *item, *n;
1582 xfs_buf_log_format_t *buf_f; 1527 LIST_HEAD(sort_list);
1583 ushort flags = 0;
1584 1528
1585 first_item = itemq = trans->r_itemq; 1529 list_splice_init(&trans->r_itemq, &sort_list);
1586 trans->r_itemq = NULL; 1530 list_for_each_entry_safe(item, n, &sort_list, ri_list) {
1587 do { 1531 xfs_buf_log_format_t *buf_f;
1588 itemq_next = itemq->ri_next;
1589 buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr;
1590 1532
1591 switch (ITEM_TYPE(itemq)) { 1533 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
1534
1535 switch (ITEM_TYPE(item)) {
1592 case XFS_LI_BUF: 1536 case XFS_LI_BUF:
1593 flags = buf_f->blf_flags; 1537 if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) {
1594 if (!(flags & XFS_BLI_CANCEL)) { 1538 list_move(&item->ri_list, &trans->r_itemq);
1595 xlog_recover_insert_item_frontq(&trans->r_itemq,
1596 itemq);
1597 break; 1539 break;
1598 } 1540 }
1599 case XFS_LI_INODE: 1541 case XFS_LI_INODE:
@@ -1601,7 +1543,7 @@ xlog_recover_reorder_trans(
1601 case XFS_LI_QUOTAOFF: 1543 case XFS_LI_QUOTAOFF:
1602 case XFS_LI_EFD: 1544 case XFS_LI_EFD:
1603 case XFS_LI_EFI: 1545 case XFS_LI_EFI:
1604 xlog_recover_insert_item_backq(&trans->r_itemq, itemq); 1546 list_move_tail(&item->ri_list, &trans->r_itemq);
1605 break; 1547 break;
1606 default: 1548 default:
1607 xlog_warn( 1549 xlog_warn(
@@ -1609,8 +1551,8 @@ xlog_recover_reorder_trans(
1609 ASSERT(0); 1551 ASSERT(0);
1610 return XFS_ERROR(EIO); 1552 return XFS_ERROR(EIO);
1611 } 1553 }
1612 itemq = itemq_next; 1554 }
1613 } while (first_item != itemq); 1555 ASSERT(list_empty(&sort_list));
1614 return 0; 1556 return 0;
1615} 1557}
1616 1558
@@ -2242,9 +2184,9 @@ xlog_recover_do_buffer_trans(
2242 } 2184 }
2243 2185
2244 mp = log->l_mp; 2186 mp = log->l_mp;
2245 buf_flags = XFS_BUF_LOCK; 2187 buf_flags = XBF_LOCK;
2246 if (!(flags & XFS_BLI_INODE_BUF)) 2188 if (!(flags & XFS_BLI_INODE_BUF))
2247 buf_flags |= XFS_BUF_MAPPED; 2189 buf_flags |= XBF_MAPPED;
2248 2190
2249 bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); 2191 bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags);
2250 if (XFS_BUF_ISERROR(bp)) { 2192 if (XFS_BUF_ISERROR(bp)) {
@@ -2346,7 +2288,7 @@ xlog_recover_do_inode_trans(
2346 } 2288 }
2347 2289
2348 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 2290 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
2349 XFS_BUF_LOCK); 2291 XBF_LOCK);
2350 if (XFS_BUF_ISERROR(bp)) { 2292 if (XFS_BUF_ISERROR(bp)) {
2351 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, 2293 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
2352 bp, in_f->ilf_blkno); 2294 bp, in_f->ilf_blkno);
@@ -2814,14 +2756,13 @@ xlog_recover_do_trans(
2814 int pass) 2756 int pass)
2815{ 2757{
2816 int error = 0; 2758 int error = 0;
2817 xlog_recover_item_t *item, *first_item; 2759 xlog_recover_item_t *item;
2818 2760
2819 error = xlog_recover_reorder_trans(trans); 2761 error = xlog_recover_reorder_trans(trans);
2820 if (error) 2762 if (error)
2821 return error; 2763 return error;
2822 2764
2823 first_item = item = trans->r_itemq; 2765 list_for_each_entry(item, &trans->r_itemq, ri_list) {
2824 do {
2825 switch (ITEM_TYPE(item)) { 2766 switch (ITEM_TYPE(item)) {
2826 case XFS_LI_BUF: 2767 case XFS_LI_BUF:
2827 error = xlog_recover_do_buffer_trans(log, item, pass); 2768 error = xlog_recover_do_buffer_trans(log, item, pass);
@@ -2854,8 +2795,7 @@ xlog_recover_do_trans(
2854 2795
2855 if (error) 2796 if (error)
2856 return error; 2797 return error;
2857 item = item->ri_next; 2798 }
2858 } while (first_item != item);
2859 2799
2860 return 0; 2800 return 0;
2861} 2801}
@@ -2869,21 +2809,18 @@ STATIC void
2869xlog_recover_free_trans( 2809xlog_recover_free_trans(
2870 xlog_recover_t *trans) 2810 xlog_recover_t *trans)
2871{ 2811{
2872 xlog_recover_item_t *first_item, *item, *free_item; 2812 xlog_recover_item_t *item, *n;
2873 int i; 2813 int i;
2874 2814
2875 item = first_item = trans->r_itemq; 2815 list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
2876 do { 2816 /* Free the regions in the item. */
2877 free_item = item; 2817 list_del(&item->ri_list);
2878 item = item->ri_next; 2818 for (i = 0; i < item->ri_cnt; i++)
2879 /* Free the regions in the item. */ 2819 kmem_free(item->ri_buf[i].i_addr);
2880 for (i = 0; i < free_item->ri_cnt; i++) {
2881 kmem_free(free_item->ri_buf[i].i_addr);
2882 }
2883 /* Free the item itself */ 2820 /* Free the item itself */
2884 kmem_free(free_item->ri_buf); 2821 kmem_free(item->ri_buf);
2885 kmem_free(free_item); 2822 kmem_free(item);
2886 } while (first_item != item); 2823 }
2887 /* Free the transaction recover structure */ 2824 /* Free the transaction recover structure */
2888 kmem_free(trans); 2825 kmem_free(trans);
2889} 2826}
@@ -2891,14 +2828,12 @@ xlog_recover_free_trans(
2891STATIC int 2828STATIC int
2892xlog_recover_commit_trans( 2829xlog_recover_commit_trans(
2893 xlog_t *log, 2830 xlog_t *log,
2894 xlog_recover_t **q,
2895 xlog_recover_t *trans, 2831 xlog_recover_t *trans,
2896 int pass) 2832 int pass)
2897{ 2833{
2898 int error; 2834 int error;
2899 2835
2900 if ((error = xlog_recover_unlink_tid(q, trans))) 2836 hlist_del(&trans->r_list);
2901 return error;
2902 if ((error = xlog_recover_do_trans(log, trans, pass))) 2837 if ((error = xlog_recover_do_trans(log, trans, pass)))
2903 return error; 2838 return error;
2904 xlog_recover_free_trans(trans); /* no error */ 2839 xlog_recover_free_trans(trans); /* no error */
@@ -2926,7 +2861,7 @@ xlog_recover_unmount_trans(
2926STATIC int 2861STATIC int
2927xlog_recover_process_data( 2862xlog_recover_process_data(
2928 xlog_t *log, 2863 xlog_t *log,
2929 xlog_recover_t *rhash[], 2864 struct hlist_head rhash[],
2930 xlog_rec_header_t *rhead, 2865 xlog_rec_header_t *rhead,
2931 xfs_caddr_t dp, 2866 xfs_caddr_t dp,
2932 int pass) 2867 int pass)
@@ -2960,7 +2895,7 @@ xlog_recover_process_data(
2960 } 2895 }
2961 tid = be32_to_cpu(ohead->oh_tid); 2896 tid = be32_to_cpu(ohead->oh_tid);
2962 hash = XLOG_RHASH(tid); 2897 hash = XLOG_RHASH(tid);
2963 trans = xlog_recover_find_tid(rhash[hash], tid); 2898 trans = xlog_recover_find_tid(&rhash[hash], tid);
2964 if (trans == NULL) { /* not found; add new tid */ 2899 if (trans == NULL) { /* not found; add new tid */
2965 if (ohead->oh_flags & XLOG_START_TRANS) 2900 if (ohead->oh_flags & XLOG_START_TRANS)
2966 xlog_recover_new_tid(&rhash[hash], tid, 2901 xlog_recover_new_tid(&rhash[hash], tid,
@@ -2978,7 +2913,7 @@ xlog_recover_process_data(
2978 switch (flags) { 2913 switch (flags) {
2979 case XLOG_COMMIT_TRANS: 2914 case XLOG_COMMIT_TRANS:
2980 error = xlog_recover_commit_trans(log, 2915 error = xlog_recover_commit_trans(log,
2981 &rhash[hash], trans, pass); 2916 trans, pass);
2982 break; 2917 break;
2983 case XLOG_UNMOUNT_TRANS: 2918 case XLOG_UNMOUNT_TRANS:
2984 error = xlog_recover_unmount_trans(trans); 2919 error = xlog_recover_unmount_trans(trans);
@@ -3211,7 +3146,7 @@ xlog_recover_process_one_iunlink(
3211 /* 3146 /*
3212 * Get the on disk inode to find the next inode in the bucket. 3147 * Get the on disk inode to find the next inode in the bucket.
3213 */ 3148 */
3214 error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK); 3149 error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XBF_LOCK);
3215 if (error) 3150 if (error)
3216 goto fail_iput; 3151 goto fail_iput;
3217 3152
@@ -3517,7 +3452,7 @@ xlog_do_recovery_pass(
3517 int error = 0, h_size; 3452 int error = 0, h_size;
3518 int bblks, split_bblks; 3453 int bblks, split_bblks;
3519 int hblks, split_hblks, wrapped_hblks; 3454 int hblks, split_hblks, wrapped_hblks;
3520 xlog_recover_t *rhash[XLOG_RHASH_SIZE]; 3455 struct hlist_head rhash[XLOG_RHASH_SIZE];
3521 3456
3522 ASSERT(head_blk != tail_blk); 3457 ASSERT(head_blk != tail_blk);
3523 3458
@@ -3978,8 +3913,7 @@ xlog_recover_finish(
3978 * case the unlink transactions would have problems 3913 * case the unlink transactions would have problems
3979 * pushing the EFIs out of the way. 3914 * pushing the EFIs out of the way.
3980 */ 3915 */
3981 xfs_log_force(log->l_mp, (xfs_lsn_t)0, 3916 xfs_log_force(log->l_mp, XFS_LOG_SYNC);
3982 (XFS_LOG_FORCE | XFS_LOG_SYNC));
3983 3917
3984 xlog_recover_process_iunlinks(log); 3918 xlog_recover_process_iunlinks(log);
3985 3919
diff --git a/fs/xfs/xfs_log_recover.h b/fs/xfs/xfs_log_recover.h
index b22545555301..75d749207258 100644
--- a/fs/xfs/xfs_log_recover.h
+++ b/fs/xfs/xfs_log_recover.h
@@ -35,22 +35,21 @@
35 * item headers are in ri_buf[0]. Additional buffers follow. 35 * item headers are in ri_buf[0]. Additional buffers follow.
36 */ 36 */
37typedef struct xlog_recover_item { 37typedef struct xlog_recover_item {
38 struct xlog_recover_item *ri_next; 38 struct list_head ri_list;
39 struct xlog_recover_item *ri_prev; 39 int ri_type;
40 int ri_type; 40 int ri_cnt; /* count of regions found */
41 int ri_cnt; /* count of regions found */ 41 int ri_total; /* total regions */
42 int ri_total; /* total regions */ 42 xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */
43 xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */
44} xlog_recover_item_t; 43} xlog_recover_item_t;
45 44
46struct xlog_tid; 45struct xlog_tid;
47typedef struct xlog_recover { 46typedef struct xlog_recover {
48 struct xlog_recover *r_next; 47 struct hlist_node r_list;
49 xlog_tid_t r_log_tid; /* log's transaction id */ 48 xlog_tid_t r_log_tid; /* log's transaction id */
50 xfs_trans_header_t r_theader; /* trans header for partial */ 49 xfs_trans_header_t r_theader; /* trans header for partial */
51 int r_state; /* not needed */ 50 int r_state; /* not needed */
52 xfs_lsn_t r_lsn; /* xact lsn */ 51 xfs_lsn_t r_lsn; /* xact lsn */
53 xlog_recover_item_t *r_itemq; /* q for items */ 52 struct list_head r_itemq; /* q for items */
54} xlog_recover_t; 53} xlog_recover_t;
55 54
56#define ITEM_TYPE(i) (*(ushort *)(i)->ri_buf[0].i_addr) 55#define ITEM_TYPE(i) (*(ushort *)(i)->ri_buf[0].i_addr)
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index eb403b40e120..6afaaeb2950a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -201,6 +201,38 @@ xfs_uuid_unmount(
201 201
202 202
203/* 203/*
204 * Reference counting access wrappers to the perag structures.
205 */
206struct xfs_perag *
207xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
208{
209 struct xfs_perag *pag;
210 int ref = 0;
211
212 spin_lock(&mp->m_perag_lock);
213 pag = radix_tree_lookup(&mp->m_perag_tree, agno);
214 if (pag) {
215 ASSERT(atomic_read(&pag->pag_ref) >= 0);
216 /* catch leaks in the positive direction during testing */
217 ASSERT(atomic_read(&pag->pag_ref) < 1000);
218 ref = atomic_inc_return(&pag->pag_ref);
219 }
220 spin_unlock(&mp->m_perag_lock);
221 trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
222 return pag;
223}
224
225void
226xfs_perag_put(struct xfs_perag *pag)
227{
228 int ref;
229
230 ASSERT(atomic_read(&pag->pag_ref) > 0);
231 ref = atomic_dec_return(&pag->pag_ref);
232 trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
233}
234
235/*
204 * Free up the resources associated with a mount structure. Assume that 236 * Free up the resources associated with a mount structure. Assume that
205 * the structure was initially zeroed, so we can tell which fields got 237 * the structure was initially zeroed, so we can tell which fields got
206 * initialized. 238 * initialized.
@@ -209,13 +241,16 @@ STATIC void
209xfs_free_perag( 241xfs_free_perag(
210 xfs_mount_t *mp) 242 xfs_mount_t *mp)
211{ 243{
212 if (mp->m_perag) { 244 xfs_agnumber_t agno;
213 int agno; 245 struct xfs_perag *pag;
214 246
215 for (agno = 0; agno < mp->m_maxagi; agno++) 247 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
216 if (mp->m_perag[agno].pagb_list) 248 spin_lock(&mp->m_perag_lock);
217 kmem_free(mp->m_perag[agno].pagb_list); 249 pag = radix_tree_delete(&mp->m_perag_tree, agno);
218 kmem_free(mp->m_perag); 250 ASSERT(pag);
251 ASSERT(atomic_read(&pag->pag_ref) == 0);
252 spin_unlock(&mp->m_perag_lock);
253 kmem_free(pag);
219 } 254 }
220} 255}
221 256
@@ -389,22 +424,57 @@ xfs_initialize_perag_icache(
389 } 424 }
390} 425}
391 426
392xfs_agnumber_t 427int
393xfs_initialize_perag( 428xfs_initialize_perag(
394 xfs_mount_t *mp, 429 xfs_mount_t *mp,
395 xfs_agnumber_t agcount) 430 xfs_agnumber_t agcount,
431 xfs_agnumber_t *maxagi)
396{ 432{
397 xfs_agnumber_t index, max_metadata; 433 xfs_agnumber_t index, max_metadata;
434 xfs_agnumber_t first_initialised = 0;
398 xfs_perag_t *pag; 435 xfs_perag_t *pag;
399 xfs_agino_t agino; 436 xfs_agino_t agino;
400 xfs_ino_t ino; 437 xfs_ino_t ino;
401 xfs_sb_t *sbp = &mp->m_sb; 438 xfs_sb_t *sbp = &mp->m_sb;
402 xfs_ino_t max_inum = XFS_MAXINUMBER_32; 439 xfs_ino_t max_inum = XFS_MAXINUMBER_32;
440 int error = -ENOMEM;
403 441
404 /* Check to see if the filesystem can overflow 32 bit inodes */ 442 /* Check to see if the filesystem can overflow 32 bit inodes */
405 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); 443 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
406 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); 444 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
407 445
446 /*
447 * Walk the current per-ag tree so we don't try to initialise AGs
448 * that already exist (growfs case). Allocate and insert all the
449 * AGs we don't find ready for initialisation.
450 */
451 for (index = 0; index < agcount; index++) {
452 pag = xfs_perag_get(mp, index);
453 if (pag) {
454 xfs_perag_put(pag);
455 continue;
456 }
457 if (!first_initialised)
458 first_initialised = index;
459 pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
460 if (!pag)
461 goto out_unwind;
462 if (radix_tree_preload(GFP_NOFS))
463 goto out_unwind;
464 spin_lock(&mp->m_perag_lock);
465 if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
466 BUG();
467 spin_unlock(&mp->m_perag_lock);
468 radix_tree_preload_end();
469 error = -EEXIST;
470 goto out_unwind;
471 }
472 pag->pag_agno = index;
473 pag->pag_mount = mp;
474 spin_unlock(&mp->m_perag_lock);
475 radix_tree_preload_end();
476 }
477
408 /* Clear the mount flag if no inode can overflow 32 bits 478 /* Clear the mount flag if no inode can overflow 32 bits
409 * on this filesystem, or if specifically requested.. 479 * on this filesystem, or if specifically requested..
410 */ 480 */
@@ -438,21 +508,33 @@ xfs_initialize_perag(
438 } 508 }
439 509
440 /* This ag is preferred for inodes */ 510 /* This ag is preferred for inodes */
441 pag = &mp->m_perag[index]; 511 pag = xfs_perag_get(mp, index);
442 pag->pagi_inodeok = 1; 512 pag->pagi_inodeok = 1;
443 if (index < max_metadata) 513 if (index < max_metadata)
444 pag->pagf_metadata = 1; 514 pag->pagf_metadata = 1;
445 xfs_initialize_perag_icache(pag); 515 xfs_initialize_perag_icache(pag);
516 xfs_perag_put(pag);
446 } 517 }
447 } else { 518 } else {
448 /* Setup default behavior for smaller filesystems */ 519 /* Setup default behavior for smaller filesystems */
449 for (index = 0; index < agcount; index++) { 520 for (index = 0; index < agcount; index++) {
450 pag = &mp->m_perag[index]; 521 pag = xfs_perag_get(mp, index);
451 pag->pagi_inodeok = 1; 522 pag->pagi_inodeok = 1;
452 xfs_initialize_perag_icache(pag); 523 xfs_initialize_perag_icache(pag);
524 xfs_perag_put(pag);
453 } 525 }
454 } 526 }
455 return index; 527 if (maxagi)
528 *maxagi = index;
529 return 0;
530
531out_unwind:
532 kmem_free(pag);
533 for (; index > first_initialised; index--) {
534 pag = radix_tree_delete(&mp->m_perag_tree, index);
535 kmem_free(pag);
536 }
537 return error;
456} 538}
457 539
458void 540void
@@ -583,7 +665,7 @@ xfs_readsb(xfs_mount_t *mp, int flags)
583 * access to the superblock. 665 * access to the superblock.
584 */ 666 */
585 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); 667 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
586 extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED; 668 extra_flags = XBF_LOCK | XBF_FS_MANAGED | XBF_MAPPED;
587 669
588 bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), 670 bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size),
589 extra_flags); 671 extra_flags);
@@ -731,12 +813,13 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
731 error = xfs_ialloc_pagi_init(mp, NULL, index); 813 error = xfs_ialloc_pagi_init(mp, NULL, index);
732 if (error) 814 if (error)
733 return error; 815 return error;
734 pag = &mp->m_perag[index]; 816 pag = xfs_perag_get(mp, index);
735 ifree += pag->pagi_freecount; 817 ifree += pag->pagi_freecount;
736 ialloc += pag->pagi_count; 818 ialloc += pag->pagi_count;
737 bfree += pag->pagf_freeblks; 819 bfree += pag->pagf_freeblks;
738 bfreelst += pag->pagf_flcount; 820 bfreelst += pag->pagf_flcount;
739 btree += pag->pagf_btreeblks; 821 btree += pag->pagf_btreeblks;
822 xfs_perag_put(pag);
740 } 823 }
741 /* 824 /*
742 * Overwrite incore superblock counters with just-read data 825 * Overwrite incore superblock counters with just-read data
@@ -1008,6 +1091,22 @@ xfs_mount_reset_sbqflags(
1008 return xfs_trans_commit(tp, 0); 1091 return xfs_trans_commit(tp, 0);
1009} 1092}
1010 1093
1094__uint64_t
1095xfs_default_resblks(xfs_mount_t *mp)
1096{
1097 __uint64_t resblks;
1098
1099 /*
1100 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
1101 * This may drive us straight to ENOSPC on mount, but that implies
1102 * we were already there on the last unmount. Warn if this occurs.
1103 */
1104 resblks = mp->m_sb.sb_dblocks;
1105 do_div(resblks, 20);
1106 resblks = min_t(__uint64_t, resblks, 1024);
1107 return resblks;
1108}
1109
1011/* 1110/*
1012 * This function does the following on an initial mount of a file system: 1111 * This function does the following on an initial mount of a file system:
1013 * - reads the superblock from disk and init the mount struct 1112 * - reads the superblock from disk and init the mount struct
@@ -1152,13 +1251,13 @@ xfs_mountfs(
1152 /* 1251 /*
1153 * Allocate and initialize the per-ag data. 1252 * Allocate and initialize the per-ag data.
1154 */ 1253 */
1155 init_rwsem(&mp->m_peraglock); 1254 spin_lock_init(&mp->m_perag_lock);
1156 mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), 1255 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS);
1157 KM_MAYFAIL); 1256 error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
1158 if (!mp->m_perag) 1257 if (error) {
1258 cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error);
1159 goto out_remove_uuid; 1259 goto out_remove_uuid;
1160 1260 }
1161 mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);
1162 1261
1163 if (!sbp->sb_logblocks) { 1262 if (!sbp->sb_logblocks) {
1164 cmn_err(CE_WARN, "XFS: no log defined"); 1263 cmn_err(CE_WARN, "XFS: no log defined");
@@ -1318,18 +1417,14 @@ xfs_mountfs(
1318 * when at ENOSPC. This is needed for operations like create with 1417 * when at ENOSPC. This is needed for operations like create with
1319 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations 1418 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
1320 * are not allowed to use this reserved space. 1419 * are not allowed to use this reserved space.
1321 *
1322 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
1323 * This may drive us straight to ENOSPC on mount, but that implies
1324 * we were already there on the last unmount. Warn if this occurs.
1325 */ 1420 */
1326 resblks = mp->m_sb.sb_dblocks; 1421 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
1327 do_div(resblks, 20); 1422 resblks = xfs_default_resblks(mp);
1328 resblks = min_t(__uint64_t, resblks, 1024); 1423 error = xfs_reserve_blocks(mp, &resblks, NULL);
1329 error = xfs_reserve_blocks(mp, &resblks, NULL); 1424 if (error)
1330 if (error) 1425 cmn_err(CE_WARN, "XFS: Unable to allocate reserve "
1331 cmn_err(CE_WARN, "XFS: Unable to allocate reserve blocks. " 1426 "blocks. Continuing without a reserve pool.");
1332 "Continuing without a reserve pool."); 1427 }
1333 1428
1334 return 0; 1429 return 0;
1335 1430
@@ -1372,8 +1467,19 @@ xfs_unmountfs(
1372 * push out the iclog we will never get that unlocked. hence we 1467 * push out the iclog we will never get that unlocked. hence we
1373 * need to force the log first. 1468 * need to force the log first.
1374 */ 1469 */
1375 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1470 xfs_log_force(mp, XFS_LOG_SYNC);
1376 xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC); 1471
1472 /*
1473 * Do a delwri reclaim pass first so that as many dirty inodes are
1474 * queued up for IO as possible. Then flush the buffers before making
1475 * a synchronous path to catch all the remaining inodes are reclaimed.
1476 * This makes the reclaim process as quick as possible by avoiding
1477 * synchronous writeout and blocking on inodes already in the delwri
1478 * state as much as possible.
1479 */
1480 xfs_reclaim_inodes(mp, 0);
1481 XFS_bflush(mp->m_ddev_targp);
1482 xfs_reclaim_inodes(mp, SYNC_WAIT);
1377 1483
1378 xfs_qm_unmount(mp); 1484 xfs_qm_unmount(mp);
1379 1485
@@ -1382,7 +1488,7 @@ xfs_unmountfs(
1382 * that nothing is pinned. This is important because bflush() 1488 * that nothing is pinned. This is important because bflush()
1383 * will skip pinned buffers. 1489 * will skip pinned buffers.
1384 */ 1490 */
1385 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1491 xfs_log_force(mp, XFS_LOG_SYNC);
1386 1492
1387 xfs_binval(mp->m_ddev_targp); 1493 xfs_binval(mp->m_ddev_targp);
1388 if (mp->m_rtdev_targp) { 1494 if (mp->m_rtdev_targp) {
@@ -1548,15 +1654,14 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
1548 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields); 1654 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields);
1549 1655
1550 /* find modified range */ 1656 /* find modified range */
1657 f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
1658 ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1659 last = xfs_sb_info[f + 1].offset - 1;
1551 1660
1552 f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); 1661 f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
1553 ASSERT((1LL << f) & XFS_SB_MOD_BITS); 1662 ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1554 first = xfs_sb_info[f].offset; 1663 first = xfs_sb_info[f].offset;
1555 1664
1556 f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
1557 ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1558 last = xfs_sb_info[f + 1].offset - 1;
1559
1560 xfs_trans_log_buf(tp, bp, first, last); 1665 xfs_trans_log_buf(tp, bp, first, last);
1561} 1666}
1562 1667
@@ -1887,7 +1992,7 @@ xfs_getsb(
1887 1992
1888 ASSERT(mp->m_sb_bp != NULL); 1993 ASSERT(mp->m_sb_bp != NULL);
1889 bp = mp->m_sb_bp; 1994 bp = mp->m_sb_bp;
1890 if (flags & XFS_BUF_TRYLOCK) { 1995 if (flags & XBF_TRYLOCK) {
1891 if (!XFS_BUF_CPSEMA(bp)) { 1996 if (!XFS_BUF_CPSEMA(bp)) {
1892 return NULL; 1997 return NULL;
1893 } 1998 }
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1df7e4502967..70504fcf14cd 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -78,7 +78,8 @@ typedef int (*xfs_send_destroy_t)(struct xfs_inode *, dm_right_t);
78typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *, 78typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *,
79 struct xfs_inode *, dm_right_t, 79 struct xfs_inode *, dm_right_t,
80 struct xfs_inode *, dm_right_t, 80 struct xfs_inode *, dm_right_t,
81 const char *, const char *, mode_t, int, int); 81 const unsigned char *, const unsigned char *,
82 mode_t, int, int);
82typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t, 83typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t,
83 char *, char *); 84 char *, char *);
84typedef void (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *, 85typedef void (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *,
@@ -207,8 +208,8 @@ typedef struct xfs_mount {
207 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ 208 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
208 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ 209 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
209 uint m_in_maxlevels; /* max inobt btree levels. */ 210 uint m_in_maxlevels; /* max inobt btree levels. */
210 struct xfs_perag *m_perag; /* per-ag accounting info */ 211 struct radix_tree_root m_perag_tree; /* per-ag accounting info */
211 struct rw_semaphore m_peraglock; /* lock for m_perag (pointer) */ 212 spinlock_t m_perag_lock; /* lock for m_perag_tree */
212 struct mutex m_growlock; /* growfs mutex */ 213 struct mutex m_growlock; /* growfs mutex */
213 int m_fixedfsid[2]; /* unchanged for life of FS */ 214 int m_fixedfsid[2]; /* unchanged for life of FS */
214 uint m_dmevmask; /* DMI events for this FS */ 215 uint m_dmevmask; /* DMI events for this FS */
@@ -224,6 +225,7 @@ typedef struct xfs_mount {
224 __uint64_t m_maxioffset; /* maximum inode offset */ 225 __uint64_t m_maxioffset; /* maximum inode offset */
225 __uint64_t m_resblks; /* total reserved blocks */ 226 __uint64_t m_resblks; /* total reserved blocks */
226 __uint64_t m_resblks_avail;/* available reserved blocks */ 227 __uint64_t m_resblks_avail;/* available reserved blocks */
228 __uint64_t m_resblks_save; /* reserved blks @ remount,ro */
227 int m_dalign; /* stripe unit */ 229 int m_dalign; /* stripe unit */
228 int m_swidth; /* stripe width */ 230 int m_swidth; /* stripe width */
229 int m_sinoalign; /* stripe unit inode alignment */ 231 int m_sinoalign; /* stripe unit inode alignment */
@@ -384,19 +386,10 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
384} 386}
385 387
386/* 388/*
387 * perag get/put wrappers for eventual ref counting 389 * perag get/put wrappers for ref counting
388 */ 390 */
389static inline xfs_perag_t * 391struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
390xfs_get_perag(struct xfs_mount *mp, xfs_ino_t ino) 392void xfs_perag_put(struct xfs_perag *pag);
391{
392 return &mp->m_perag[XFS_INO_TO_AGNO(mp, ino)];
393}
394
395static inline void
396xfs_put_perag(struct xfs_mount *mp, xfs_perag_t *pag)
397{
398 /* nothing to see here, move along */
399}
400 393
401/* 394/*
402 * Per-cpu superblock locking functions 395 * Per-cpu superblock locking functions
@@ -428,6 +421,7 @@ typedef struct xfs_mod_sb {
428} xfs_mod_sb_t; 421} xfs_mod_sb_t;
429 422
430extern int xfs_log_sbcount(xfs_mount_t *, uint); 423extern int xfs_log_sbcount(xfs_mount_t *, uint);
424extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
431extern int xfs_mountfs(xfs_mount_t *mp); 425extern int xfs_mountfs(xfs_mount_t *mp);
432 426
433extern void xfs_unmountfs(xfs_mount_t *); 427extern void xfs_unmountfs(xfs_mount_t *);
@@ -450,7 +444,8 @@ extern struct xfs_dmops xfs_dmcore_xfs;
450#endif /* __KERNEL__ */ 444#endif /* __KERNEL__ */
451 445
452extern void xfs_mod_sb(struct xfs_trans *, __int64_t); 446extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
453extern xfs_agnumber_t xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t); 447extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t,
448 xfs_agnumber_t *);
454extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); 449extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
455extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); 450extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
456 451
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 4b0613d99faa..45ce15dc5b2b 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -398,7 +398,7 @@ exit:
398 * guaranteed that all the free functions for all the elements have finished 398 * guaranteed that all the free functions for all the elements have finished
399 * executing and the reaper is not running. 399 * executing and the reaper is not running.
400 */ 400 */
401void 401static void
402xfs_mru_cache_flush( 402xfs_mru_cache_flush(
403 xfs_mru_cache_t *mru) 403 xfs_mru_cache_t *mru)
404{ 404{
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index 5d439f34b0c9..36dd3ec8b4eb 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -42,7 +42,6 @@ void xfs_mru_cache_uninit(void);
42int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms, 42int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
43 unsigned int grp_count, 43 unsigned int grp_count,
44 xfs_mru_cache_free_func_t free_func); 44 xfs_mru_cache_free_func_t free_func);
45void xfs_mru_cache_flush(xfs_mru_cache_t *mru);
46void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); 45void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
47int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, 46int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
48 void *value); 47 void *value);
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 91bfd60f4c74..fdcab3f81dde 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -223,16 +223,9 @@ typedef struct xfs_qoff_logformat {
223#define XFS_QMOPT_RES_INOS 0x0800000 223#define XFS_QMOPT_RES_INOS 0x0800000
224 224
225/* 225/*
226 * flags for dqflush and dqflush_all.
227 */
228#define XFS_QMOPT_SYNC 0x1000000
229#define XFS_QMOPT_ASYNC 0x2000000
230#define XFS_QMOPT_DELWRI 0x4000000
231
232/*
233 * flags for dqalloc. 226 * flags for dqalloc.
234 */ 227 */
235#define XFS_QMOPT_INHERIT 0x8000000 228#define XFS_QMOPT_INHERIT 0x1000000
236 229
237/* 230/*
238 * flags to xfs_trans_mod_dquot. 231 * flags to xfs_trans_mod_dquot.
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 5aa07caea5f1..e336742a58a4 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -47,48 +47,6 @@
47#include "xfs_trace.h" 47#include "xfs_trace.h"
48 48
49/* 49/*
50 * This is a subroutine for xfs_write() and other writers (xfs_ioctl)
51 * which clears the setuid and setgid bits when a file is written.
52 */
53int
54xfs_write_clear_setuid(
55 xfs_inode_t *ip)
56{
57 xfs_mount_t *mp;
58 xfs_trans_t *tp;
59 int error;
60
61 mp = ip->i_mount;
62 tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
63 if ((error = xfs_trans_reserve(tp, 0,
64 XFS_WRITEID_LOG_RES(mp),
65 0, 0, 0))) {
66 xfs_trans_cancel(tp, 0);
67 return error;
68 }
69 xfs_ilock(ip, XFS_ILOCK_EXCL);
70 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
71 xfs_trans_ihold(tp, ip);
72 ip->i_d.di_mode &= ~S_ISUID;
73
74 /*
75 * Note that we don't have to worry about mandatory
76 * file locking being disabled here because we only
77 * clear the S_ISGID bit if the Group execute bit is
78 * on, but if it was on then mandatory locking wouldn't
79 * have been enabled.
80 */
81 if (ip->i_d.di_mode & S_IXGRP) {
82 ip->i_d.di_mode &= ~S_ISGID;
83 }
84 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
85 xfs_trans_set_sync(tp);
86 error = xfs_trans_commit(tp, 0);
87 xfs_iunlock(ip, XFS_ILOCK_EXCL);
88 return 0;
89}
90
91/*
92 * Force a shutdown of the filesystem instantly while keeping 50 * Force a shutdown of the filesystem instantly while keeping
93 * the filesystem consistent. We don't do an unmount here; just shutdown 51 * the filesystem consistent. We don't do an unmount here; just shutdown
94 * the shop, make sure that absolutely nothing persistent happens to 52 * the shop, make sure that absolutely nothing persistent happens to
@@ -153,88 +111,6 @@ xfs_do_force_shutdown(
153 } 111 }
154} 112}
155 113
156
157/*
158 * Called when we want to stop a buffer from getting written or read.
159 * We attach the EIO error, muck with its flags, and call biodone
160 * so that the proper iodone callbacks get called.
161 */
162int
163xfs_bioerror(
164 xfs_buf_t *bp)
165{
166
167#ifdef XFSERRORDEBUG
168 ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
169#endif
170
171 /*
172 * No need to wait until the buffer is unpinned.
173 * We aren't flushing it.
174 */
175 XFS_BUF_ERROR(bp, EIO);
176 /*
177 * We're calling biodone, so delete B_DONE flag. Either way
178 * we have to call the iodone callback, and calling biodone
179 * probably is the best way since it takes care of
180 * GRIO as well.
181 */
182 XFS_BUF_UNREAD(bp);
183 XFS_BUF_UNDELAYWRITE(bp);
184 XFS_BUF_UNDONE(bp);
185 XFS_BUF_STALE(bp);
186
187 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
188 xfs_biodone(bp);
189
190 return (EIO);
191}
192
193/*
194 * Same as xfs_bioerror, except that we are releasing the buffer
195 * here ourselves, and avoiding the biodone call.
196 * This is meant for userdata errors; metadata bufs come with
197 * iodone functions attached, so that we can track down errors.
198 */
199int
200xfs_bioerror_relse(
201 xfs_buf_t *bp)
202{
203 int64_t fl;
204
205 ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks);
206 ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone);
207
208 fl = XFS_BUF_BFLAGS(bp);
209 /*
210 * No need to wait until the buffer is unpinned.
211 * We aren't flushing it.
212 *
213 * chunkhold expects B_DONE to be set, whether
214 * we actually finish the I/O or not. We don't want to
215 * change that interface.
216 */
217 XFS_BUF_UNREAD(bp);
218 XFS_BUF_UNDELAYWRITE(bp);
219 XFS_BUF_DONE(bp);
220 XFS_BUF_STALE(bp);
221 XFS_BUF_CLR_IODONE_FUNC(bp);
222 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
223 if (!(fl & XFS_B_ASYNC)) {
224 /*
225 * Mark b_error and B_ERROR _both_.
226 * Lot's of chunkcache code assumes that.
227 * There's no reason to mark error for
228 * ASYNC buffers.
229 */
230 XFS_BUF_ERROR(bp, EIO);
231 XFS_BUF_FINISH_IOWAIT(bp);
232 } else {
233 xfs_buf_relse(bp);
234 }
235 return (EIO);
236}
237
238/* 114/*
239 * Prints out an ALERT message about I/O error. 115 * Prints out an ALERT message about I/O error.
240 */ 116 */
@@ -306,37 +182,6 @@ xfs_read_buf(
306} 182}
307 183
308/* 184/*
309 * Wrapper around bwrite() so that we can trap
310 * write errors, and act accordingly.
311 */
312int
313xfs_bwrite(
314 struct xfs_mount *mp,
315 struct xfs_buf *bp)
316{
317 int error;
318
319 /*
320 * XXXsup how does this work for quotas.
321 */
322 XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
323 bp->b_mount = mp;
324 XFS_BUF_WRITE(bp);
325
326 if ((error = XFS_bwrite(bp))) {
327 ASSERT(mp);
328 /*
329 * Cannot put a buftrace here since if the buffer is not
330 * B_HOLD then we will brelse() the buffer before returning
331 * from bwrite and we could be tracing a buffer that has
332 * been reused.
333 */
334 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
335 }
336 return (error);
337}
338
339/*
340 * helper function to extract extent size hint from inode 185 * helper function to extract extent size hint from inode
341 */ 186 */
342xfs_extlen_t 187xfs_extlen_t
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index 571f2174435c..11c41ec6ed75 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -39,10 +39,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
39/* 39/*
40 * Prototypes for functions in xfs_rw.c. 40 * Prototypes for functions in xfs_rw.c.
41 */ 41 */
42extern int xfs_write_clear_setuid(struct xfs_inode *ip);
43extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
44extern int xfs_bioerror(struct xfs_buf *bp);
45extern int xfs_bioerror_relse(struct xfs_buf *bp);
46extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp, 42extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp,
47 xfs_daddr_t blkno, int len, uint flags, 43 xfs_daddr_t blkno, int len, uint flags,
48 struct xfs_buf **bpp); 44 struct xfs_buf **bpp);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 237badcbac3b..be942d4e3324 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -981,9 +981,8 @@ shut_us_down:
981 */ 981 */
982 if (sync) { 982 if (sync) {
983 if (!error) { 983 if (!error) {
984 error = _xfs_log_force(mp, commit_lsn, 984 error = _xfs_log_force_lsn(mp, commit_lsn,
985 XFS_LOG_FORCE | XFS_LOG_SYNC, 985 XFS_LOG_SYNC, log_flushed);
986 log_flushed);
987 } 986 }
988 XFS_STATS_INC(xs_trans_sync); 987 XFS_STATS_INC(xs_trans_sync);
989 } else { 988 } else {
@@ -1121,7 +1120,7 @@ xfs_trans_fill_vecs(
1121 tp->t_header.th_num_items = nitems; 1120 tp->t_header.th_num_items = nitems;
1122 log_vector->i_addr = (xfs_caddr_t)&tp->t_header; 1121 log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
1123 log_vector->i_len = sizeof(xfs_trans_header_t); 1122 log_vector->i_len = sizeof(xfs_trans_header_t);
1124 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_TRANSHDR); 1123 log_vector->i_type = XLOG_REG_TYPE_TRANSHDR;
1125} 1124}
1126 1125
1127 1126
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index ca64f33c63a3..c93e3a102857 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -861,8 +861,7 @@ typedef struct xfs_item_ops {
861#define XFS_ITEM_SUCCESS 0 861#define XFS_ITEM_SUCCESS 0
862#define XFS_ITEM_PINNED 1 862#define XFS_ITEM_PINNED 1
863#define XFS_ITEM_LOCKED 2 863#define XFS_ITEM_LOCKED 2
864#define XFS_ITEM_FLUSHING 3 864#define XFS_ITEM_PUSHBUF 3
865#define XFS_ITEM_PUSHBUF 4
866 865
867/* 866/*
868 * This structure is used to maintain a list of block ranges that have been 867 * This structure is used to maintain a list of block ranges that have been
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 2ffc570679be..e799824f7245 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -237,14 +237,15 @@ out:
237} 237}
238 238
239/* 239/*
240 * Function that does the work of pushing on the AIL 240 * xfsaild_push does the work of pushing on the AIL. Returning a timeout of
241 * zero indicates that the caller should sleep until woken.
241 */ 242 */
242long 243long
243xfsaild_push( 244xfsaild_push(
244 struct xfs_ail *ailp, 245 struct xfs_ail *ailp,
245 xfs_lsn_t *last_lsn) 246 xfs_lsn_t *last_lsn)
246{ 247{
247 long tout = 1000; /* milliseconds */ 248 long tout = 0;
248 xfs_lsn_t last_pushed_lsn = *last_lsn; 249 xfs_lsn_t last_pushed_lsn = *last_lsn;
249 xfs_lsn_t target = ailp->xa_target; 250 xfs_lsn_t target = ailp->xa_target;
250 xfs_lsn_t lsn; 251 xfs_lsn_t lsn;
@@ -252,6 +253,7 @@ xfsaild_push(
252 int flush_log, count, stuck; 253 int flush_log, count, stuck;
253 xfs_mount_t *mp = ailp->xa_mount; 254 xfs_mount_t *mp = ailp->xa_mount;
254 struct xfs_ail_cursor *cur = &ailp->xa_cursors; 255 struct xfs_ail_cursor *cur = &ailp->xa_cursors;
256 int push_xfsbufd = 0;
255 257
256 spin_lock(&ailp->xa_lock); 258 spin_lock(&ailp->xa_lock);
257 xfs_trans_ail_cursor_init(ailp, cur); 259 xfs_trans_ail_cursor_init(ailp, cur);
@@ -262,7 +264,7 @@ xfsaild_push(
262 */ 264 */
263 xfs_trans_ail_cursor_done(ailp, cur); 265 xfs_trans_ail_cursor_done(ailp, cur);
264 spin_unlock(&ailp->xa_lock); 266 spin_unlock(&ailp->xa_lock);
265 last_pushed_lsn = 0; 267 *last_lsn = 0;
266 return tout; 268 return tout;
267 } 269 }
268 270
@@ -279,7 +281,6 @@ xfsaild_push(
279 * prevents use from spinning when we can't do anything or there is 281 * prevents use from spinning when we can't do anything or there is
280 * lots of contention on the AIL lists. 282 * lots of contention on the AIL lists.
281 */ 283 */
282 tout = 10;
283 lsn = lip->li_lsn; 284 lsn = lip->li_lsn;
284 flush_log = stuck = count = 0; 285 flush_log = stuck = count = 0;
285 while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) { 286 while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
@@ -308,6 +309,7 @@ xfsaild_push(
308 XFS_STATS_INC(xs_push_ail_pushbuf); 309 XFS_STATS_INC(xs_push_ail_pushbuf);
309 IOP_PUSHBUF(lip); 310 IOP_PUSHBUF(lip);
310 last_pushed_lsn = lsn; 311 last_pushed_lsn = lsn;
312 push_xfsbufd = 1;
311 break; 313 break;
312 314
313 case XFS_ITEM_PINNED: 315 case XFS_ITEM_PINNED:
@@ -322,12 +324,6 @@ xfsaild_push(
322 stuck++; 324 stuck++;
323 break; 325 break;
324 326
325 case XFS_ITEM_FLUSHING:
326 XFS_STATS_INC(xs_push_ail_flushing);
327 last_pushed_lsn = lsn;
328 stuck++;
329 break;
330
331 default: 327 default:
332 ASSERT(0); 328 ASSERT(0);
333 break; 329 break;
@@ -371,19 +367,24 @@ xfsaild_push(
371 * move forward in the AIL. 367 * move forward in the AIL.
372 */ 368 */
373 XFS_STATS_INC(xs_push_ail_flush); 369 XFS_STATS_INC(xs_push_ail_flush);
374 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 370 xfs_log_force(mp, 0);
371 }
372
373 if (push_xfsbufd) {
374 /* we've got delayed write buffers to flush */
375 wake_up_process(mp->m_ddev_targp->bt_task);
375 } 376 }
376 377
377 if (!count) { 378 if (!count) {
378 /* We're past our target or empty, so idle */ 379 /* We're past our target or empty, so idle */
379 tout = 1000; 380 last_pushed_lsn = 0;
380 } else if (XFS_LSN_CMP(lsn, target) >= 0) { 381 } else if (XFS_LSN_CMP(lsn, target) >= 0) {
381 /* 382 /*
382 * We reached the target so wait a bit longer for I/O to 383 * We reached the target so wait a bit longer for I/O to
383 * complete and remove pushed items from the AIL before we 384 * complete and remove pushed items from the AIL before we
384 * start the next scan from the start of the AIL. 385 * start the next scan from the start of the AIL.
385 */ 386 */
386 tout += 20; 387 tout = 50;
387 last_pushed_lsn = 0; 388 last_pushed_lsn = 0;
388 } else if ((stuck * 100) / count > 90) { 389 } else if ((stuck * 100) / count > 90) {
389 /* 390 /*
@@ -395,11 +396,14 @@ xfsaild_push(
395 * Backoff a bit more to allow some I/O to complete before 396 * Backoff a bit more to allow some I/O to complete before
396 * continuing from where we were. 397 * continuing from where we were.
397 */ 398 */
398 tout += 10; 399 tout = 20;
400 } else {
401 /* more to do, but wait a short while before continuing */
402 tout = 10;
399 } 403 }
400 *last_lsn = last_pushed_lsn; 404 *last_lsn = last_pushed_lsn;
401 return tout; 405 return tout;
402} /* xfsaild_push */ 406}
403 407
404 408
405/* 409/*
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 49130628d5ef..5ffd544434eb 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -75,13 +75,14 @@ xfs_trans_get_buf(xfs_trans_t *tp,
75 xfs_buf_log_item_t *bip; 75 xfs_buf_log_item_t *bip;
76 76
77 if (flags == 0) 77 if (flags == 0)
78 flags = XFS_BUF_LOCK | XFS_BUF_MAPPED; 78 flags = XBF_LOCK | XBF_MAPPED;
79 79
80 /* 80 /*
81 * Default to a normal get_buf() call if the tp is NULL. 81 * Default to a normal get_buf() call if the tp is NULL.
82 */ 82 */
83 if (tp == NULL) 83 if (tp == NULL)
84 return xfs_buf_get(target_dev, blkno, len, flags | BUF_BUSY); 84 return xfs_buf_get(target_dev, blkno, len,
85 flags | XBF_DONT_BLOCK);
85 86
86 /* 87 /*
87 * If we find the buffer in the cache with this transaction 88 * If we find the buffer in the cache with this transaction
@@ -117,14 +118,14 @@ xfs_trans_get_buf(xfs_trans_t *tp,
117 } 118 }
118 119
119 /* 120 /*
120 * We always specify the BUF_BUSY flag within a transaction so 121 * We always specify the XBF_DONT_BLOCK flag within a transaction
121 * that get_buf does not try to push out a delayed write buffer 122 * so that get_buf does not try to push out a delayed write buffer
122 * which might cause another transaction to take place (if the 123 * which might cause another transaction to take place (if the
123 * buffer was delayed alloc). Such recursive transactions can 124 * buffer was delayed alloc). Such recursive transactions can
124 * easily deadlock with our current transaction as well as cause 125 * easily deadlock with our current transaction as well as cause
125 * us to run out of stack space. 126 * us to run out of stack space.
126 */ 127 */
127 bp = xfs_buf_get(target_dev, blkno, len, flags | BUF_BUSY); 128 bp = xfs_buf_get(target_dev, blkno, len, flags | XBF_DONT_BLOCK);
128 if (bp == NULL) { 129 if (bp == NULL) {
129 return NULL; 130 return NULL;
130 } 131 }
@@ -290,15 +291,15 @@ xfs_trans_read_buf(
290 int error; 291 int error;
291 292
292 if (flags == 0) 293 if (flags == 0)
293 flags = XFS_BUF_LOCK | XFS_BUF_MAPPED; 294 flags = XBF_LOCK | XBF_MAPPED;
294 295
295 /* 296 /*
296 * Default to a normal get_buf() call if the tp is NULL. 297 * Default to a normal get_buf() call if the tp is NULL.
297 */ 298 */
298 if (tp == NULL) { 299 if (tp == NULL) {
299 bp = xfs_buf_read(target, blkno, len, flags | BUF_BUSY); 300 bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK);
300 if (!bp) 301 if (!bp)
301 return (flags & XFS_BUF_TRYLOCK) ? 302 return (flags & XBF_TRYLOCK) ?
302 EAGAIN : XFS_ERROR(ENOMEM); 303 EAGAIN : XFS_ERROR(ENOMEM);
303 304
304 if (XFS_BUF_GETERROR(bp) != 0) { 305 if (XFS_BUF_GETERROR(bp) != 0) {
@@ -385,14 +386,14 @@ xfs_trans_read_buf(
385 } 386 }
386 387
387 /* 388 /*
388 * We always specify the BUF_BUSY flag within a transaction so 389 * We always specify the XBF_DONT_BLOCK flag within a transaction
389 * that get_buf does not try to push out a delayed write buffer 390 * so that get_buf does not try to push out a delayed write buffer
390 * which might cause another transaction to take place (if the 391 * which might cause another transaction to take place (if the
391 * buffer was delayed alloc). Such recursive transactions can 392 * buffer was delayed alloc). Such recursive transactions can
392 * easily deadlock with our current transaction as well as cause 393 * easily deadlock with our current transaction as well as cause
393 * us to run out of stack space. 394 * us to run out of stack space.
394 */ 395 */
395 bp = xfs_buf_read(target, blkno, len, flags | BUF_BUSY); 396 bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK);
396 if (bp == NULL) { 397 if (bp == NULL) {
397 *bpp = NULL; 398 *bpp = NULL;
398 return 0; 399 return 0;
@@ -472,8 +473,8 @@ shutdown_abort:
472 if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) 473 if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
473 cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp); 474 cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp);
474#endif 475#endif
475 ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) != 476 ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) !=
476 (XFS_B_STALE|XFS_B_DELWRI)); 477 (XBF_STALE|XBF_DELWRI));
477 478
478 trace_xfs_trans_read_buf_shut(bp, _RET_IP_); 479 trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
479 xfs_buf_relse(bp); 480 xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index d725428c9df6..b09904555d07 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -151,8 +151,8 @@ typedef enum {
151} xfs_btnum_t; 151} xfs_btnum_t;
152 152
153struct xfs_name { 153struct xfs_name {
154 const char *name; 154 const unsigned char *name;
155 int len; 155 int len;
156}; 156};
157 157
158#endif /* __XFS_TYPES_H__ */ 158#endif /* __XFS_TYPES_H__ */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6f268756bf36..ddd2c5d1b854 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -256,7 +256,7 @@ xfs_setattr(
256 iattr->ia_size > ip->i_d.di_size) { 256 iattr->ia_size > ip->i_d.di_size) {
257 code = xfs_flush_pages(ip, 257 code = xfs_flush_pages(ip,
258 ip->i_d.di_size, iattr->ia_size, 258 ip->i_d.di_size, iattr->ia_size,
259 XFS_B_ASYNC, FI_NONE); 259 XBF_ASYNC, FI_NONE);
260 } 260 }
261 261
262 /* wait for all I/O to complete */ 262 /* wait for all I/O to complete */
@@ -597,7 +597,7 @@ xfs_fsync(
597{ 597{
598 xfs_trans_t *tp; 598 xfs_trans_t *tp;
599 int error = 0; 599 int error = 0;
600 int log_flushed = 0, changed = 1; 600 int log_flushed = 0;
601 601
602 xfs_itrace_entry(ip); 602 xfs_itrace_entry(ip);
603 603
@@ -627,19 +627,16 @@ xfs_fsync(
627 * disk yet, the inode will be still be pinned. If it is, 627 * disk yet, the inode will be still be pinned. If it is,
628 * force the log. 628 * force the log.
629 */ 629 */
630
631 xfs_iunlock(ip, XFS_ILOCK_SHARED); 630 xfs_iunlock(ip, XFS_ILOCK_SHARED);
632
633 if (xfs_ipincount(ip)) { 631 if (xfs_ipincount(ip)) {
634 error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, 632 if (ip->i_itemp->ili_last_lsn) {
635 XFS_LOG_FORCE | XFS_LOG_SYNC, 633 error = _xfs_log_force_lsn(ip->i_mount,
636 &log_flushed); 634 ip->i_itemp->ili_last_lsn,
637 } else { 635 XFS_LOG_SYNC, &log_flushed);
638 /* 636 } else {
639 * If the inode is not pinned and nothing has changed 637 error = _xfs_log_force(ip->i_mount,
640 * we don't need to flush the cache. 638 XFS_LOG_SYNC, &log_flushed);
641 */ 639 }
642 changed = 0;
643 } 640 }
644 } else { 641 } else {
645 /* 642 /*
@@ -674,7 +671,7 @@ xfs_fsync(
674 xfs_iunlock(ip, XFS_ILOCK_EXCL); 671 xfs_iunlock(ip, XFS_ILOCK_EXCL);
675 } 672 }
676 673
677 if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) { 674 if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
678 /* 675 /*
679 * If the log write didn't issue an ordered tag we need 676 * If the log write didn't issue an ordered tag we need
680 * to flush the disk cache for the data device now. 677 * to flush the disk cache for the data device now.
@@ -1096,7 +1093,7 @@ xfs_release(
1096 */ 1093 */
1097 truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); 1094 truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
1098 if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) 1095 if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)
1099 xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); 1096 xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
1100 } 1097 }
1101 1098
1102 if (ip->i_d.di_nlink != 0) { 1099 if (ip->i_d.di_nlink != 0) {
@@ -2199,7 +2196,8 @@ xfs_symlink(
2199 if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) { 2196 if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) {
2200 error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp, 2197 error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp,
2201 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 2198 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
2202 link_name->name, target_path, 0, 0, 0); 2199 link_name->name,
2200 (unsigned char *)target_path, 0, 0, 0);
2203 if (error) 2201 if (error)
2204 return error; 2202 return error;
2205 } 2203 }
@@ -2395,7 +2393,8 @@ std_return:
2395 dp, DM_RIGHT_NULL, 2393 dp, DM_RIGHT_NULL,
2396 error ? NULL : ip, 2394 error ? NULL : ip,
2397 DM_RIGHT_NULL, link_name->name, 2395 DM_RIGHT_NULL, link_name->name,
2398 target_path, 0, error, 0); 2396 (unsigned char *)target_path,
2397 0, error, 0);
2399 } 2398 }
2400 2399
2401 if (!error) 2400 if (!error)
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 167a467403a5..774f40729ca1 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -43,11 +43,11 @@ int xfs_change_file_space(struct xfs_inode *ip, int cmd,
43int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, 43int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
44 struct xfs_inode *src_ip, struct xfs_inode *target_dp, 44 struct xfs_inode *src_ip, struct xfs_inode *target_dp,
45 struct xfs_name *target_name, struct xfs_inode *target_ip); 45 struct xfs_name *target_name, struct xfs_inode *target_ip);
46int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value, 46int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
47 int *valuelenp, int flags); 47 unsigned char *value, int *valuelenp, int flags);
48int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value, 48int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
49 int valuelen, int flags); 49 unsigned char *value, int valuelen, int flags);
50int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags); 50int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
51int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, 51int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
52 int flags, struct attrlist_cursor_kern *cursor); 52 int flags, struct attrlist_cursor_kern *cursor);
53ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb, 53ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb,