author		Andrea Bastoni <bastoni@cs.unc.edu>	2010-05-30 19:16:45 -0400
committer	Andrea Bastoni <bastoni@cs.unc.edu>	2010-05-30 19:16:45 -0400
commit		ada47b5fe13d89735805b566185f4885f5a3f750 (patch)
tree		644b88f8a71896307d71438e9b3af49126ffb22b /fs/xfs
parent		43e98717ad40a4ae64545b5ba047c7b86aa44f4f (diff)
parent		3280f21d43ee541f97f8cda5792150d2dbec20d5 (diff)
Merge branch 'wip-2.6.34' into old-private-master (archived-private-master)
Diffstat (limited to 'fs/xfs')
-rw-r--r--  fs/xfs/Makefile | 11
-rw-r--r--  fs/xfs/linux-2.6/kmem.c | 57
-rw-r--r--  fs/xfs/linux-2.6/kmem.h | 21
-rw-r--r--  fs/xfs/linux-2.6/xfs_acl.c | 73
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c | 392
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.h | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 542
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.h | 95
-rw-r--r--  fs/xfs/linux-2.6/xfs_export.c | 20
-rw-r--r--  fs/xfs/linux-2.6/xfs_file.c | 856
-rw-r--r--  fs/xfs/linux-2.6/xfs_fs_subr.c | 5
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c | 23
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.h | 12
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.c | 6
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c | 24
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_lrw.c | 922
-rw-r--r--  fs/xfs/linux-2.6/xfs_lrw.h | 77
-rw-r--r--  fs/xfs/linux-2.6/xfs_quotaops.c | 19
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 354
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.h | 7
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c | 447
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h | 9
-rw-r--r--  fs/xfs/linux-2.6/xfs_sysctl.c | 62
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.c | 59
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.h | 1503
-rw-r--r--  fs/xfs/linux-2.6/xfs_vnode.h | 5
-rw-r--r--  fs/xfs/linux-2.6/xfs_xattr.c | 98
-rw-r--r--  fs/xfs/quota/xfs_dquot.c | 157
-rw-r--r--  fs/xfs/quota/xfs_dquot.h | 23
-rw-r--r--  fs/xfs/quota/xfs_dquot_item.c | 99
-rw-r--r--  fs/xfs/quota/xfs_dquot_item.h | 4
-rw-r--r--  fs/xfs/quota/xfs_qm.c | 80
-rw-r--r--  fs/xfs/quota/xfs_qm_bhv.c | 2
-rw-r--r--  fs/xfs/quota/xfs_qm_syscalls.c | 11
-rw-r--r--  fs/xfs/quota/xfs_trans_dquot.c | 49
-rw-r--r--  fs/xfs/support/debug.h | 18
-rw-r--r--  fs/xfs/support/ktrace.c | 323
-rw-r--r--  fs/xfs/support/ktrace.h | 85
-rw-r--r--  fs/xfs/xfs.h | 16
-rw-r--r--  fs/xfs/xfs_acl.h | 7
-rw-r--r--  fs/xfs/xfs_ag.h | 31
-rw-r--r--  fs/xfs/xfs_alloc.c | 354
-rw-r--r--  fs/xfs/xfs_alloc.h | 27
-rw-r--r--  fs/xfs/xfs_alloc_btree.c | 10
-rw-r--r--  fs/xfs/xfs_attr.c | 171
-rw-r--r--  fs/xfs/xfs_attr.h | 14
-rw-r--r--  fs/xfs/xfs_attr_leaf.c | 46
-rw-r--r--  fs/xfs/xfs_attr_sf.h | 42
-rw-r--r--  fs/xfs/xfs_bmap.c | 1163
-rw-r--r--  fs/xfs/xfs_bmap.h | 58
-rw-r--r--  fs/xfs/xfs_bmap_btree.c | 11
-rw-r--r--  fs/xfs/xfs_bmap_btree.h | 15
-rw-r--r--  fs/xfs/xfs_btree.c | 9
-rw-r--r--  fs/xfs/xfs_btree_trace.h | 17
-rw-r--r--  fs/xfs/xfs_buf_item.c | 159
-rw-r--r--  fs/xfs/xfs_buf_item.h | 20
-rw-r--r--  fs/xfs/xfs_da_btree.c | 7
-rw-r--r--  fs/xfs/xfs_da_btree.h | 12
-rw-r--r--  fs/xfs/xfs_dfrag.c | 161
-rw-r--r--  fs/xfs/xfs_dfrag.h | 3
-rw-r--r--  fs/xfs/xfs_dir2.c | 16
-rw-r--r--  fs/xfs/xfs_dir2.h | 4
-rw-r--r--  fs/xfs/xfs_dir2_block.c | 29
-rw-r--r--  fs/xfs/xfs_dir2_leaf.c | 23
-rw-r--r--  fs/xfs/xfs_dir2_node.c | 29
-rw-r--r--  fs/xfs/xfs_dir2_node.h | 2
-rw-r--r--  fs/xfs/xfs_dir2_sf.c | 28
-rw-r--r--  fs/xfs/xfs_dir2_trace.c | 216
-rw-r--r--  fs/xfs/xfs_dir2_trace.h | 72
-rw-r--r--  fs/xfs/xfs_extfree_item.c | 4
-rw-r--r--  fs/xfs/xfs_filestream.c | 50
-rw-r--r--  fs/xfs/xfs_filestream.h | 36
-rw-r--r--  fs/xfs/xfs_fs.h | 3
-rw-r--r--  fs/xfs/xfs_fsops.c | 69
-rw-r--r--  fs/xfs/xfs_ialloc.c | 64
-rw-r--r--  fs/xfs/xfs_iget.c | 160
-rw-r--r--  fs/xfs/xfs_inode.c | 278
-rw-r--r--  fs/xfs/xfs_inode.h | 96
-rw-r--r--  fs/xfs/xfs_inode_item.c | 146
-rw-r--r--  fs/xfs/xfs_inode_item.h | 12
-rw-r--r--  fs/xfs/xfs_iomap.c | 94
-rw-r--r--  fs/xfs/xfs_iomap.h | 8
-rw-r--r--  fs/xfs/xfs_itable.c | 14
-rw-r--r--  fs/xfs/xfs_log.c | 708
-rw-r--r--  fs/xfs/xfs_log.h | 35
-rw-r--r--  fs/xfs/xfs_log_priv.h | 25
-rw-r--r--  fs/xfs/xfs_log_recover.c | 271
-rw-r--r--  fs/xfs/xfs_log_recover.h | 23
-rw-r--r--  fs/xfs/xfs_mount.c | 268
-rw-r--r--  fs/xfs/xfs_mount.h | 59
-rw-r--r--  fs/xfs/xfs_mru_cache.c | 2
-rw-r--r--  fs/xfs/xfs_mru_cache.h | 1
-rw-r--r--  fs/xfs/xfs_quota.h | 17
-rw-r--r--  fs/xfs/xfs_rename.c | 1
-rw-r--r--  fs/xfs/xfs_rtalloc.c | 3
-rw-r--r--  fs/xfs/xfs_rw.c | 174
-rw-r--r--  fs/xfs/xfs_rw.h | 33
-rw-r--r--  fs/xfs/xfs_trans.c | 16
-rw-r--r--  fs/xfs/xfs_trans.h | 54
-rw-r--r--  fs/xfs/xfs_trans_ail.c | 34
-rw-r--r--  fs/xfs/xfs_trans_buf.c | 302
-rw-r--r--  fs/xfs/xfs_types.h | 4
-rw-r--r--  fs/xfs/xfs_vnodeops.c | 294
-rw-r--r--  fs/xfs/xfs_vnodeops.h | 26
105 files changed, 6107 insertions, 6603 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 7a59daed1782..b4769e40e8bc 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -16,7 +16,7 @@
 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 #
 
-EXTRA_CFLAGS +=	-I$(src) -I$(src)/linux-2.6 -funsigned-char
+EXTRA_CFLAGS +=	-I$(src) -I$(src)/linux-2.6
 
 XFS_LINUX := linux-2.6
 
@@ -26,6 +26,8 @@ endif
 
 obj-$(CONFIG_XFS_FS)		+= xfs.o
 
+xfs-y				+= linux-2.6/xfs_trace.o
+
 xfs-$(CONFIG_XFS_QUOTA)		+= $(addprefix quota/, \
 				   xfs_dquot.o \
 				   xfs_dquot_item.o \
@@ -90,8 +92,7 @@ xfs-y += xfs_alloc.o \
 				   xfs_rw.o \
 				   xfs_dmops.o
 
-xfs-$(CONFIG_XFS_TRACE)		+= xfs_btree_trace.o \
-				   xfs_dir2_trace.o
+xfs-$(CONFIG_XFS_TRACE)		+= xfs_btree_trace.o
 
 # Objects in linux/
 xfs-y				+= $(addprefix $(XFS_LINUX)/, \
@@ -104,7 +105,6 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
 				   xfs_globals.o \
 				   xfs_ioctl.o \
 				   xfs_iops.o \
-				   xfs_lrw.o \
 				   xfs_super.o \
 				   xfs_sync.o \
 				   xfs_xattr.o)
@@ -113,6 +113,3 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
 xfs-y				+= $(addprefix support/, \
 				   debug.o \
 				   uuid.o)
-
-xfs-$(CONFIG_XFS_TRACE)		+= support/ktrace.o
-
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 2d3f90afe5f1..666c9db48eb6 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -16,16 +16,33 @@
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include <linux/mm.h>
-#include <linux/vmalloc.h>
 #include <linux/highmem.h>
+#include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include "time.h"
 #include "kmem.h"
 
-#define MAX_VMALLOCS	6
-#define MAX_SLAB_SIZE	0x20000
+/*
+ * Greedy allocation.  May fail and may return vmalloced memory.
+ *
+ * Must be freed using kmem_free_large.
+ */
+void *
+kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
+{
+	void		*ptr;
+	size_t		kmsize = maxsize;
+
+	while (!(ptr = kmem_zalloc_large(kmsize))) {
+		if ((kmsize >>= 1) <= minsize)
+			kmsize = minsize;
+	}
+	if (ptr)
+		*size = kmsize;
+	return ptr;
+}
 
 void *
 kmem_alloc(size_t size, unsigned int __nocast flags)
@@ -34,19 +51,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
 	gfp_t	lflags = kmem_flags_convert(flags);
 	void	*ptr;
 
-#ifdef DEBUG
-	if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) {
-		printk(KERN_WARNING "Large %s attempt, size=%ld\n",
-			__func__, (long)size);
-		dump_stack();
-	}
-#endif
-
 	do {
-		if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
-			ptr = kmalloc(size, lflags);
-		else
-			ptr = __vmalloc(size, lflags, PAGE_KERNEL);
+		ptr = kmalloc(size, lflags);
 		if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
 			return ptr;
 		if (!(++retries % 100))
@@ -68,27 +74,6 @@ kmem_zalloc(size_t size, unsigned int __nocast flags)
 	return ptr;
 }
 
-void *
-kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
-		   unsigned int __nocast flags)
-{
-	void		*ptr;
-	size_t		kmsize = maxsize;
-	unsigned int	kmflags = (flags & ~KM_SLEEP) | KM_NOSLEEP;
-
-	while (!(ptr = kmem_zalloc(kmsize, kmflags))) {
-		if ((kmsize <= minsize) && (flags & KM_NOSLEEP))
-			break;
-		if ((kmsize >>= 1) <= minsize) {
-			kmsize = minsize;
-			kmflags = flags;
-		}
-	}
-	if (ptr)
-		*size = kmsize;
-	return ptr;
-}
-
 void
 kmem_free(const void *ptr)
 {
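
The rewritten kmem_zalloc_greedy() drops the flags argument entirely: it loops on kmem_zalloc_large(), halving the request each round and clamping at minsize, so callers always receive zeroed, vmalloc-backed memory that must be released with kmem_free_large() rather than kmem_free(). A minimal caller sketch follows; the function name and sizes are illustrative, not part of this commit:

/* Illustrative caller of the new greedy allocator; not from this diff. */
static int example_greedy_user(void)
{
	size_t	sz;
	void	*buf;

	/* Ask for up to 16 pages of zeroed scratch space, accept one. */
	buf = kmem_zalloc_greedy(&sz, PAGE_SIZE, 16 * PAGE_SIZE);
	if (!buf)
		return -ENOMEM;

	/* ... use buf[0..sz-1]; sz holds the size actually granted ... */

	kmem_free_large(buf);	/* vmalloc-backed, so not kmem_free() */
	return 0;
}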
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 179cbd630f69..f7c8f7a9ea6d 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -21,6 +21,7 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/vmalloc.h>
 
 /*
  * General memory allocation interfaces
@@ -30,7 +31,6 @@
 #define KM_NOSLEEP	0x0002u
 #define KM_NOFS		0x0004u
 #define KM_MAYFAIL	0x0008u
-#define KM_LARGE	0x0010u
 
 /*
  * We use a special process flag to avoid recursive callbacks into
@@ -42,7 +42,7 @@ kmem_flags_convert(unsigned int __nocast flags)
 {
 	gfp_t	lflags;
 
-	BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_LARGE));
+	BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));
 
 	if (flags & KM_NOSLEEP) {
 		lflags = GFP_ATOMIC | __GFP_NOWARN;
@@ -56,10 +56,25 @@ kmem_flags_convert(unsigned int __nocast flags)
 
 extern void *kmem_alloc(size_t, unsigned int __nocast);
 extern void *kmem_zalloc(size_t, unsigned int __nocast);
-extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast);
 extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
 extern void  kmem_free(const void *);
 
+static inline void *kmem_zalloc_large(size_t size)
+{
+	void *ptr;
+
+	ptr = vmalloc(size);
+	if (ptr)
+		memset(ptr, 0, size);
+	return ptr;
+}
+static inline void kmem_free_large(void *ptr)
+{
+	vfree(ptr);
+}
+
+extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
+
 /*
  * Zone interfaces
  */
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index b23a54506446..a7bc925c4d60 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -21,6 +21,8 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_inode.h"
 #include "xfs_vnodeops.h"
+#include "xfs_trace.h"
+#include <linux/slab.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
 
@@ -105,7 +107,7 @@ xfs_get_acl(struct inode *inode, int type)
 	struct posix_acl *acl;
 	struct xfs_acl *xfs_acl;
 	int len = sizeof(struct xfs_acl);
-	char *ea_name;
+	unsigned char *ea_name;
 	int error;
 
 	acl = get_cached_acl(inode, type);
@@ -132,7 +134,8 @@ xfs_get_acl(struct inode *inode, int type)
 	if (!xfs_acl)
 		return ERR_PTR(-ENOMEM);
 
-	error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT);
+	error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
+							&len, ATTR_ROOT);
 	if (error) {
 		/*
 		 * If the attribute doesn't exist make sure we have a negative
@@ -161,7 +164,7 @@ STATIC int
 xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
 	struct xfs_inode *ip = XFS_I(inode);
-	char *ea_name;
+	unsigned char *ea_name;
 	int error;
 
 	if (S_ISLNK(inode->i_mode))
@@ -193,7 +196,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 			(sizeof(struct xfs_acl_entry) *
 			 (XFS_ACL_MAX_ENTRIES - acl->a_count));
 
-		error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl,
+		error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
 				len, ATTR_ROOT);
 
 		kfree(xfs_acl);
@@ -250,8 +253,9 @@ xfs_set_mode(struct inode *inode, mode_t mode)
 	if (mode != inode->i_mode) {
 		struct iattr iattr;
 
-		iattr.ia_valid = ATTR_MODE;
+		iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
 		iattr.ia_mode = mode;
+		iattr.ia_ctime = current_fs_time(inode->i_sb);
 
 		error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
 	}
@@ -260,7 +264,7 @@ xfs_set_mode(struct inode *inode, mode_t mode)
 }
 
 static int
-xfs_acl_exists(struct inode *inode, char *name)
+xfs_acl_exists(struct inode *inode, unsigned char *name)
 {
 	int len = sizeof(struct xfs_acl);
 
@@ -353,37 +357,14 @@ xfs_acl_chmod(struct inode *inode)
 	return error;
 }
 
-/*
- * System xattr handlers.
- *
- * Currently Posix ACLs are the only system namespace extended attribute
- * handlers supported by XFS, so we just implement the handlers here.
- * If we ever support other system extended attributes this will need
- * some refactoring.
- */
-
 static int
-xfs_decode_acl(const char *name)
-{
-	if (strcmp(name, "posix_acl_access") == 0)
-		return ACL_TYPE_ACCESS;
-	else if (strcmp(name, "posix_acl_default") == 0)
-		return ACL_TYPE_DEFAULT;
-	return -EINVAL;
-}
-
-static int
-xfs_xattr_system_get(struct inode *inode, const char *name,
-		void *value, size_t size)
+xfs_xattr_acl_get(struct dentry *dentry, const char *name,
+		void *value, size_t size, int type)
 {
 	struct posix_acl *acl;
-	int type, error;
-
-	type = xfs_decode_acl(name);
-	if (type < 0)
-		return type;
+	int error;
 
-	acl = xfs_get_acl(inode, type);
+	acl = xfs_get_acl(dentry->d_inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -396,15 +377,13 @@ xfs_xattr_system_get(struct inode *inode, const char *name,
 }
 
 static int
-xfs_xattr_system_set(struct inode *inode, const char *name,
-		const void *value, size_t size, int flags)
+xfs_xattr_acl_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
+	struct inode *inode = dentry->d_inode;
 	struct posix_acl *acl = NULL;
-	int error = 0, type;
+	int error = 0;
 
-	type = xfs_decode_acl(name);
-	if (type < 0)
-		return type;
 	if (flags & XATTR_CREATE)
 		return -EINVAL;
 	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
@@ -461,8 +440,16 @@ xfs_xattr_system_set(struct inode *inode, const char *name,
 	return error;
 }
 
-struct xattr_handler xfs_xattr_system_handler = {
-	.prefix	= XATTR_SYSTEM_PREFIX,
-	.get	= xfs_xattr_system_get,
-	.set	= xfs_xattr_system_set,
+struct xattr_handler xfs_xattr_acl_access_handler = {
+	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_ACCESS,
+	.get	= xfs_xattr_acl_get,
+	.set	= xfs_xattr_acl_set,
+};
+
+struct xattr_handler xfs_xattr_acl_default_handler = {
+	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
+	.get	= xfs_xattr_acl_get,
+	.set	= xfs_xattr_acl_set,
 };
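
The ACL rework above removes the name parsing in xfs_decode_acl(): each ACL type now gets its own struct xattr_handler whose .flags member carries the ACL type, and the generic xattr layer hands that value back as the type argument. A simplified sketch of that dispatch, modelled on the generic layer of this kernel era and not part of this diff:

/* Simplified model of generic xattr dispatch; illustrative only. */
static struct xattr_handler *
xattr_lookup_handler(struct xattr_handler **handlers, const char *name)
{
	struct xattr_handler **p;

	for (p = handlers; *p; p++) {
		if (!strncmp(name, (*p)->prefix, strlen((*p)->prefix)))
			return *p;
	}
	return NULL;
}

static ssize_t
generic_getxattr_sketch(struct dentry *dentry, const char *name,
		void *value, size_t size)
{
	struct xattr_handler *handler;

	handler = xattr_lookup_handler(dentry->d_sb->s_xattr, name);
	if (!handler)
		return -EOPNOTSUPP;
	/* handler->flags (ACL_TYPE_ACCESS/DEFAULT) becomes 'type'. */
	return handler->get(dentry, name, value, size, handler->flags);
}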
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c2e30eea74dc..0f8b9968a803 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -38,6 +38,9 @@
 #include "xfs_rw.h"
 #include "xfs_iomap.h"
 #include "xfs_vnodeops.h"
+#include "xfs_trace.h"
+#include "xfs_bmap.h"
+#include <linux/gfp.h>
 #include <linux/mpage.h>
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
@@ -76,7 +79,7 @@ xfs_ioend_wake(
 	wake_up(to_ioend_wq(ip));
 }
 
-STATIC void
+void
 xfs_count_page_state(
 	struct page		*page,
 	int			*delalloc,
@@ -98,48 +101,6 @@ xfs_count_page_state(
 	} while ((bh = bh->b_this_page) != head);
 }
 
-#if defined(XFS_RW_TRACE)
-void
-xfs_page_trace(
-	int		tag,
-	struct inode	*inode,
-	struct page	*page,
-	unsigned long	pgoff)
-{
-	xfs_inode_t	*ip;
-	loff_t		isize = i_size_read(inode);
-	loff_t		offset = page_offset(page);
-	int		delalloc = -1, unmapped = -1, unwritten = -1;
-
-	if (page_has_buffers(page))
-		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
-
-	ip = XFS_I(inode);
-	if (!ip->i_rwtrace)
-		return;
-
-	ktrace_enter(ip->i_rwtrace,
-		(void *)((unsigned long)tag),
-		(void *)ip,
-		(void *)inode,
-		(void *)page,
-		(void *)pgoff,
-		(void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
-		(void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
-		(void *)((unsigned long)((isize >> 32) & 0xffffffff)),
-		(void *)((unsigned long)(isize & 0xffffffff)),
-		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
-		(void *)((unsigned long)(offset & 0xffffffff)),
-		(void *)((unsigned long)delalloc),
-		(void *)((unsigned long)unmapped),
-		(void *)((unsigned long)unwritten),
-		(void *)((unsigned long)current_pid()),
-		(void *)NULL);
-}
-#else
-#define xfs_page_trace(tag, inode, page, pgoff)
-#endif
-
 STATIC struct block_device *
 xfs_find_bdev_for_inode(
 	struct xfs_inode	*ip)
@@ -204,14 +165,17 @@
 }
 
 /*
- * Update on-disk file size now that data has been written to disk.
- * The current in-memory file size is i_size.  If a write is beyond
- * eof i_new_size will be the intended file size until i_size is
- * updated.  If this write does not extend all the way to the valid
- * file size then restrict this update to the end of the write.
+ * Update on-disk file size now that data has been written to disk.  The
+ * current in-memory file size is i_size.  If a write is beyond eof i_new_size
+ * will be the intended file size until i_size is updated.  If this write does
+ * not extend all the way to the valid file size then restrict this update to
+ * the end of the write.
+ *
+ * This function does not block as blocking on the inode lock in IO completion
+ * can lead to IO completion order dependency deadlocks.. If it can't get the
+ * inode ilock it will return EAGAIN. Callers must handle this.
  */
-
-STATIC void
+STATIC int
 xfs_setfilesize(
 	xfs_ioend_t		*ioend)
 {
@@ -222,85 +186,19 @@ xfs_setfilesize(
 	ASSERT(ioend->io_type != IOMAP_READ);
 
 	if (unlikely(ioend->io_error))
-		return;
+		return 0;
+
+	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
+		return EAGAIN;
 
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	isize = xfs_ioend_new_eof(ioend);
 	if (isize) {
 		ip->i_d.di_size = isize;
-		xfs_mark_inode_dirty_sync(ip);
+		xfs_mark_inode_dirty(ip);
 	}
 
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-}
-
-/*
- * Buffered IO write completion for delayed allocate extents.
- */
-STATIC void
-xfs_end_bio_delalloc(
-	struct work_struct	*work)
-{
-	xfs_ioend_t		*ioend =
-		container_of(work, xfs_ioend_t, io_work);
-
-	xfs_setfilesize(ioend);
-	xfs_destroy_ioend(ioend);
-}
-
-/*
- * Buffered IO write completion for regular, written extents.
- */
-STATIC void
-xfs_end_bio_written(
-	struct work_struct	*work)
-{
-	xfs_ioend_t		*ioend =
-		container_of(work, xfs_ioend_t, io_work);
-
-	xfs_setfilesize(ioend);
-	xfs_destroy_ioend(ioend);
-}
-
-/*
- * IO write completion for unwritten extents.
- *
- * Issue transactions to convert a buffer range from unwritten
- * to written extents.
- */
-STATIC void
-xfs_end_bio_unwritten(
-	struct work_struct	*work)
-{
-	xfs_ioend_t		*ioend =
-		container_of(work, xfs_ioend_t, io_work);
-	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
-	xfs_off_t		offset = ioend->io_offset;
-	size_t			size = ioend->io_size;
-
-	if (likely(!ioend->io_error)) {
-		if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-			int error;
-			error = xfs_iomap_write_unwritten(ip, offset, size);
-			if (error)
-				ioend->io_error = error;
-		}
-		xfs_setfilesize(ioend);
-	}
-	xfs_destroy_ioend(ioend);
-}
-
-/*
- * IO read completion for regular, written extents.
- */
-STATIC void
-xfs_end_bio_read(
-	struct work_struct	*work)
-{
-	xfs_ioend_t		*ioend =
-		container_of(work, xfs_ioend_t, io_work);
-
-	xfs_destroy_ioend(ioend);
+	return 0;
 }
 
 /*
@@ -314,10 +212,10 @@
 	int			wait)
 {
 	if (atomic_dec_and_test(&ioend->io_remaining)) {
-		struct workqueue_struct *wq = xfsdatad_workqueue;
-		if (ioend->io_work.func == xfs_end_bio_unwritten)
-			wq = xfsconvertd_workqueue;
+		struct workqueue_struct *wq;
 
+		wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
+			xfsconvertd_workqueue : xfsdatad_workqueue;
 		queue_work(wq, &ioend->io_work);
 		if (wait)
 			flush_workqueue(wq);
@@ -325,6 +223,53 @@
 }
 
 /*
+ * IO write completion.
+ */
+STATIC void
+xfs_end_io(
+	struct work_struct *work)
+{
+	xfs_ioend_t	*ioend = container_of(work, xfs_ioend_t, io_work);
+	struct xfs_inode *ip = XFS_I(ioend->io_inode);
+	int		error = 0;
+
+	/*
+	 * For unwritten extents we need to issue transactions to convert a
+	 * range to normal written extens after the data I/O has finished.
+	 */
+	if (ioend->io_type == IOMAP_UNWRITTEN &&
+	    likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
+
+		error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
+						 ioend->io_size);
+		if (error)
+			ioend->io_error = error;
+	}
+
+	/*
+	 * We might have to update the on-disk file size after extending
+	 * writes.
+	 */
+	if (ioend->io_type != IOMAP_READ) {
+		error = xfs_setfilesize(ioend);
+		ASSERT(!error || error == EAGAIN);
+	}
+
+	/*
+	 * If we didn't complete processing of the ioend, requeue it to the
+	 * tail of the workqueue for another attempt later. Otherwise destroy
+	 * it.
+	 */
+	if (error == EAGAIN) {
+		atomic_inc(&ioend->io_remaining);
+		xfs_finish_ioend(ioend, 0);
+		/* ensure we don't spin on blocked ioends */
+		delay(1);
+	} else
+		xfs_destroy_ioend(ioend);
+}
+
+/*
  * Allocate and initialise an IO completion structure.
  * We need to track unwritten extent write completion here initially.
  * We'll need to extend this for updating the ondisk inode size later
@@ -355,15 +300,7 @@ xfs_alloc_ioend(
 	ioend->io_offset = 0;
 	ioend->io_size = 0;
 
-	if (type == IOMAP_UNWRITTEN)
-		INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten);
-	else if (type == IOMAP_DELAY)
-		INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc);
-	else if (type == IOMAP_READ)
-		INIT_WORK(&ioend->io_work, xfs_end_bio_read);
-	else
-		INIT_WORK(&ioend->io_work, xfs_end_bio_written);
-
+	INIT_WORK(&ioend->io_work, xfs_end_io);
 	return ioend;
 }
 
@@ -380,7 +317,7 @@ xfs_map_blocks(
 	return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps);
 }
 
-STATIC_INLINE int
+STATIC int
 xfs_iomap_valid(
 	xfs_iomap_t		*iomapp,
 	loff_t			offset)
@@ -412,8 +349,9 @@ xfs_end_bio(
 
 STATIC void
 xfs_submit_ioend_bio(
-	xfs_ioend_t	*ioend,
-	struct bio	*bio)
+	struct writeback_control *wbc,
+	xfs_ioend_t		*ioend,
+	struct bio		*bio)
 {
 	atomic_inc(&ioend->io_remaining);
 	bio->bi_private = ioend;
@@ -424,9 +362,10 @@ xfs_submit_ioend_bio(
 	 * but don't update the inode size until I/O completion.
 	 */
 	if (xfs_ioend_new_eof(ioend))
-		xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode));
+		xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
 
-	submit_bio(WRITE, bio);
+	submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
+		   WRITE_SYNC_PLUG : WRITE, bio);
 	ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
 	bio_put(bio);
 }
@@ -505,6 +444,7 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
  */
 STATIC void
 xfs_submit_ioend(
+	struct writeback_control *wbc,
 	xfs_ioend_t		*ioend)
 {
 	xfs_ioend_t		*head = ioend;
@@ -533,19 +473,19 @@
 retry:
 			bio = xfs_alloc_ioend_bio(bh);
 		} else if (bh->b_blocknr != lastblock + 1) {
-			xfs_submit_ioend_bio(ioend, bio);
+			xfs_submit_ioend_bio(wbc, ioend, bio);
 			goto retry;
 		}
 
 		if (bio_add_buffer(bio, bh) != bh->b_size) {
-			xfs_submit_ioend_bio(ioend, bio);
+			xfs_submit_ioend_bio(wbc, ioend, bio);
 			goto retry;
 		}
 
 		lastblock = bh->b_blocknr;
 	}
 	if (bio)
-		xfs_submit_ioend_bio(ioend, bio);
+		xfs_submit_ioend_bio(wbc, ioend, bio);
 	xfs_finish_ioend(ioend, 0);
 	} while ((ioend = next) != NULL);
 }
@@ -904,16 +844,9 @@ xfs_convert_page(
 
 	if (startio) {
 		if (count) {
-			struct backing_dev_info *bdi;
-
-			bdi = inode->i_mapping->backing_dev_info;
 			wbc->nr_to_write--;
-			if (bdi_write_congested(bdi)) {
-				wbc->encountered_congestion = 1;
-				done = 1;
-			} else if (wbc->nr_to_write <= 0) {
+			if (wbc->nr_to_write <= 0)
 				done = 1;
-			}
 		}
 		xfs_start_page_writeback(page, !page_dirty, count);
 	}
@@ -962,6 +895,125 @@ xfs_cluster_write(
 	}
 }
 
+STATIC void
+xfs_vm_invalidatepage(
+	struct page		*page,
+	unsigned long		offset)
+{
+	trace_xfs_invalidatepage(page->mapping->host, page, offset);
+	block_invalidatepage(page, offset);
+}
+
+/*
+ * If the page has delalloc buffers on it, we need to punch them out before we
+ * invalidate the page. If we don't, we leave a stale delalloc mapping on the
+ * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
+ * is done on that same region - the delalloc extent is returned when none is
+ * supposed to be there.
+ *
+ * We prevent this by truncating away the delalloc regions on the page before
+ * invalidating it. Because they are delalloc, we can do this without needing a
+ * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
+ * truncation without a transaction as there is no space left for block
+ * reservation (typically why we see a ENOSPC in writeback).
+ *
+ * This is not a performance critical path, so for now just do the punching a
+ * buffer head at a time.
+ */
+STATIC void
+xfs_aops_discard_page(
+	struct page		*page)
+{
+	struct inode		*inode = page->mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct buffer_head	*bh, *head;
+	loff_t			offset = page_offset(page);
+	ssize_t			len = 1 << inode->i_blkbits;
+
+	if (!xfs_is_delayed_page(page, IOMAP_DELAY))
+		goto out_invalidate;
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		goto out_invalidate;
+
+	xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+		"page discard on page %p, inode 0x%llx, offset %llu.",
+			page, ip->i_ino, offset);
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	bh = head = page_buffers(page);
+	do {
+		int		done;
+		xfs_fileoff_t	offset_fsb;
+		xfs_bmbt_irec_t	imap;
+		int		nimaps = 1;
+		int		error;
+		xfs_fsblock_t	firstblock;
+		xfs_bmap_free_t flist;
+
+		if (!buffer_delay(bh))
+			goto next_buffer;
+
+		offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+
+		/*
+		 * Map the range first and check that it is a delalloc extent
+		 * before trying to unmap the range. Otherwise we will be
+		 * trying to remove a real extent (which requires a
+		 * transaction) or a hole, which is probably a bad idea...
+		 */
+		error = xfs_bmapi(NULL, ip, offset_fsb, 1,
+				XFS_BMAPI_ENTIRE,  NULL, 0, &imap,
+				&nimaps, NULL, NULL);
+
+		if (error) {
+			/* something screwed, just bail */
+			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+				"page discard failed delalloc mapping lookup.");
+			}
+			break;
+		}
+		if (!nimaps) {
+			/* nothing there */
+			goto next_buffer;
+		}
+		if (imap.br_startblock != DELAYSTARTBLOCK) {
+			/* been converted, ignore */
+			goto next_buffer;
+		}
+		WARN_ON(imap.br_blockcount == 0);
+
+		/*
+		 * Note: while we initialise the firstblock/flist pair, they
+		 * should never be used because blocks should never be
+		 * allocated or freed for a delalloc extent and hence we need
+		 * don't cancel or finish them after the xfs_bunmapi() call.
+		 */
+		xfs_bmap_init(&flist, &firstblock);
+		error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
+					&flist, NULL, &done);
+
+		ASSERT(!flist.xbf_count && !flist.xbf_first);
+		if (error) {
+			/* something screwed, just bail */
+			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+			"page discard unable to remove delalloc mapping.");
+			}
+			break;
+		}
+next_buffer:
+		offset += len;
+
+	} while ((bh = bh->b_this_page) != head);
+
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_invalidate:
+	xfs_vm_invalidatepage(page, 0);
+	return;
+}
+
 /*
  * Calling this without startio set means we are being asked to make a dirty
  * page ready for freeing it's buffers.  When called with startio set then
@@ -1198,7 +1250,7 @@ xfs_page_state_convert(
 	}
 
 	if (iohead)
-		xfs_submit_ioend(iohead);
+		xfs_submit_ioend(wbc, iohead);
 
 	return page_dirty;
 
@@ -1213,7 +1265,7 @@ error:
 	 */
 	if (err != -EAGAIN) {
 		if (!unmapped)
-			block_invalidatepage(page, 0);
+			xfs_aops_discard_page(page);
 		ClearPageUptodate(page);
 	}
 	return err;
@@ -1249,7 +1301,7 @@ xfs_vm_writepage(
 	int			delalloc, unmapped, unwritten;
 	struct inode		*inode = page->mapping->host;
 
-	xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0);
+	trace_xfs_writepage(inode, page, 0);
 
 	/*
 	 * We need a transaction if:
@@ -1354,7 +1406,7 @@ xfs_vm_releasepage(
 		.nr_to_write = 1,
 	};
 
-	xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, 0);
+	trace_xfs_releasepage(inode, page, 0);
 
 	if (!page_has_buffers(page))
 		return 0;
@@ -1535,7 +1587,7 @@ xfs_end_io_direct(
 		 * didn't map an unwritten extent so switch it's completion
 		 * handler.
 		 */
-		INIT_WORK(&ioend->io_work, xfs_end_bio_written);
+		ioend->io_type = IOMAP_NEW;
 		xfs_finish_ioend(ioend, 0);
 	}
 
@@ -1562,19 +1614,13 @@ xfs_vm_direct_IO(
 
 	bdev = xfs_find_bdev_for_inode(XFS_I(inode));
 
-	if (rw == WRITE) {
-		iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
-		ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
-			bdev, iov, offset, nr_segs,
-			xfs_get_blocks_direct,
-			xfs_end_io_direct);
-	} else {
-		iocb->private = xfs_alloc_ioend(inode, IOMAP_READ);
-		ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
-			bdev, iov, offset, nr_segs,
-			xfs_get_blocks_direct,
-			xfs_end_io_direct);
-	}
+	iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
+					IOMAP_UNWRITTEN : IOMAP_READ);
+
+	ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
+					    offset, nr_segs,
+					    xfs_get_blocks_direct,
+					    xfs_end_io_direct);
 
 	if (unlikely(ret != -EIOCBQUEUED && iocb->private))
 		xfs_destroy_ioend(iocb->private);
@@ -1629,16 +1675,6 @@ xfs_vm_readpages(
 	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
 }
 
-STATIC void
-xfs_vm_invalidatepage(
-	struct page		*page,
-	unsigned long		offset)
-{
-	xfs_page_trace(XFS_INVALIDPAGE_ENTER,
-			page->mapping->host, page, offset);
-	block_invalidatepage(page, offset);
-}
-
 const struct address_space_operations xfs_address_space_operations = {
 	.readpage		= xfs_vm_readpage,
 	.readpages		= xfs_vm_readpages,
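
The consolidated xfs_end_io() above is careful never to sleep on the inode lock: xfs_setfilesize() uses xfs_ilock_nowait() and reports EAGAIN, and the handler requeues itself by bumping io_remaining and calling xfs_finish_ioend() again. The same non-blocking completion pattern, reduced to a skeleton with illustrative names that are not from this diff:

/* Skeleton of the requeue-on-contention pattern; illustrative only. */
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/delay.h>

struct sketch_ioend {
	struct work_struct	work;
	spinlock_t		*owner_lock;
	struct workqueue_struct	*wq;
};

static void sketch_end_io(struct work_struct *work)
{
	struct sketch_ioend *io =
		container_of(work, struct sketch_ioend, work);

	if (!spin_trylock(io->owner_lock)) {
		/* Contended: requeue instead of blocking the workqueue. */
		queue_work(io->wq, &io->work);
		msleep(1);	/* don't spin on a blocked completion */
		return;
	}
	/* ... finish the I/O under the lock ... */
	spin_unlock(io->owner_lock);
	kfree(io);
}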
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 221b3e66ceef..4cfc6ea87df8 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -45,4 +45,6 @@ extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
 extern void xfs_ioend_init(void);
 extern void xfs_ioend_wait(struct xfs_inode *);
 
+extern void xfs_count_page_state(struct page *, int *, int *, int *);
+
 #endif	/* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 965df1227d64..44c2b0ef9a41 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -18,7 +18,7 @@
18#include "xfs.h" 18#include "xfs.h"
19#include <linux/stddef.h> 19#include <linux/stddef.h>
20#include <linux/errno.h> 20#include <linux/errno.h>
21#include <linux/slab.h> 21#include <linux/gfp.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/init.h> 23#include <linux/init.h>
24#include <linux/vmalloc.h> 24#include <linux/vmalloc.h>
@@ -33,12 +33,14 @@
33#include <linux/migrate.h> 33#include <linux/migrate.h>
34#include <linux/backing-dev.h> 34#include <linux/backing-dev.h>
35#include <linux/freezer.h> 35#include <linux/freezer.h>
36#include <linux/list_sort.h>
36 37
37#include "xfs_sb.h" 38#include "xfs_sb.h"
38#include "xfs_inum.h" 39#include "xfs_inum.h"
39#include "xfs_ag.h" 40#include "xfs_ag.h"
40#include "xfs_dmapi.h" 41#include "xfs_dmapi.h"
41#include "xfs_mount.h" 42#include "xfs_mount.h"
43#include "xfs_trace.h"
42 44
43static kmem_zone_t *xfs_buf_zone; 45static kmem_zone_t *xfs_buf_zone;
44STATIC int xfsbufd(void *); 46STATIC int xfsbufd(void *);
@@ -53,34 +55,6 @@ static struct workqueue_struct *xfslogd_workqueue;
53struct workqueue_struct *xfsdatad_workqueue; 55struct workqueue_struct *xfsdatad_workqueue;
54struct workqueue_struct *xfsconvertd_workqueue; 56struct workqueue_struct *xfsconvertd_workqueue;
55 57
56#ifdef XFS_BUF_TRACE
57void
58xfs_buf_trace(
59 xfs_buf_t *bp,
60 char *id,
61 void *data,
62 void *ra)
63{
64 ktrace_enter(xfs_buf_trace_buf,
65 bp, id,
66 (void *)(unsigned long)bp->b_flags,
67 (void *)(unsigned long)bp->b_hold.counter,
68 (void *)(unsigned long)bp->b_sema.count,
69 (void *)current,
70 data, ra,
71 (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
72 (void *)(unsigned long)(bp->b_file_offset & 0xffffffff),
73 (void *)(unsigned long)bp->b_buffer_length,
74 NULL, NULL, NULL, NULL, NULL);
75}
76ktrace_t *xfs_buf_trace_buf;
77#define XFS_BUF_TRACE_SIZE 4096
78#define XB_TRACE(bp, id, data) \
79 xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0))
80#else
81#define XB_TRACE(bp, id, data) do { } while (0)
82#endif
83
84#ifdef XFS_BUF_LOCK_TRACKING 58#ifdef XFS_BUF_LOCK_TRACKING
85# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) 59# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)
86# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) 60# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1)
@@ -103,6 +77,27 @@ ktrace_t *xfs_buf_trace_buf;
103#define xfs_buf_deallocate(bp) \ 77#define xfs_buf_deallocate(bp) \
104 kmem_zone_free(xfs_buf_zone, (bp)); 78 kmem_zone_free(xfs_buf_zone, (bp));
105 79
80static inline int
81xfs_buf_is_vmapped(
82 struct xfs_buf *bp)
83{
84 /*
85 * Return true if the buffer is vmapped.
86 *
87 * The XBF_MAPPED flag is set if the buffer should be mapped, but the
88 * code is clever enough to know it doesn't have to map a single page,
89 * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1.
90 */
91 return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1;
92}
93
94static inline int
95xfs_buf_vmap_len(
96 struct xfs_buf *bp)
97{
98 return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
99}
100
106/* 101/*
107 * Page Region interfaces. 102 * Page Region interfaces.
108 * 103 *
@@ -149,7 +144,7 @@ page_region_mask(
149 return mask; 144 return mask;
150} 145}
151 146
152STATIC_INLINE void 147STATIC void
153set_page_region( 148set_page_region(
154 struct page *page, 149 struct page *page,
155 size_t offset, 150 size_t offset,
@@ -161,7 +156,7 @@ set_page_region(
161 SetPageUptodate(page); 156 SetPageUptodate(page);
162} 157}
163 158
164STATIC_INLINE int 159STATIC int
165test_page_region( 160test_page_region(
166 struct page *page, 161 struct page *page,
167 size_t offset, 162 size_t offset,
@@ -173,75 +168,6 @@ test_page_region(
173} 168}
174 169
175/* 170/*
176 * Mapping of multi-page buffers into contiguous virtual space
177 */
178
179typedef struct a_list {
180 void *vm_addr;
181 struct a_list *next;
182} a_list_t;
183
184static a_list_t *as_free_head;
185static int as_list_len;
186static DEFINE_SPINLOCK(as_lock);
187
188/*
189 * Try to batch vunmaps because they are costly.
190 */
191STATIC void
192free_address(
193 void *addr)
194{
195 a_list_t *aentry;
196
197#ifdef CONFIG_XEN
198 /*
199 * Xen needs to be able to make sure it can get an exclusive
200 * RO mapping of pages it wants to turn into a pagetable. If
201 * a newly allocated page is also still being vmap()ed by xfs,
202 * it will cause pagetable construction to fail. This is a
203 * quick workaround to always eagerly unmap pages so that Xen
204 * is happy.
205 */
206 vunmap(addr);
207 return;
208#endif
209
210 aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT);
211 if (likely(aentry)) {
212 spin_lock(&as_lock);
213 aentry->next = as_free_head;
214 aentry->vm_addr = addr;
215 as_free_head = aentry;
216 as_list_len++;
217 spin_unlock(&as_lock);
218 } else {
219 vunmap(addr);
220 }
221}
222
223STATIC void
224purge_addresses(void)
225{
226 a_list_t *aentry, *old;
227
228 if (as_free_head == NULL)
229 return;
230
231 spin_lock(&as_lock);
232 aentry = as_free_head;
233 as_free_head = NULL;
234 as_list_len = 0;
235 spin_unlock(&as_lock);
236
237 while ((old = aentry) != NULL) {
238 vunmap(aentry->vm_addr);
239 aentry = aentry->next;
240 kfree(old);
241 }
242}
243
244/*
245 * Internal xfs_buf_t object manipulation 171 * Internal xfs_buf_t object manipulation
246 */ 172 */
247 173
@@ -279,7 +205,8 @@ _xfs_buf_initialize(
279 init_waitqueue_head(&bp->b_waiters); 205 init_waitqueue_head(&bp->b_waiters);
280 206
281 XFS_STATS_INC(xb_create); 207 XFS_STATS_INC(xb_create);
282 XB_TRACE(bp, "initialize", target); 208
209 trace_xfs_buf_init(bp, _RET_IP_);
283} 210}
284 211
285/* 212/*
@@ -318,6 +245,7 @@ _xfs_buf_free_pages(
318{ 245{
319 if (bp->b_pages != bp->b_page_array) { 246 if (bp->b_pages != bp->b_page_array) {
320 kmem_free(bp->b_pages); 247 kmem_free(bp->b_pages);
248 bp->b_pages = NULL;
321 } 249 }
322} 250}
323 251
@@ -332,15 +260,16 @@ void
332xfs_buf_free( 260xfs_buf_free(
333 xfs_buf_t *bp) 261 xfs_buf_t *bp)
334{ 262{
335 XB_TRACE(bp, "free", 0); 263 trace_xfs_buf_free(bp, _RET_IP_);
336 264
337 ASSERT(list_empty(&bp->b_hash_list)); 265 ASSERT(list_empty(&bp->b_hash_list));
338 266
339 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { 267 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
340 uint i; 268 uint i;
341 269
342 if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) 270 if (xfs_buf_is_vmapped(bp))
343 free_address(bp->b_addr - bp->b_offset); 271 vm_unmap_ram(bp->b_addr - bp->b_offset,
272 bp->b_page_count);
344 273
345 for (i = 0; i < bp->b_page_count; i++) { 274 for (i = 0; i < bp->b_page_count; i++) {
346 struct page *page = bp->b_pages[i]; 275 struct page *page = bp->b_pages[i];
@@ -349,9 +278,8 @@ xfs_buf_free(
349 ASSERT(!PagePrivate(page)); 278 ASSERT(!PagePrivate(page));
350 page_cache_release(page); 279 page_cache_release(page);
351 } 280 }
352 _xfs_buf_free_pages(bp);
353 } 281 }
354 282 _xfs_buf_free_pages(bp);
355 xfs_buf_deallocate(bp); 283 xfs_buf_deallocate(bp);
356} 284}
357 285
@@ -445,7 +373,6 @@ _xfs_buf_lookup_pages(
445 if (page_count == bp->b_page_count) 373 if (page_count == bp->b_page_count)
446 bp->b_flags |= XBF_DONE; 374 bp->b_flags |= XBF_DONE;
447 375
448 XB_TRACE(bp, "lookup_pages", (long)page_count);
449 return error; 376 return error;
450} 377}
451 378
@@ -462,10 +389,8 @@ _xfs_buf_map_pages(
462 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; 389 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
463 bp->b_flags |= XBF_MAPPED; 390 bp->b_flags |= XBF_MAPPED;
464 } else if (flags & XBF_MAPPED) { 391 } else if (flags & XBF_MAPPED) {
465 if (as_list_len > 64) 392 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
466 purge_addresses(); 393 -1, PAGE_KERNEL);
467 bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
468 VM_MAP, PAGE_KERNEL);
469 if (unlikely(bp->b_addr == NULL)) 394 if (unlikely(bp->b_addr == NULL))
470 return -ENOMEM; 395 return -ENOMEM;
471 bp->b_addr += bp->b_offset; 396 bp->b_addr += bp->b_offset;
@@ -548,7 +473,6 @@ found:
548 if (down_trylock(&bp->b_sema)) { 473 if (down_trylock(&bp->b_sema)) {
549 if (!(flags & XBF_TRYLOCK)) { 474 if (!(flags & XBF_TRYLOCK)) {
550 /* wait for buffer ownership */ 475 /* wait for buffer ownership */
551 XB_TRACE(bp, "get_lock", 0);
552 xfs_buf_lock(bp); 476 xfs_buf_lock(bp);
553 XFS_STATS_INC(xb_get_locked_waited); 477 XFS_STATS_INC(xb_get_locked_waited);
554 } else { 478 } else {
@@ -571,7 +495,8 @@ found:
571 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); 495 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
572 bp->b_flags &= XBF_MAPPED; 496 bp->b_flags &= XBF_MAPPED;
573 } 497 }
574 XB_TRACE(bp, "got_lock", 0); 498
499 trace_xfs_buf_find(bp, flags, _RET_IP_);
575 XFS_STATS_INC(xb_get_locked); 500 XFS_STATS_INC(xb_get_locked);
576 return bp; 501 return bp;
577} 502}
@@ -582,7 +507,7 @@ found:
582 * although backing storage may not be. 507 * although backing storage may not be.
583 */ 508 */
584xfs_buf_t * 509xfs_buf_t *
585xfs_buf_get_flags( 510xfs_buf_get(
586 xfs_buftarg_t *target,/* target for buffer */ 511 xfs_buftarg_t *target,/* target for buffer */
587 xfs_off_t ioff, /* starting offset of range */ 512 xfs_off_t ioff, /* starting offset of range */
588 size_t isize, /* length of range */ 513 size_t isize, /* length of range */
@@ -627,7 +552,7 @@ xfs_buf_get_flags(
627 bp->b_bn = ioff; 552 bp->b_bn = ioff;
628 bp->b_count_desired = bp->b_buffer_length; 553 bp->b_count_desired = bp->b_buffer_length;
629 554
630 XB_TRACE(bp, "get", (unsigned long)flags); 555 trace_xfs_buf_get(bp, flags, _RET_IP_);
631 return bp; 556 return bp;
632 557
633 no_buffer: 558 no_buffer:
@@ -644,8 +569,6 @@ _xfs_buf_read(
644{ 569{
645 int status; 570 int status;
646 571
647 XB_TRACE(bp, "_xfs_buf_read", (unsigned long)flags);
648
649 ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); 572 ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
650 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); 573 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
651 574
@@ -661,7 +584,7 @@ _xfs_buf_read(
661} 584}
662 585
663xfs_buf_t * 586xfs_buf_t *
664xfs_buf_read_flags( 587xfs_buf_read(
665 xfs_buftarg_t *target, 588 xfs_buftarg_t *target,
666 xfs_off_t ioff, 589 xfs_off_t ioff,
667 size_t isize, 590 size_t isize,
@@ -671,21 +594,20 @@ xfs_buf_read_flags(
671 594
672 flags |= XBF_READ; 595 flags |= XBF_READ;
673 596
674 bp = xfs_buf_get_flags(target, ioff, isize, flags); 597 bp = xfs_buf_get(target, ioff, isize, flags);
675 if (bp) { 598 if (bp) {
599 trace_xfs_buf_read(bp, flags, _RET_IP_);
600
676 if (!XFS_BUF_ISDONE(bp)) { 601 if (!XFS_BUF_ISDONE(bp)) {
677 XB_TRACE(bp, "read", (unsigned long)flags);
678 XFS_STATS_INC(xb_get_read); 602 XFS_STATS_INC(xb_get_read);
679 _xfs_buf_read(bp, flags); 603 _xfs_buf_read(bp, flags);
680 } else if (flags & XBF_ASYNC) { 604 } else if (flags & XBF_ASYNC) {
681 XB_TRACE(bp, "read_async", (unsigned long)flags);
682 /* 605 /*
683 * Read ahead call which is already satisfied, 606 * Read ahead call which is already satisfied,
684 * drop the buffer 607 * drop the buffer
685 */ 608 */
686 goto no_buffer; 609 goto no_buffer;
687 } else { 610 } else {
688 XB_TRACE(bp, "read_done", (unsigned long)flags);
689 /* We do not want read in the flags */ 611 /* We do not want read in the flags */
690 bp->b_flags &= ~XBF_READ; 612 bp->b_flags &= ~XBF_READ;
691 } 613 }
@@ -718,7 +640,7 @@ xfs_buf_readahead(
718 return; 640 return;
719 641
720 flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); 642 flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
721 xfs_buf_read_flags(target, ioff, isize, flags); 643 xfs_buf_read(target, ioff, isize, flags);
722} 644}
723 645
724xfs_buf_t * 646xfs_buf_t *
@@ -823,7 +745,7 @@ xfs_buf_get_noaddr(
823 745
824 xfs_buf_unlock(bp); 746 xfs_buf_unlock(bp);
825 747
826 XB_TRACE(bp, "no_daddr", len); 748 trace_xfs_buf_get_noaddr(bp, _RET_IP_);
827 return bp; 749 return bp;
828 750
829 fail_free_mem: 751 fail_free_mem:
@@ -845,8 +767,8 @@ void
845xfs_buf_hold( 767xfs_buf_hold(
846 xfs_buf_t *bp) 768 xfs_buf_t *bp)
847{ 769{
770 trace_xfs_buf_hold(bp, _RET_IP_);
848 atomic_inc(&bp->b_hold); 771 atomic_inc(&bp->b_hold);
849 XB_TRACE(bp, "hold", 0);
850} 772}
851 773
852/* 774/*
@@ -859,7 +781,7 @@ xfs_buf_rele(
859{ 781{
860 xfs_bufhash_t *hash = bp->b_hash; 782 xfs_bufhash_t *hash = bp->b_hash;
861 783
862 XB_TRACE(bp, "rele", bp->b_relse); 784 trace_xfs_buf_rele(bp, _RET_IP_);
863 785
864 if (unlikely(!hash)) { 786 if (unlikely(!hash)) {
865 ASSERT(!bp->b_relse); 787 ASSERT(!bp->b_relse);
@@ -909,21 +831,19 @@ xfs_buf_cond_lock(
909 int locked; 831 int locked;
910 832
911 locked = down_trylock(&bp->b_sema) == 0; 833 locked = down_trylock(&bp->b_sema) == 0;
912 if (locked) { 834 if (locked)
913 XB_SET_OWNER(bp); 835 XB_SET_OWNER(bp);
914 } 836
915 XB_TRACE(bp, "cond_lock", (long)locked); 837 trace_xfs_buf_cond_lock(bp, _RET_IP_);
916 return locked ? 0 : -EBUSY; 838 return locked ? 0 : -EBUSY;
917} 839}
918 840
919#if defined(DEBUG) || defined(XFS_BLI_TRACE)
920int 841int
921xfs_buf_lock_value( 842xfs_buf_lock_value(
922 xfs_buf_t *bp) 843 xfs_buf_t *bp)
923{ 844{
924 return bp->b_sema.count; 845 return bp->b_sema.count;
925} 846}
926#endif
927 847
928/* 848/*
929 * Locks a buffer object. 849 * Locks a buffer object.
@@ -935,12 +855,14 @@ void
935xfs_buf_lock( 855xfs_buf_lock(
936 xfs_buf_t *bp) 856 xfs_buf_t *bp)
937{ 857{
938 XB_TRACE(bp, "lock", 0); 858 trace_xfs_buf_lock(bp, _RET_IP_);
859
939 if (atomic_read(&bp->b_io_remaining)) 860 if (atomic_read(&bp->b_io_remaining))
940 blk_run_address_space(bp->b_target->bt_mapping); 861 blk_run_address_space(bp->b_target->bt_mapping);
941 down(&bp->b_sema); 862 down(&bp->b_sema);
942 XB_SET_OWNER(bp); 863 XB_SET_OWNER(bp);
943 XB_TRACE(bp, "locked", 0); 864
865 trace_xfs_buf_lock_done(bp, _RET_IP_);
944} 866}
945 867
946/* 868/*
@@ -962,7 +884,8 @@ xfs_buf_unlock(
962 884
963 XB_CLEAR_OWNER(bp); 885 XB_CLEAR_OWNER(bp);
964 up(&bp->b_sema); 886 up(&bp->b_sema);
965 XB_TRACE(bp, "unlock", 0); 887
888 trace_xfs_buf_unlock(bp, _RET_IP_);
966} 889}
967 890
968 891
@@ -974,17 +897,18 @@ void
974xfs_buf_pin( 897xfs_buf_pin(
975 xfs_buf_t *bp) 898 xfs_buf_t *bp)
976{ 899{
900 trace_xfs_buf_pin(bp, _RET_IP_);
977 atomic_inc(&bp->b_pin_count); 901 atomic_inc(&bp->b_pin_count);
978 XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter);
979} 902}
980 903
981void 904void
982xfs_buf_unpin( 905xfs_buf_unpin(
983 xfs_buf_t *bp) 906 xfs_buf_t *bp)
984{ 907{
908 trace_xfs_buf_unpin(bp, _RET_IP_);
909
985 if (atomic_dec_and_test(&bp->b_pin_count)) 910 if (atomic_dec_and_test(&bp->b_pin_count))
986 wake_up_all(&bp->b_waiters); 911 wake_up_all(&bp->b_waiters);
987 XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter);
988} 912}
989 913
990int 914int
@@ -1035,7 +959,7 @@ xfs_buf_iodone_work(
1035 */ 959 */
1036 if ((bp->b_error == EOPNOTSUPP) && 960 if ((bp->b_error == EOPNOTSUPP) &&
1037 (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { 961 (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
1038 XB_TRACE(bp, "ordered_retry", bp->b_iodone); 962 trace_xfs_buf_ordered_retry(bp, _RET_IP_);
1039 bp->b_flags &= ~XBF_ORDERED; 963 bp->b_flags &= ~XBF_ORDERED;
1040 bp->b_flags |= _XFS_BARRIER_FAILED; 964 bp->b_flags |= _XFS_BARRIER_FAILED;
1041 xfs_buf_iorequest(bp); 965 xfs_buf_iorequest(bp);
@@ -1050,12 +974,12 @@ xfs_buf_ioend(
1050 xfs_buf_t *bp, 974 xfs_buf_t *bp,
1051 int schedule) 975 int schedule)
1052{ 976{
977 trace_xfs_buf_iodone(bp, _RET_IP_);
978
1053 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); 979 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
1054 if (bp->b_error == 0) 980 if (bp->b_error == 0)
1055 bp->b_flags |= XBF_DONE; 981 bp->b_flags |= XBF_DONE;
1056 982
1057 XB_TRACE(bp, "iodone", bp->b_iodone);
1058
1059 if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) { 983 if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
1060 if (schedule) { 984 if (schedule) {
1061 INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work); 985 INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
@@ -1075,26 +999,34 @@ xfs_buf_ioerror(
1075{ 999{
1076 ASSERT(error >= 0 && error <= 0xffff); 1000 ASSERT(error >= 0 && error <= 0xffff);
1077 bp->b_error = (unsigned short)error; 1001 bp->b_error = (unsigned short)error;
1078 XB_TRACE(bp, "ioerror", (unsigned long)error); 1002 trace_xfs_buf_ioerror(bp, error, _RET_IP_);
1079} 1003}
1080 1004
1081int 1005int
1082xfs_bawrite( 1006xfs_bwrite(
1083 void *mp, 1007 struct xfs_mount *mp,
1084 struct xfs_buf *bp) 1008 struct xfs_buf *bp)
1085{ 1009{
1086 XB_TRACE(bp, "bawrite", 0); 1010 int iowait = (bp->b_flags & XBF_ASYNC) == 0;
1011 int error = 0;
1087 1012
1088 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); 1013 bp->b_strat = xfs_bdstrat_cb;
1014 bp->b_mount = mp;
1015 bp->b_flags |= XBF_WRITE;
1016 if (!iowait)
1017 bp->b_flags |= _XBF_RUN_QUEUES;
1089 1018
1090 xfs_buf_delwri_dequeue(bp); 1019 xfs_buf_delwri_dequeue(bp);
1020 xfs_buf_iostrategy(bp);
1091 1021
1092 bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD); 1022 if (iowait) {
1093 bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); 1023 error = xfs_buf_iowait(bp);
1024 if (error)
1025 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1026 xfs_buf_relse(bp);
1027 }
1094 1028
1095 bp->b_mount = mp; 1029 return error;
1096 bp->b_strat = xfs_bdstrat_cb;
1097 return xfs_bdstrat_cb(bp);
1098} 1030}
1099 1031
1100void 1032void
@@ -1102,7 +1034,7 @@ xfs_bdwrite(
1102 void *mp, 1034 void *mp,
1103 struct xfs_buf *bp) 1035 struct xfs_buf *bp)
1104{ 1036{
1105 XB_TRACE(bp, "bdwrite", 0); 1037 trace_xfs_buf_bdwrite(bp, _RET_IP_);
1106 1038
1107 bp->b_strat = xfs_bdstrat_cb; 1039 bp->b_strat = xfs_bdstrat_cb;
1108 bp->b_mount = mp; 1040 bp->b_mount = mp;
@@ -1113,7 +1045,127 @@ xfs_bdwrite(
1113 xfs_buf_delwri_queue(bp, 1); 1045 xfs_buf_delwri_queue(bp, 1);
1114} 1046}
1115 1047
1116STATIC_INLINE void 1048/*
1049 * Called when we want to stop a buffer from getting written or read.
1050 * We attach the EIO error, muck with its flags, and call biodone
1051 * so that the proper iodone callbacks get called.
1052 */
1053STATIC int
1054xfs_bioerror(
1055 xfs_buf_t *bp)
1056{
1057#ifdef XFSERRORDEBUG
1058 ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
1059#endif
1060
1061 /*
1062 * No need to wait until the buffer is unpinned, we aren't flushing it.
1063 */
1064 XFS_BUF_ERROR(bp, EIO);
1065
1066 /*
1067 * We're calling biodone, so delete XBF_DONE flag.
1068 */
1069 XFS_BUF_UNREAD(bp);
1070 XFS_BUF_UNDELAYWRITE(bp);
1071 XFS_BUF_UNDONE(bp);
1072 XFS_BUF_STALE(bp);
1073
1074 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
1075 xfs_biodone(bp);
1076
1077 return EIO;
1078}
1079
1080/*
1081 * Same as xfs_bioerror, except that we are releasing the buffer
1082 * here ourselves, and avoiding the biodone call.
1083 * This is meant for userdata errors; metadata bufs come with
1084 * iodone functions attached, so that we can track down errors.
1085 */
1086STATIC int
1087xfs_bioerror_relse(
1088 struct xfs_buf *bp)
1089{
1090 int64_t fl = XFS_BUF_BFLAGS(bp);
1091 /*
1092 * No need to wait until the buffer is unpinned.
1093 * We aren't flushing it.
1094 *
1095 * chunkhold expects B_DONE to be set, whether
1096 * we actually finish the I/O or not. We don't want to
1097 * change that interface.
1098 */
1099 XFS_BUF_UNREAD(bp);
1100 XFS_BUF_UNDELAYWRITE(bp);
1101 XFS_BUF_DONE(bp);
1102 XFS_BUF_STALE(bp);
1103 XFS_BUF_CLR_IODONE_FUNC(bp);
1104 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
1105 if (!(fl & XBF_ASYNC)) {
1106 /*
1107 * Mark b_error and B_ERROR _both_.
1108 * Lot's of chunkcache code assumes that.
1109 * There's no reason to mark error for
1110 * ASYNC buffers.
1111 */
1112 XFS_BUF_ERROR(bp, EIO);
1113 XFS_BUF_FINISH_IOWAIT(bp);
1114 } else {
1115 xfs_buf_relse(bp);
1116 }
1117
1118 return EIO;
1119}
1120
1121
1122/*
1123 * All xfs metadata buffers except log state machine buffers
1124 * get this attached as their b_bdstrat callback function.
1125 * This is so that we can catch a buffer
1126 * after prematurely unpinning it to forcibly shut down the filesystem.
1127 */
1128int
1129xfs_bdstrat_cb(
1130 struct xfs_buf *bp)
1131{
1132 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
1133 trace_xfs_bdstrat_shut(bp, _RET_IP_);
1134 /*
1135 * Metadata write that didn't get logged but
1136 * written delayed anyway. These aren't associated
1137 * with a transaction, and can be ignored.
1138 */
1139 if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
1140 return xfs_bioerror_relse(bp);
1141 else
1142 return xfs_bioerror(bp);
1143 }
1144
1145 xfs_buf_iorequest(bp);
1146 return 0;
1147}
1148
1149/*
1150 * Wrapper around bdstrat so that we can stop data from going to disk in case
1151 * we are shutting down the filesystem. Typically user data goes through this
1152 * path; one of the exceptions is the superblock.
1153 */
1154void
1155xfsbdstrat(
1156 struct xfs_mount *mp,
1157 struct xfs_buf *bp)
1158{
1159 if (XFS_FORCED_SHUTDOWN(mp)) {
1160 trace_xfs_bdstrat_shut(bp, _RET_IP_);
1161 xfs_bioerror_relse(bp);
1162 return;
1163 }
1164
1165 xfs_buf_iorequest(bp);
1166}
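
Both strategy wrappers share the same shape: check for a forced shutdown first, error the buffer out via xfs_bioerror_relse() if so, and only otherwise issue real I/O. A hedged sketch of how a caller might drive xfsbdstrat() for a synchronous read (example_read_buf is hypothetical; on a shut-down filesystem the iowait returns EIO immediately):

	/*
	 * Illustration only: issue a read via xfsbdstrat() and wait.
	 * The shutdown check inside xfsbdstrat() guarantees the buffer
	 * completes (with EIO) even when no bio is ever submitted.
	 */
	static int
	example_read_buf(struct xfs_mount *mp, struct xfs_buf *bp)
	{
		bp->b_flags |= XBF_READ;
		xfsbdstrat(mp, bp);
		return xfs_buf_iowait(bp);	/* 0, or positive errno */
	}
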
1167
1168STATIC void
1117_xfs_buf_ioend( 1169_xfs_buf_ioend(
1118 xfs_buf_t *bp, 1170 xfs_buf_t *bp,
1119 int schedule) 1171 int schedule)
@@ -1135,6 +1187,9 @@ xfs_buf_bio_end_io(
1135 1187
1136 xfs_buf_ioerror(bp, -error); 1188 xfs_buf_ioerror(bp, -error);
1137 1189
1190 if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1191 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
1192
1138 do { 1193 do {
1139 struct page *page = bvec->bv_page; 1194 struct page *page = bvec->bv_page;
1140 1195
@@ -1177,10 +1232,14 @@ _xfs_buf_ioapply(
1177 if (bp->b_flags & XBF_ORDERED) { 1232 if (bp->b_flags & XBF_ORDERED) {
1178 ASSERT(!(bp->b_flags & XBF_READ)); 1233 ASSERT(!(bp->b_flags & XBF_READ));
1179 rw = WRITE_BARRIER; 1234 rw = WRITE_BARRIER;
1180 } else if (bp->b_flags & _XBF_RUN_QUEUES) { 1235 } else if (bp->b_flags & XBF_LOG_BUFFER) {
1181 ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); 1236 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
1182 bp->b_flags &= ~_XBF_RUN_QUEUES; 1237 bp->b_flags &= ~_XBF_RUN_QUEUES;
1183 rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; 1238 rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
1239 } else if (bp->b_flags & _XBF_RUN_QUEUES) {
1240 ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
1241 bp->b_flags &= ~_XBF_RUN_QUEUES;
1242 rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META;
1184 } else { 1243 } else {
1185 rw = (bp->b_flags & XBF_WRITE) ? WRITE : 1244 rw = (bp->b_flags & XBF_WRITE) ? WRITE :
1186 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; 1245 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
@@ -1240,6 +1299,10 @@ next_chunk:
1240 1299
1241submit_io: 1300submit_io:
1242 if (likely(bio->bi_size)) { 1301 if (likely(bio->bi_size)) {
1302 if (xfs_buf_is_vmapped(bp)) {
1303 flush_kernel_vmap_range(bp->b_addr,
1304 xfs_buf_vmap_len(bp));
1305 }
1243 submit_bio(rw, bio); 1306 submit_bio(rw, bio);
1244 if (size) 1307 if (size)
1245 goto next_chunk; 1308 goto next_chunk;
@@ -1253,7 +1316,7 @@ int
1253xfs_buf_iorequest( 1316xfs_buf_iorequest(
1254 xfs_buf_t *bp) 1317 xfs_buf_t *bp)
1255{ 1318{
1256 XB_TRACE(bp, "iorequest", 0); 1319 trace_xfs_buf_iorequest(bp, _RET_IP_);
1257 1320
1258 if (bp->b_flags & XBF_DELWRI) { 1321 if (bp->b_flags & XBF_DELWRI) {
1259 xfs_buf_delwri_queue(bp, 1); 1322 xfs_buf_delwri_queue(bp, 1);
@@ -1287,11 +1350,13 @@ int
1287xfs_buf_iowait( 1350xfs_buf_iowait(
1288 xfs_buf_t *bp) 1351 xfs_buf_t *bp)
1289{ 1352{
1290 XB_TRACE(bp, "iowait", 0); 1353 trace_xfs_buf_iowait(bp, _RET_IP_);
1354
1291 if (atomic_read(&bp->b_io_remaining)) 1355 if (atomic_read(&bp->b_io_remaining))
1292 blk_run_address_space(bp->b_target->bt_mapping); 1356 blk_run_address_space(bp->b_target->bt_mapping);
1293 wait_for_completion(&bp->b_iowait); 1357 wait_for_completion(&bp->b_iowait);
1294 XB_TRACE(bp, "iowaited", (long)bp->b_error); 1358
1359 trace_xfs_buf_iowait_done(bp, _RET_IP_);
1295 return bp->b_error; 1360 return bp->b_error;
1296} 1361}
1297 1362
@@ -1318,7 +1383,7 @@ xfs_buf_iomove(
1318 xfs_buf_t *bp, /* buffer to process */ 1383 xfs_buf_t *bp, /* buffer to process */
1319 size_t boff, /* starting buffer offset */ 1384 size_t boff, /* starting buffer offset */
1320 size_t bsize, /* length to copy */ 1385 size_t bsize, /* length to copy */
1321 caddr_t data, /* data address */ 1386 void *data, /* data address */
1322 xfs_buf_rw_t mode) /* read/write/zero flag */ 1387 xfs_buf_rw_t mode) /* read/write/zero flag */
1323{ 1388{
1324 size_t bend, cpoff, csize; 1389 size_t bend, cpoff, csize;
@@ -1400,8 +1465,8 @@ xfs_alloc_bufhash(
1400 1465
1401 btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ 1466 btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */
1402 btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; 1467 btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
1403 btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) * 1468 btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
1404 sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE); 1469 sizeof(xfs_bufhash_t));
1405 for (i = 0; i < (1 << btp->bt_hashshift); i++) { 1470 for (i = 0; i < (1 << btp->bt_hashshift); i++) {
1406 spin_lock_init(&btp->bt_hash[i].bh_lock); 1471 spin_lock_init(&btp->bt_hash[i].bh_lock);
1407 INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); 1472 INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
@@ -1412,7 +1477,7 @@ STATIC void
1412xfs_free_bufhash( 1477xfs_free_bufhash(
1413 xfs_buftarg_t *btp) 1478 xfs_buftarg_t *btp)
1414{ 1479{
1415 kmem_free(btp->bt_hash); 1480 kmem_free_large(btp->bt_hash);
1416 btp->bt_hash = NULL; 1481 btp->bt_hash = NULL;
1417} 1482}
1418 1483
@@ -1604,7 +1669,8 @@ xfs_buf_delwri_queue(
1604 struct list_head *dwq = &bp->b_target->bt_delwrite_queue; 1669 struct list_head *dwq = &bp->b_target->bt_delwrite_queue;
1605 spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; 1670 spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
1606 1671
1607 XB_TRACE(bp, "delwri_q", (long)unlock); 1672 trace_xfs_buf_delwri_queue(bp, _RET_IP_);
1673
1608 ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC)); 1674 ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
1609 1675
1610 spin_lock(dwlk); 1676 spin_lock(dwlk);
@@ -1616,6 +1682,11 @@ xfs_buf_delwri_queue(
1616 list_del(&bp->b_list); 1682 list_del(&bp->b_list);
1617 } 1683 }
1618 1684
1685 if (list_empty(dwq)) {
1686 /* start xfsbufd as it is about to have something to do */
1687 wake_up_process(bp->b_target->bt_task);
1688 }
1689
1619 bp->b_flags |= _XBF_DELWRI_Q; 1690 bp->b_flags |= _XBF_DELWRI_Q;
1620 list_add_tail(&bp->b_list, dwq); 1691 list_add_tail(&bp->b_list, dwq);
1621 bp->b_queuetime = jiffies; 1692 bp->b_queuetime = jiffies;
@@ -1644,7 +1715,36 @@ xfs_buf_delwri_dequeue(
1644 if (dequeued) 1715 if (dequeued)
1645 xfs_buf_rele(bp); 1716 xfs_buf_rele(bp);
1646 1717
1647 XB_TRACE(bp, "delwri_dq", (long)dequeued); 1718 trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
1719}
1720
1721/*
1722 * If a delwri buffer needs to be pushed before it has aged out, then promote
1723 * it to the head of the delwri queue so that it will be flushed on the next
1724 * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
1725 * than the age currently needed to flush the buffer. Hence the next time the
1726 * xfsbufd sees it is guaranteed to be considered old enough to flush.
1727 */
1728void
1729xfs_buf_delwri_promote(
1730 struct xfs_buf *bp)
1731{
1732 struct xfs_buftarg *btp = bp->b_target;
1733 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
1734
1735 ASSERT(bp->b_flags & XBF_DELWRI);
1736 ASSERT(bp->b_flags & _XBF_DELWRI_Q);
1737
1738 /*
1739 * Check the buffer age before locking the delayed write queue as we
1740 * don't need to promote buffers that are already past the flush age.
1741 */
1742 if (bp->b_queuetime < jiffies - age)
1743 return;
1744 bp->b_queuetime = jiffies - age;
1745 spin_lock(&btp->bt_delwrite_lock);
1746 list_move(&bp->b_list, &btp->bt_delwrite_queue);
1747 spin_unlock(&btp->bt_delwrite_lock);
1648} 1748}
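
The back-dating trick is easiest to see with numbers. A worked example, assuming HZ == 1000 and the default fs.xfs.age_buffer_centisecs value of 1500; both are assumptions, not spelled out in the patch:

	/*
	 * Worked example of xfs_buf_delwri_promote() (illustrative
	 * values: HZ == 1000, xfs_buf_age_centisecs == 1500):
	 *
	 *   msecs_to_jiffies(10) == 10
	 *   age == 1500 * 10 + 1 == 15001 jiffies (~15s)
	 *   bp->b_queuetime = jiffies - 15001;
	 *
	 * The buffer now looks older than the flush age, so the next
	 * xfsbufd pass is guaranteed to pick it up.
	 */
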
1649 1749
1650STATIC void 1750STATIC void
@@ -1665,6 +1765,8 @@ xfsbufd_wakeup(
1665 list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { 1765 list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
1666 if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) 1766 if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
1667 continue; 1767 continue;
1768 if (list_empty(&btp->bt_delwrite_queue))
1769 continue;
1668 set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); 1770 set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
1669 wake_up_process(btp->bt_task); 1771 wake_up_process(btp->bt_task);
1670 } 1772 }
@@ -1692,7 +1794,7 @@ xfs_buf_delwri_split(
1692 INIT_LIST_HEAD(list); 1794 INIT_LIST_HEAD(list);
1693 spin_lock(dwlk); 1795 spin_lock(dwlk);
1694 list_for_each_entry_safe(bp, n, dwq, b_list) { 1796 list_for_each_entry_safe(bp, n, dwq, b_list) {
1695 XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp)); 1797 trace_xfs_buf_delwri_split(bp, _RET_IP_);
1696 ASSERT(bp->b_flags & XBF_DELWRI); 1798 ASSERT(bp->b_flags & XBF_DELWRI);
1697 1799
1698 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) { 1800 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
@@ -1715,20 +1817,53 @@ xfs_buf_delwri_split(
1715 1817
1716} 1818}
1717 1819
1820/*
 1821 * The compare function is more complex than it needs to be because
 1822 * the return value is only 32 bits and we are doing comparisons
 1823 * on 64-bit values.
1824 */
1825static int
1826xfs_buf_cmp(
1827 void *priv,
1828 struct list_head *a,
1829 struct list_head *b)
1830{
1831 struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list);
1832 struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
1833 xfs_daddr_t diff;
1834
1835 diff = ap->b_bn - bp->b_bn;
1836 if (diff < 0)
1837 return -1;
1838 if (diff > 0)
1839 return 1;
1840 return 0;
1841}
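
The comment deserves a concrete failure case: truncating the 64-bit difference into the 32-bit return value can fold a huge distance into zero. A sketch of what the explicit sign tests avoid:

	/*
	 * Why not "return (int)(ap->b_bn - bp->b_bn)": xfs_daddr_t is
	 * 64 bits wide, and the cast throws away the high word.
	 */
	xfs_daddr_t a = 0x100000000LL;		/* 2^32 */
	xfs_daddr_t b = 0;
	int bogus = (int)(a - b);		/* == 0: sorts as equal! */
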
1842
1843void
1844xfs_buf_delwri_sort(
1845 xfs_buftarg_t *target,
1846 struct list_head *list)
1847{
1848 list_sort(NULL, list, xfs_buf_cmp);
1849}
1850
1718STATIC int 1851STATIC int
1719xfsbufd( 1852xfsbufd(
1720 void *data) 1853 void *data)
1721{ 1854{
1722 struct list_head tmp; 1855 xfs_buftarg_t *target = (xfs_buftarg_t *)data;
1723 xfs_buftarg_t *target = (xfs_buftarg_t *)data;
1724 int count;
1725 xfs_buf_t *bp;
1726 1856
1727 current->flags |= PF_MEMALLOC; 1857 current->flags |= PF_MEMALLOC;
1728 1858
1729 set_freezable(); 1859 set_freezable();
1730 1860
1731 do { 1861 do {
1862 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
1863 long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
1864 int count = 0;
1865 struct list_head tmp;
1866
1732 if (unlikely(freezing(current))) { 1867 if (unlikely(freezing(current))) {
1733 set_bit(XBT_FORCE_SLEEP, &target->bt_flags); 1868 set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
1734 refrigerator(); 1869 refrigerator();
@@ -1736,24 +1871,20 @@ xfsbufd(
1736 clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); 1871 clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
1737 } 1872 }
1738 1873
1739 schedule_timeout_interruptible( 1874 /* sleep for a long time if there is nothing to do. */
1740 xfs_buf_timer_centisecs * msecs_to_jiffies(10)); 1875 if (list_empty(&target->bt_delwrite_queue))
1876 tout = MAX_SCHEDULE_TIMEOUT;
1877 schedule_timeout_interruptible(tout);
1741 1878
1742 xfs_buf_delwri_split(target, &tmp, 1879 xfs_buf_delwri_split(target, &tmp, age);
1743 xfs_buf_age_centisecs * msecs_to_jiffies(10)); 1880 list_sort(NULL, &tmp, xfs_buf_cmp);
1744
1745 count = 0;
1746 while (!list_empty(&tmp)) { 1881 while (!list_empty(&tmp)) {
1747 bp = list_entry(tmp.next, xfs_buf_t, b_list); 1882 struct xfs_buf *bp;
1748 ASSERT(target == bp->b_target); 1883 bp = list_first_entry(&tmp, struct xfs_buf, b_list);
1749
1750 list_del_init(&bp->b_list); 1884 list_del_init(&bp->b_list);
1751 xfs_buf_iostrategy(bp); 1885 xfs_buf_iostrategy(bp);
1752 count++; 1886 count++;
1753 } 1887 }
1754
1755 if (as_list_len > 0)
1756 purge_addresses();
1757 if (count) 1888 if (count)
1758 blk_run_address_space(target->bt_mapping); 1889 blk_run_address_space(target->bt_mapping);
1759 1890
@@ -1772,42 +1903,45 @@ xfs_flush_buftarg(
1772 xfs_buftarg_t *target, 1903 xfs_buftarg_t *target,
1773 int wait) 1904 int wait)
1774{ 1905{
1775 struct list_head tmp; 1906 xfs_buf_t *bp;
1776 xfs_buf_t *bp, *n;
1777 int pincount = 0; 1907 int pincount = 0;
1908 LIST_HEAD(tmp_list);
1909 LIST_HEAD(wait_list);
1778 1910
1779 xfs_buf_runall_queues(xfsconvertd_workqueue); 1911 xfs_buf_runall_queues(xfsconvertd_workqueue);
1780 xfs_buf_runall_queues(xfsdatad_workqueue); 1912 xfs_buf_runall_queues(xfsdatad_workqueue);
1781 xfs_buf_runall_queues(xfslogd_workqueue); 1913 xfs_buf_runall_queues(xfslogd_workqueue);
1782 1914
1783 set_bit(XBT_FORCE_FLUSH, &target->bt_flags); 1915 set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
1784 pincount = xfs_buf_delwri_split(target, &tmp, 0); 1916 pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
1785 1917
1786 /* 1918 /*
1787 * Dropped the delayed write list lock, now walk the temporary list 1919 * Dropped the delayed write list lock, now walk the temporary list.
 1920 * All I/O is issued asynchronously, and if we need to wait for
 1921 * completion we do that after issuing all of it.
1788 */ 1922 */
1789 list_for_each_entry_safe(bp, n, &tmp, b_list) { 1923 list_sort(NULL, &tmp_list, xfs_buf_cmp);
1924 while (!list_empty(&tmp_list)) {
1925 bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
1790 ASSERT(target == bp->b_target); 1926 ASSERT(target == bp->b_target);
1791 if (wait) 1927 list_del_init(&bp->b_list);
1928 if (wait) {
1792 bp->b_flags &= ~XBF_ASYNC; 1929 bp->b_flags &= ~XBF_ASYNC;
1793 else 1930 list_add(&bp->b_list, &wait_list);
1794 list_del_init(&bp->b_list); 1931 }
1795
1796 xfs_buf_iostrategy(bp); 1932 xfs_buf_iostrategy(bp);
1797 } 1933 }
1798 1934
1799 if (wait) 1935 if (wait) {
1936 /* Expedite and wait for IO to complete. */
1800 blk_run_address_space(target->bt_mapping); 1937 blk_run_address_space(target->bt_mapping);
1938 while (!list_empty(&wait_list)) {
1939 bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
1801 1940
1802 /* 1941 list_del_init(&bp->b_list);
1803 * Remaining list items must be flushed before returning 1942 xfs_iowait(bp);
1804 */ 1943 xfs_buf_relse(bp);
1805 while (!list_empty(&tmp)) { 1944 }
1806 bp = list_entry(tmp.next, xfs_buf_t, b_list);
1807
1808 list_del_init(&bp->b_list);
1809 xfs_iowait(bp);
1810 xfs_buf_relse(bp);
1811 } 1945 }
1812 1946
1813 return pincount; 1947 return pincount;
@@ -1816,14 +1950,10 @@ xfs_flush_buftarg(
1816int __init 1950int __init
1817xfs_buf_init(void) 1951xfs_buf_init(void)
1818{ 1952{
1819#ifdef XFS_BUF_TRACE
1820 xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_NOFS);
1821#endif
1822
1823 xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", 1953 xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
1824 KM_ZONE_HWALIGN, NULL); 1954 KM_ZONE_HWALIGN, NULL);
1825 if (!xfs_buf_zone) 1955 if (!xfs_buf_zone)
1826 goto out_free_trace_buf; 1956 goto out;
1827 1957
1828 xfslogd_workqueue = create_workqueue("xfslogd"); 1958 xfslogd_workqueue = create_workqueue("xfslogd");
1829 if (!xfslogd_workqueue) 1959 if (!xfslogd_workqueue)
@@ -1846,10 +1976,7 @@ xfs_buf_init(void)
1846 destroy_workqueue(xfslogd_workqueue); 1976 destroy_workqueue(xfslogd_workqueue);
1847 out_free_buf_zone: 1977 out_free_buf_zone:
1848 kmem_zone_destroy(xfs_buf_zone); 1978 kmem_zone_destroy(xfs_buf_zone);
1849 out_free_trace_buf: 1979 out:
1850#ifdef XFS_BUF_TRACE
1851 ktrace_free(xfs_buf_trace_buf);
1852#endif
1853 return -ENOMEM; 1980 return -ENOMEM;
1854} 1981}
1855 1982
@@ -1861,9 +1988,6 @@ xfs_buf_terminate(void)
1861 destroy_workqueue(xfsdatad_workqueue); 1988 destroy_workqueue(xfsdatad_workqueue);
1862 destroy_workqueue(xfslogd_workqueue); 1989 destroy_workqueue(xfslogd_workqueue);
1863 kmem_zone_destroy(xfs_buf_zone); 1990 kmem_zone_destroy(xfs_buf_zone);
1864#ifdef XFS_BUF_TRACE
1865 ktrace_free(xfs_buf_trace_buf);
1866#endif
1867} 1991}
1868 1992
1869#ifdef CONFIG_KDB_MODULES 1993#ifdef CONFIG_KDB_MODULES
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 9b4d666ad31f..386e7361e50e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -55,6 +55,7 @@ typedef enum {
55 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ 55 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
56 XBF_ORDERED = (1 << 11), /* use ordered writes */ 56 XBF_ORDERED = (1 << 11), /* use ordered writes */
57 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ 57 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
58 XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */
58 59
59 /* flags used only as arguments to access routines */ 60 /* flags used only as arguments to access routines */
60 XBF_LOCK = (1 << 14), /* lock requested */ 61 XBF_LOCK = (1 << 14), /* lock requested */
@@ -95,6 +96,28 @@ typedef enum {
95 _XFS_BARRIER_FAILED = (1 << 23), 96 _XFS_BARRIER_FAILED = (1 << 23),
96} xfs_buf_flags_t; 97} xfs_buf_flags_t;
97 98
99#define XFS_BUF_FLAGS \
100 { XBF_READ, "READ" }, \
101 { XBF_WRITE, "WRITE" }, \
102 { XBF_MAPPED, "MAPPED" }, \
103 { XBF_ASYNC, "ASYNC" }, \
104 { XBF_DONE, "DONE" }, \
105 { XBF_DELWRI, "DELWRI" }, \
106 { XBF_STALE, "STALE" }, \
107 { XBF_FS_MANAGED, "FS_MANAGED" }, \
108 { XBF_ORDERED, "ORDERED" }, \
109 { XBF_READ_AHEAD, "READ_AHEAD" }, \
110 { XBF_LOCK, "LOCK" }, /* should never be set */\
111 { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\
112 { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\
113 { _XBF_PAGE_CACHE, "PAGE_CACHE" }, \
114 { _XBF_PAGES, "PAGES" }, \
115 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
116 { _XBF_DELWRI_Q, "DELWRI_Q" }, \
117 { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \
118 { _XFS_BARRIER_FAILED, "BARRIER_FAILED" }
119
120
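
The {value, "NAME"} pair list is the shape consumed by the kernel tracing __print_flags() helper, which is presumably why XFS_BUF_FLAGS is introduced alongside the new tracepoints. A hedged sketch of a TP_printk fragment that could use it (the event itself is hypothetical, not part of this hunk):

	/* Hypothetical tracepoint output formatting: */
	TP_printk("bp %p flags %s",
		  __entry->bp,
		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS))
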
98typedef enum { 121typedef enum {
99 XBT_FORCE_SLEEP = 0, 122 XBT_FORCE_SLEEP = 0,
100 XBT_FORCE_FLUSH = 1, 123 XBT_FORCE_FLUSH = 1,
@@ -186,15 +209,10 @@ extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
186#define xfs_incore(buftarg,blkno,len,lockit) \ 209#define xfs_incore(buftarg,blkno,len,lockit) \
187 _xfs_buf_find(buftarg, blkno ,len, lockit, NULL) 210 _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
188 211
189extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t, 212extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
190 xfs_buf_flags_t); 213 xfs_buf_flags_t);
191#define xfs_buf_get(target, blkno, len, flags) \ 214extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
192 xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
193
194extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t,
195 xfs_buf_flags_t); 215 xfs_buf_flags_t);
196#define xfs_buf_read(target, blkno, len, flags) \
197 xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
198 216
199extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); 217extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
200extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *); 218extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
@@ -214,13 +232,17 @@ extern void xfs_buf_lock(xfs_buf_t *);
214extern void xfs_buf_unlock(xfs_buf_t *); 232extern void xfs_buf_unlock(xfs_buf_t *);
215 233
216/* Buffer Read and Write Routines */ 234/* Buffer Read and Write Routines */
217extern int xfs_bawrite(void *mp, xfs_buf_t *bp); 235extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
218extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); 236extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
237
238extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
239extern int xfs_bdstrat_cb(struct xfs_buf *);
240
219extern void xfs_buf_ioend(xfs_buf_t *, int); 241extern void xfs_buf_ioend(xfs_buf_t *, int);
220extern void xfs_buf_ioerror(xfs_buf_t *, int); 242extern void xfs_buf_ioerror(xfs_buf_t *, int);
221extern int xfs_buf_iorequest(xfs_buf_t *); 243extern int xfs_buf_iorequest(xfs_buf_t *);
222extern int xfs_buf_iowait(xfs_buf_t *); 244extern int xfs_buf_iowait(xfs_buf_t *);
223extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t, 245extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
224 xfs_buf_rw_t); 246 xfs_buf_rw_t);
225 247
226static inline int xfs_buf_iostrategy(xfs_buf_t *bp) 248static inline int xfs_buf_iostrategy(xfs_buf_t *bp)
@@ -243,49 +265,29 @@ extern int xfs_buf_ispin(xfs_buf_t *);
243 265
244/* Delayed Write Buffer Routines */ 266/* Delayed Write Buffer Routines */
245extern void xfs_buf_delwri_dequeue(xfs_buf_t *); 267extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
268extern void xfs_buf_delwri_promote(xfs_buf_t *);
246 269
247/* Buffer Daemon Setup Routines */ 270/* Buffer Daemon Setup Routines */
248extern int xfs_buf_init(void); 271extern int xfs_buf_init(void);
249extern void xfs_buf_terminate(void); 272extern void xfs_buf_terminate(void);
250 273
251#ifdef XFS_BUF_TRACE
252extern ktrace_t *xfs_buf_trace_buf;
253extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
254#else
255#define xfs_buf_trace(bp,id,ptr,ra) do { } while (0)
256#endif
257
258#define xfs_buf_target_name(target) \ 274#define xfs_buf_target_name(target) \
259 ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; }) 275 ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })
260 276
261 277
262#define XFS_B_ASYNC XBF_ASYNC
263#define XFS_B_DELWRI XBF_DELWRI
264#define XFS_B_READ XBF_READ
265#define XFS_B_WRITE XBF_WRITE
266#define XFS_B_STALE XBF_STALE
267
268#define XFS_BUF_TRYLOCK XBF_TRYLOCK
269#define XFS_INCORE_TRYLOCK XBF_TRYLOCK
270#define XFS_BUF_LOCK XBF_LOCK
271#define XFS_BUF_MAPPED XBF_MAPPED
272
273#define BUF_BUSY XBF_DONT_BLOCK
274
275#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) 278#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags)
276#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ 279#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \
277 ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) 280 ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
278 281
279#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE) 282#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE)
280#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE) 283#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
281#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XFS_B_STALE) 284#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE)
282#define XFS_BUF_SUPER_STALE(bp) do { \ 285#define XFS_BUF_SUPER_STALE(bp) do { \
283 XFS_BUF_STALE(bp); \ 286 XFS_BUF_STALE(bp); \
284 xfs_buf_delwri_dequeue(bp); \ 287 xfs_buf_delwri_dequeue(bp); \
285 XFS_BUF_DONE(bp); \ 288 XFS_BUF_DONE(bp); \
286 } while (0) 289 } while (0)
287 290
288#define XFS_BUF_MANAGE XBF_FS_MANAGED
289#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED) 291#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED)
290 292
291#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) 293#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI)
@@ -370,39 +372,15 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
370 372
371#define xfs_bpin(bp) xfs_buf_pin(bp) 373#define xfs_bpin(bp) xfs_buf_pin(bp)
372#define xfs_bunpin(bp) xfs_buf_unpin(bp) 374#define xfs_bunpin(bp) xfs_buf_unpin(bp)
373
374#define xfs_buftrace(id, bp) \
375 xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
376
377#define xfs_biodone(bp) xfs_buf_ioend(bp, 0) 375#define xfs_biodone(bp) xfs_buf_ioend(bp, 0)
378 376
379#define xfs_biomove(bp, off, len, data, rw) \ 377#define xfs_biomove(bp, off, len, data, rw) \
380 xfs_buf_iomove((bp), (off), (len), (data), \ 378 xfs_buf_iomove((bp), (off), (len), (data), \
381 ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ) 379 ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ)
382 380
383#define xfs_biozero(bp, off, len) \ 381#define xfs_biozero(bp, off, len) \
384 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) 382 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
385 383
386
387static inline int XFS_bwrite(xfs_buf_t *bp)
388{
389 int iowait = (bp->b_flags & XBF_ASYNC) == 0;
390 int error = 0;
391
392 if (!iowait)
393 bp->b_flags |= _XBF_RUN_QUEUES;
394
395 xfs_buf_delwri_dequeue(bp);
396 xfs_buf_iostrategy(bp);
397 if (iowait) {
398 error = xfs_buf_iowait(bp);
399 xfs_buf_relse(bp);
400 }
401 return error;
402}
403
404#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
405
406#define xfs_iowait(bp) xfs_buf_iowait(bp) 384#define xfs_iowait(bp) xfs_buf_iowait(bp)
407 385
408#define xfs_baread(target, rablkno, ralen) \ 386#define xfs_baread(target, rablkno, ralen) \
@@ -417,6 +395,7 @@ extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
417extern void xfs_wait_buftarg(xfs_buftarg_t *); 395extern void xfs_wait_buftarg(xfs_buftarg_t *);
418extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); 396extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
419extern int xfs_flush_buftarg(xfs_buftarg_t *, int); 397extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
398
420#ifdef CONFIG_KDB_MODULES 399#ifdef CONFIG_KDB_MODULES
421extern struct list_head *xfs_get_buftarg_list(void); 400extern struct list_head *xfs_get_buftarg_list(void);
422#endif 401#endif
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 87b8cbd23d4b..846b75aeb2ab 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -29,6 +29,7 @@
29#include "xfs_vnodeops.h" 29#include "xfs_vnodeops.h"
30#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
31#include "xfs_inode.h" 31#include "xfs_inode.h"
32#include "xfs_inode_item.h"
32 33
33/* 34/*
34 * Note that we only accept fileids which are long enough rather than allow 35 * Note that we only accept fileids which are long enough rather than allow
@@ -215,9 +216,28 @@ xfs_fs_get_parent(
215 return d_obtain_alias(VFS_I(cip)); 216 return d_obtain_alias(VFS_I(cip));
216} 217}
217 218
219STATIC int
220xfs_fs_nfs_commit_metadata(
221 struct inode *inode)
222{
223 struct xfs_inode *ip = XFS_I(inode);
224 struct xfs_mount *mp = ip->i_mount;
225 int error = 0;
226
227 xfs_ilock(ip, XFS_ILOCK_SHARED);
228 if (xfs_ipincount(ip)) {
229 error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn,
230 XFS_LOG_SYNC, NULL);
231 }
232 xfs_iunlock(ip, XFS_ILOCK_SHARED);
233
234 return error;
235}
236
218const struct export_operations xfs_export_operations = { 237const struct export_operations xfs_export_operations = {
219 .encode_fh = xfs_fs_encode_fh, 238 .encode_fh = xfs_fs_encode_fh,
220 .fh_to_dentry = xfs_fs_fh_to_dentry, 239 .fh_to_dentry = xfs_fs_fh_to_dentry,
221 .fh_to_parent = xfs_fs_fh_to_parent, 240 .fh_to_parent = xfs_fs_fh_to_parent,
222 .get_parent = xfs_fs_get_parent, 241 .get_parent = xfs_fs_get_parent,
242 .commit_metadata = xfs_fs_nfs_commit_metadata,
223}; 243};
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index eff61e2732af..42dd3bcfba6b 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -16,6 +16,7 @@
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_fs.h"
19#include "xfs_bit.h" 20#include "xfs_bit.h"
20#include "xfs_log.h" 21#include "xfs_log.h"
21#include "xfs_inum.h" 22#include "xfs_inum.h"
@@ -34,52 +35,279 @@
34#include "xfs_dir2_sf.h" 35#include "xfs_dir2_sf.h"
35#include "xfs_dinode.h" 36#include "xfs_dinode.h"
36#include "xfs_inode.h" 37#include "xfs_inode.h"
38#include "xfs_inode_item.h"
39#include "xfs_bmap.h"
37#include "xfs_error.h" 40#include "xfs_error.h"
38#include "xfs_rw.h" 41#include "xfs_rw.h"
39#include "xfs_vnodeops.h" 42#include "xfs_vnodeops.h"
40#include "xfs_da_btree.h" 43#include "xfs_da_btree.h"
41#include "xfs_ioctl.h" 44#include "xfs_ioctl.h"
45#include "xfs_trace.h"
42 46
43#include <linux/dcache.h> 47#include <linux/dcache.h>
44 48
45static const struct vm_operations_struct xfs_file_vm_ops; 49static const struct vm_operations_struct xfs_file_vm_ops;
46 50
47STATIC ssize_t 51/*
48xfs_file_aio_read( 52 * xfs_iozero
49 struct kiocb *iocb, 53 *
50 const struct iovec *iov, 54 * xfs_iozero clears the specified range of the buffer supplied,
51 unsigned long nr_segs, 55 * and marks all the affected blocks as valid and modified. If
52 loff_t pos) 56 * an affected block is not allocated, it will be allocated. If
57 * an affected block is not completely overwritten, and is not
58 * valid before the operation, it will be read from disk before
59 * being partially zeroed.
60 */
61STATIC int
62xfs_iozero(
63 struct xfs_inode *ip, /* inode */
64 loff_t pos, /* offset in file */
65 size_t count) /* size of data to zero */
53{ 66{
54 struct file *file = iocb->ki_filp; 67 struct page *page;
55 int ioflags = IO_ISAIO; 68 struct address_space *mapping;
69 int status;
56 70
57 BUG_ON(iocb->ki_pos != pos); 71 mapping = VFS_I(ip)->i_mapping;
58 if (unlikely(file->f_flags & O_DIRECT)) 72 do {
59 ioflags |= IO_ISDIRECT; 73 unsigned offset, bytes;
60 if (file->f_mode & FMODE_NOCMTIME) 74 void *fsdata;
61 ioflags |= IO_INVIS; 75
62 return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov, 76 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
63 nr_segs, &iocb->ki_pos, ioflags); 77 bytes = PAGE_CACHE_SIZE - offset;
78 if (bytes > count)
79 bytes = count;
80
81 status = pagecache_write_begin(NULL, mapping, pos, bytes,
82 AOP_FLAG_UNINTERRUPTIBLE,
83 &page, &fsdata);
84 if (status)
85 break;
86
87 zero_user(page, offset, bytes);
88
89 status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
90 page, fsdata);
91 WARN_ON(status <= 0); /* can't return less than zero! */
92 pos += bytes;
93 count -= bytes;
94 status = 0;
95 } while (count);
96
97 return (-status);
98}
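
A hypothetical call, to make the chunking concrete: zeroing a range that starts mid-page walks page-sized chunks, with pagecache_write_begin/end bracketing each one. The values below are illustrative only, assuming PAGE_CACHE_SIZE == 4096:

	/*
	 * Illustration only: zero bytes [5000, 11000) of the file
	 * behind ip. The loop above splits this into:
	 *
	 *   chunk 1: pos 5000, offset 904, bytes 3192  -> [5000,  8192)
	 *   chunk 2: pos 8192, offset   0, bytes 2808  -> [8192, 11000)
	 */
	error = xfs_iozero(ip, 5000, 6000);
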
99
100STATIC int
101xfs_file_fsync(
102 struct file *file,
103 struct dentry *dentry,
104 int datasync)
105{
106 struct xfs_inode *ip = XFS_I(dentry->d_inode);
107 struct xfs_trans *tp;
108 int error = 0;
109 int log_flushed = 0;
110
111 xfs_itrace_entry(ip);
112
113 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
114 return -XFS_ERROR(EIO);
115
116 xfs_iflags_clear(ip, XFS_ITRUNCATED);
117
118 /*
119 * We always need to make sure that the required inode state is safe on
120 * disk. The inode might be clean but we still might need to force the
121 * log because of committed transactions that haven't hit the disk yet.
122 * Likewise, there could be unflushed non-transactional changes to the
123 * inode core that have to go to disk and this requires us to issue
124 * a synchronous transaction to capture these changes correctly.
125 *
126 * This code relies on the assumption that if the i_update_core field
127 * of the inode is clear and the inode is unpinned then it is clean
128 * and no action is required.
129 */
130 xfs_ilock(ip, XFS_ILOCK_SHARED);
131
132 /*
133 * First check if the VFS inode is marked dirty. All the dirtying
 134 * of non-transactional updates now goes through mark_inode_dirty*,
 135 * which allows us to distinguish between pure timestamp updates
 136 * and i_size updates which need to be caught for fdatasync.
 137 * After that we also check for the dirty state in the XFS inode, which
 138 * might get cleared when the inode gets written out via the AIL
139 * or xfs_iflush_cluster.
140 */
141 if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) ||
142 ((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
143 ip->i_update_core) {
144 /*
145 * Kick off a transaction to log the inode core to get the
146 * updates. The sync transaction will also force the log.
147 */
148 xfs_iunlock(ip, XFS_ILOCK_SHARED);
149 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
150 error = xfs_trans_reserve(tp, 0,
151 XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
152 if (error) {
153 xfs_trans_cancel(tp, 0);
154 return -error;
155 }
156 xfs_ilock(ip, XFS_ILOCK_EXCL);
157
158 /*
159 * Note - it's possible that we might have pushed ourselves out
160 * of the way during trans_reserve which would flush the inode.
161 * But there's no guarantee that the inode buffer has actually
162 * gone out yet (it's delwri). Plus the buffer could be pinned
163 * anyway if it's part of an inode in another recent
164 * transaction. So we play it safe and fire off the
165 * transaction anyway.
166 */
167 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
168 xfs_trans_ihold(tp, ip);
169 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
170 xfs_trans_set_sync(tp);
171 error = _xfs_trans_commit(tp, 0, &log_flushed);
172
173 xfs_iunlock(ip, XFS_ILOCK_EXCL);
174 } else {
175 /*
176 * Timestamps/size haven't changed since last inode flush or
177 * inode transaction commit. That means either nothing got
178 * written or a transaction committed which caught the updates.
179 * If the latter happened and the transaction hasn't hit the
180 * disk yet, the inode will be still be pinned. If it is,
181 * force the log.
182 */
183 if (xfs_ipincount(ip)) {
184 error = _xfs_log_force_lsn(ip->i_mount,
185 ip->i_itemp->ili_last_lsn,
186 XFS_LOG_SYNC, &log_flushed);
187 }
188 xfs_iunlock(ip, XFS_ILOCK_SHARED);
189 }
190
191 if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
192 /*
193 * If the log write didn't issue an ordered tag we need
194 * to flush the disk cache for the data device now.
195 */
196 if (!log_flushed)
197 xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
198
199 /*
200 * If this inode is on the RT dev we need to flush that
201 * cache as well.
202 */
203 if (XFS_IS_REALTIME_INODE(ip))
204 xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
205 }
206
207 return -error;
64} 208}
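
The branch condition above is dense enough to restate. A hedged paraphrase of when the transaction path is taken (example_needs_fsync_tx is a hypothetical helper, not in the patch):

	/*
	 * Log the inode core iff the VFS thinks something beyond a pure
	 * timestamp update is dirty AND the XFS inode core still has
	 * unlogged changes; otherwise forcing the log (if the inode is
	 * pinned) is enough.
	 */
	static inline int
	example_needs_fsync_tx(struct inode *inode, struct xfs_inode *ip,
			       int datasync)
	{
		int vfs_dirty = (inode->i_state & I_DIRTY_DATASYNC) ||
				((inode->i_state & I_DIRTY_SYNC) && !datasync);

		return vfs_dirty && ip->i_update_core;
	}
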
65 209
66STATIC ssize_t 210STATIC ssize_t
67xfs_file_aio_write( 211xfs_file_aio_read(
68 struct kiocb *iocb, 212 struct kiocb *iocb,
69 const struct iovec *iov, 213 const struct iovec *iovp,
70 unsigned long nr_segs, 214 unsigned long nr_segs,
71 loff_t pos) 215 loff_t pos)
72{ 216{
73 struct file *file = iocb->ki_filp; 217 struct file *file = iocb->ki_filp;
74 int ioflags = IO_ISAIO; 218 struct inode *inode = file->f_mapping->host;
219 struct xfs_inode *ip = XFS_I(inode);
220 struct xfs_mount *mp = ip->i_mount;
221 size_t size = 0;
222 ssize_t ret = 0;
223 int ioflags = 0;
224 xfs_fsize_t n;
225 unsigned long seg;
226
227 XFS_STATS_INC(xs_read_calls);
75 228
76 BUG_ON(iocb->ki_pos != pos); 229 BUG_ON(iocb->ki_pos != pos);
230
77 if (unlikely(file->f_flags & O_DIRECT)) 231 if (unlikely(file->f_flags & O_DIRECT))
78 ioflags |= IO_ISDIRECT; 232 ioflags |= IO_ISDIRECT;
79 if (file->f_mode & FMODE_NOCMTIME) 233 if (file->f_mode & FMODE_NOCMTIME)
80 ioflags |= IO_INVIS; 234 ioflags |= IO_INVIS;
81 return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs, 235
82 &iocb->ki_pos, ioflags); 236 /* START copy & waste from filemap.c */
237 for (seg = 0; seg < nr_segs; seg++) {
238 const struct iovec *iv = &iovp[seg];
239
240 /*
241 * If any segment has a negative length, or the cumulative
242 * length ever wraps negative then return -EINVAL.
243 */
244 size += iv->iov_len;
245 if (unlikely((ssize_t)(size|iv->iov_len) < 0))
246 return XFS_ERROR(-EINVAL);
247 }
248 /* END copy & waste from filemap.c */
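
The borrowed check packs two tests into one comparison: OR-ing each iov_len into the sign test catches a segment whose length has the sign bit set, and testing the running sum catches cumulative wraparound. Worked with 32-bit ssize_t for brevity:

	/*
	 * size        = 0x7fffffff   (largest valid total so far)
	 * iv->iov_len = 0x00000001
	 * size += iov_len            -> 0x80000000, negative as ssize_t
	 * (ssize_t)(size | iov_len) < 0 therefore fires; it also fires
	 * for a single bogus iov_len >= 0x80000000 on its own.
	 */
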
249
250 if (unlikely(ioflags & IO_ISDIRECT)) {
251 xfs_buftarg_t *target =
252 XFS_IS_REALTIME_INODE(ip) ?
253 mp->m_rtdev_targp : mp->m_ddev_targp;
254 if ((iocb->ki_pos & target->bt_smask) ||
255 (size & target->bt_smask)) {
256 if (iocb->ki_pos == ip->i_size)
257 return 0;
258 return -XFS_ERROR(EINVAL);
259 }
260 }
261
262 n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
263 if (n <= 0 || size == 0)
264 return 0;
265
266 if (n < size)
267 size = n;
268
269 if (XFS_FORCED_SHUTDOWN(mp))
270 return -EIO;
271
272 if (unlikely(ioflags & IO_ISDIRECT))
273 mutex_lock(&inode->i_mutex);
274 xfs_ilock(ip, XFS_IOLOCK_SHARED);
275
276 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
277 int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
278 int iolock = XFS_IOLOCK_SHARED;
279
280 ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, iocb->ki_pos, size,
281 dmflags, &iolock);
282 if (ret) {
283 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
284 if (unlikely(ioflags & IO_ISDIRECT))
285 mutex_unlock(&inode->i_mutex);
286 return ret;
287 }
288 }
289
290 if (unlikely(ioflags & IO_ISDIRECT)) {
291 if (inode->i_mapping->nrpages) {
292 ret = -xfs_flushinval_pages(ip,
293 (iocb->ki_pos & PAGE_CACHE_MASK),
294 -1, FI_REMAPF_LOCKED);
295 }
296 mutex_unlock(&inode->i_mutex);
297 if (ret) {
298 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
299 return ret;
300 }
301 }
302
303 trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
304
305 ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
306 if (ret > 0)
307 XFS_STATS_ADD(xs_read_bytes, ret);
308
309 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
310 return ret;
83} 311}
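
The direct-I/O gate above masks both the file position and the total length against bt_smask, the sector-size mask of the backing buftarg. Numbers, assuming 512-byte sectors:

	/*
	 * bt_smask = 512 - 1 = 0x1ff
	 * pos = 4096, size = 8192  ->  both & 0x1ff == 0, allowed
	 * pos = 4100               ->  4100 & 0x1ff == 4, -EINVAL
	 * One exception: a misaligned read starting exactly at EOF
	 * returns 0 instead of an error.
	 */
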
84 312
85STATIC ssize_t 313STATIC ssize_t
@@ -87,16 +315,44 @@ xfs_file_splice_read(
87 struct file *infilp, 315 struct file *infilp,
88 loff_t *ppos, 316 loff_t *ppos,
89 struct pipe_inode_info *pipe, 317 struct pipe_inode_info *pipe,
90 size_t len, 318 size_t count,
91 unsigned int flags) 319 unsigned int flags)
92{ 320{
321 struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
322 struct xfs_mount *mp = ip->i_mount;
93 int ioflags = 0; 323 int ioflags = 0;
324 ssize_t ret;
325
326 XFS_STATS_INC(xs_read_calls);
94 327
95 if (infilp->f_mode & FMODE_NOCMTIME) 328 if (infilp->f_mode & FMODE_NOCMTIME)
96 ioflags |= IO_INVIS; 329 ioflags |= IO_INVIS;
97 330
98 return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode), 331 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
99 infilp, ppos, pipe, len, flags, ioflags); 332 return -EIO;
333
334 xfs_ilock(ip, XFS_IOLOCK_SHARED);
335
336 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
337 int iolock = XFS_IOLOCK_SHARED;
338 int error;
339
340 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
341 FILP_DELAY_FLAG(infilp), &iolock);
342 if (error) {
343 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
344 return -error;
345 }
346 }
347
348 trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
349
350 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
351 if (ret > 0)
352 XFS_STATS_ADD(xs_read_bytes, ret);
353
354 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
355 return ret;
100} 356}
101 357
102STATIC ssize_t 358STATIC ssize_t
@@ -104,16 +360,538 @@ xfs_file_splice_write(
104 struct pipe_inode_info *pipe, 360 struct pipe_inode_info *pipe,
105 struct file *outfilp, 361 struct file *outfilp,
106 loff_t *ppos, 362 loff_t *ppos,
107 size_t len, 363 size_t count,
108 unsigned int flags) 364 unsigned int flags)
109{ 365{
366 struct inode *inode = outfilp->f_mapping->host;
367 struct xfs_inode *ip = XFS_I(inode);
368 struct xfs_mount *mp = ip->i_mount;
369 xfs_fsize_t isize, new_size;
110 int ioflags = 0; 370 int ioflags = 0;
371 ssize_t ret;
372
373 XFS_STATS_INC(xs_write_calls);
111 374
112 if (outfilp->f_mode & FMODE_NOCMTIME) 375 if (outfilp->f_mode & FMODE_NOCMTIME)
113 ioflags |= IO_INVIS; 376 ioflags |= IO_INVIS;
114 377
115 return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode), 378 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
116 pipe, outfilp, ppos, len, flags, ioflags); 379 return -EIO;
380
381 xfs_ilock(ip, XFS_IOLOCK_EXCL);
382
383 if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
384 int iolock = XFS_IOLOCK_EXCL;
385 int error;
386
387 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
388 FILP_DELAY_FLAG(outfilp), &iolock);
389 if (error) {
390 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
391 return -error;
392 }
393 }
394
395 new_size = *ppos + count;
396
397 xfs_ilock(ip, XFS_ILOCK_EXCL);
398 if (new_size > ip->i_size)
399 ip->i_new_size = new_size;
400 xfs_iunlock(ip, XFS_ILOCK_EXCL);
401
402 trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
403
404 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
405 if (ret > 0)
406 XFS_STATS_ADD(xs_write_bytes, ret);
407
408 isize = i_size_read(inode);
409 if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
410 *ppos = isize;
411
412 if (*ppos > ip->i_size) {
413 xfs_ilock(ip, XFS_ILOCK_EXCL);
414 if (*ppos > ip->i_size)
415 ip->i_size = *ppos;
416 xfs_iunlock(ip, XFS_ILOCK_EXCL);
417 }
418
419 if (ip->i_new_size) {
420 xfs_ilock(ip, XFS_ILOCK_EXCL);
421 ip->i_new_size = 0;
422 if (ip->i_d.di_size > ip->i_size)
423 ip->i_d.di_size = ip->i_size;
424 xfs_iunlock(ip, XFS_ILOCK_EXCL);
425 }
426 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
427 return ret;
428}
429
430/*
431 * This routine is called to handle zeroing any space in the last
432 * block of the file that is beyond the EOF. We do this since the
433 * size is being increased without writing anything to that block
434 * and we don't want anyone to read the garbage on the disk.
435 */
436STATIC int /* error (positive) */
437xfs_zero_last_block(
438 xfs_inode_t *ip,
439 xfs_fsize_t offset,
440 xfs_fsize_t isize)
441{
442 xfs_fileoff_t last_fsb;
443 xfs_mount_t *mp = ip->i_mount;
444 int nimaps;
445 int zero_offset;
446 int zero_len;
447 int error = 0;
448 xfs_bmbt_irec_t imap;
449
450 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
451
452 zero_offset = XFS_B_FSB_OFFSET(mp, isize);
453 if (zero_offset == 0) {
454 /*
455 * There are no extra bytes in the last block on disk to
456 * zero, so return.
457 */
458 return 0;
459 }
460
461 last_fsb = XFS_B_TO_FSBT(mp, isize);
462 nimaps = 1;
463 error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
464 &nimaps, NULL, NULL);
465 if (error) {
466 return error;
467 }
468 ASSERT(nimaps > 0);
469 /*
470 * If the block underlying isize is just a hole, then there
471 * is nothing to zero.
472 */
473 if (imap.br_startblock == HOLESTARTBLOCK) {
474 return 0;
475 }
476 /*
477 * Zero the part of the last block beyond the EOF, and write it
478 * out sync. We need to drop the ilock while we do this so we
479 * don't deadlock when the buffer cache calls back to us.
480 */
481 xfs_iunlock(ip, XFS_ILOCK_EXCL);
482
483 zero_len = mp->m_sb.sb_blocksize - zero_offset;
484 if (isize + zero_len > offset)
485 zero_len = offset - isize;
486 error = xfs_iozero(ip, isize, zero_len);
487
488 xfs_ilock(ip, XFS_ILOCK_EXCL);
489 ASSERT(error >= 0);
490 return error;
491}
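
A worked instance of the arithmetic, with a 4096-byte block size, isize = 10000, and a write starting at offset = 20000 (values are illustrative):

	/*
	 * zero_offset = 10000 % 4096 = 1808   (non-zero, so work to do)
	 * zero_len    = 4096 - 1808  = 2288
	 * isize + zero_len = 12288 <= offset, so no clamping
	 *
	 * => xfs_iozero(ip, 10000, 2288) zeroes out to the block
	 *    boundary; xfs_zero_eof() below deals with [12288, 20000).
	 */
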
492
493/*
494 * Zero any on disk space between the current EOF and the new,
495 * larger EOF. This handles the normal case of zeroing the remainder
496 * of the last block in the file and the unusual case of zeroing blocks
497 * out beyond the size of the file. This second case only happens
498 * with fixed size extents and when the system crashes before the inode
499 * size was updated but after blocks were allocated. If fill is set,
500 * then any holes in the range are filled and zeroed. If not, the holes
501 * are left alone as holes.
502 */
503
504int /* error (positive) */
505xfs_zero_eof(
506 xfs_inode_t *ip,
507 xfs_off_t offset, /* starting I/O offset */
508 xfs_fsize_t isize) /* current inode size */
509{
510 xfs_mount_t *mp = ip->i_mount;
511 xfs_fileoff_t start_zero_fsb;
512 xfs_fileoff_t end_zero_fsb;
513 xfs_fileoff_t zero_count_fsb;
514 xfs_fileoff_t last_fsb;
515 xfs_fileoff_t zero_off;
516 xfs_fsize_t zero_len;
517 int nimaps;
518 int error = 0;
519 xfs_bmbt_irec_t imap;
520
521 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
522 ASSERT(offset > isize);
523
524 /*
525 * First handle zeroing the block on which isize resides.
526 * We only zero a part of that block so it is handled specially.
527 */
528 error = xfs_zero_last_block(ip, offset, isize);
529 if (error) {
530 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
531 return error;
532 }
533
534 /*
535 * Calculate the range between the new size and the old
536 * where blocks needing to be zeroed may exist. To get the
537 * block where the last byte in the file currently resides,
538 * we need to subtract one from the size and truncate back
539 * to a block boundary. We subtract 1 in case the size is
540 * exactly on a block boundary.
541 */
542 last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
543 start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
544 end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
545 ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
546 if (last_fsb == end_zero_fsb) {
547 /*
548 * The size was only incremented on its last block.
549 * We took care of that above, so just return.
550 */
551 return 0;
552 }
553
554 ASSERT(start_zero_fsb <= end_zero_fsb);
555 while (start_zero_fsb <= end_zero_fsb) {
556 nimaps = 1;
557 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
558 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
559 0, NULL, 0, &imap, &nimaps, NULL, NULL);
560 if (error) {
561 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
562 return error;
563 }
564 ASSERT(nimaps > 0);
565
566 if (imap.br_state == XFS_EXT_UNWRITTEN ||
567 imap.br_startblock == HOLESTARTBLOCK) {
568 /*
569 * This loop handles initializing pages that were
570 * partially initialized by the code below this
571 * loop. It basically zeroes the part of the page
572 * that sits on a hole and sets the page as P_HOLE
573 * and calls remapf if it is a mapped file.
574 */
575 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
576 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
577 continue;
578 }
579
580 /*
581 * There are blocks we need to zero.
582 * Drop the inode lock while we're doing the I/O.
583 * We'll still have the iolock to protect us.
584 */
585 xfs_iunlock(ip, XFS_ILOCK_EXCL);
586
587 zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
588 zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
589
590 if ((zero_off + zero_len) > offset)
591 zero_len = offset - zero_off;
592
593 error = xfs_iozero(ip, zero_off, zero_len);
594 if (error) {
595 goto out_lock;
596 }
597
598 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
599 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
600
601 xfs_ilock(ip, XFS_ILOCK_EXCL);
602 }
603
604 return 0;
605
606out_lock:
607 xfs_ilock(ip, XFS_ILOCK_EXCL);
608 ASSERT(error >= 0);
609 return error;
610}
611
612STATIC ssize_t
613xfs_file_aio_write(
614 struct kiocb *iocb,
615 const struct iovec *iovp,
616 unsigned long nr_segs,
617 loff_t pos)
618{
619 struct file *file = iocb->ki_filp;
620 struct address_space *mapping = file->f_mapping;
621 struct inode *inode = mapping->host;
622 struct xfs_inode *ip = XFS_I(inode);
623 struct xfs_mount *mp = ip->i_mount;
624 ssize_t ret = 0, error = 0;
625 int ioflags = 0;
626 xfs_fsize_t isize, new_size;
627 int iolock;
628 int eventsent = 0;
629 size_t ocount = 0, count;
630 int need_i_mutex;
631
632 XFS_STATS_INC(xs_write_calls);
633
634 BUG_ON(iocb->ki_pos != pos);
635
636 if (unlikely(file->f_flags & O_DIRECT))
637 ioflags |= IO_ISDIRECT;
638 if (file->f_mode & FMODE_NOCMTIME)
639 ioflags |= IO_INVIS;
640
641 error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
642 if (error)
643 return error;
644
645 count = ocount;
646 if (count == 0)
647 return 0;
648
649 xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);
650
651 if (XFS_FORCED_SHUTDOWN(mp))
652 return -EIO;
653
654relock:
655 if (ioflags & IO_ISDIRECT) {
656 iolock = XFS_IOLOCK_SHARED;
657 need_i_mutex = 0;
658 } else {
659 iolock = XFS_IOLOCK_EXCL;
660 need_i_mutex = 1;
661 mutex_lock(&inode->i_mutex);
662 }
663
664 xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
665
666start:
667 error = -generic_write_checks(file, &pos, &count,
668 S_ISBLK(inode->i_mode));
669 if (error) {
670 xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
671 goto out_unlock_mutex;
672 }
673
674 if ((DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) &&
675 !(ioflags & IO_INVIS) && !eventsent)) {
676 int dmflags = FILP_DELAY_FLAG(file);
677
678 if (need_i_mutex)
679 dmflags |= DM_FLAGS_IMUX;
680
681 xfs_iunlock(ip, XFS_ILOCK_EXCL);
682 error = XFS_SEND_DATA(ip->i_mount, DM_EVENT_WRITE, ip,
683 pos, count, dmflags, &iolock);
684 if (error) {
685 goto out_unlock_internal;
686 }
687 xfs_ilock(ip, XFS_ILOCK_EXCL);
688 eventsent = 1;
689
690 /*
691 * The iolock was dropped and reacquired in XFS_SEND_DATA
692 * so we have to recheck the size when appending.
693 * We will only "goto start;" once, since having sent the
694 * event prevents another call to XFS_SEND_DATA, which is
695 * what allows the size to change in the first place.
696 */
697 if ((file->f_flags & O_APPEND) && pos != ip->i_size)
698 goto start;
699 }
700
701 if (ioflags & IO_ISDIRECT) {
702 xfs_buftarg_t *target =
703 XFS_IS_REALTIME_INODE(ip) ?
704 mp->m_rtdev_targp : mp->m_ddev_targp;
705
706 if ((pos & target->bt_smask) || (count & target->bt_smask)) {
707 xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
708 return XFS_ERROR(-EINVAL);
709 }
710
711 if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) {
712 xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
713 iolock = XFS_IOLOCK_EXCL;
714 need_i_mutex = 1;
715 mutex_lock(&inode->i_mutex);
716 xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
717 goto start;
718 }
719 }
720
721 new_size = pos + count;
722 if (new_size > ip->i_size)
723 ip->i_new_size = new_size;
724
725 if (likely(!(ioflags & IO_INVIS)))
726 file_update_time(file);
727
728 /*
729 * If the offset is beyond the size of the file, we have a couple
730 * of things to do. First, if there is already space allocated
731 * we need to either create holes or zero the disk or ...
732 *
733 * If there is a page where the previous size lands, we need
734 * to zero it out up to the new size.
735 */
736
737 if (pos > ip->i_size) {
738 error = xfs_zero_eof(ip, pos, ip->i_size);
739 if (error) {
740 xfs_iunlock(ip, XFS_ILOCK_EXCL);
741 goto out_unlock_internal;
742 }
743 }
744 xfs_iunlock(ip, XFS_ILOCK_EXCL);
745
746 /*
747 * If we're writing the file then make sure to clear the
748 * setuid and setgid bits if the process is not being run
749 * by root. This keeps people from modifying setuid and
750 * setgid binaries.
751 */
752 error = -file_remove_suid(file);
753 if (unlikely(error))
754 goto out_unlock_internal;
755
756 /* We can write back this queue in page reclaim */
757 current->backing_dev_info = mapping->backing_dev_info;
758
759 if ((ioflags & IO_ISDIRECT)) {
760 if (mapping->nrpages) {
761 WARN_ON(need_i_mutex == 0);
762 error = xfs_flushinval_pages(ip,
763 (pos & PAGE_CACHE_MASK),
764 -1, FI_REMAPF_LOCKED);
765 if (error)
766 goto out_unlock_internal;
767 }
768
769 if (need_i_mutex) {
770 /* demote the lock now the cached pages are gone */
771 xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
772 mutex_unlock(&inode->i_mutex);
773
774 iolock = XFS_IOLOCK_SHARED;
775 need_i_mutex = 0;
776 }
777
778 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
779 ret = generic_file_direct_write(iocb, iovp,
780 &nr_segs, pos, &iocb->ki_pos, count, ocount);
781
782 /*
783 * direct-io write to a hole: fall through to buffered I/O
784 * for completing the rest of the request.
785 */
786 if (ret >= 0 && ret != count) {
787 XFS_STATS_ADD(xs_write_bytes, ret);
788
789 pos += ret;
790 count -= ret;
791
792 ioflags &= ~IO_ISDIRECT;
793 xfs_iunlock(ip, iolock);
794 goto relock;
795 }
796 } else {
797 int enospc = 0;
798 ssize_t ret2 = 0;
799
800write_retry:
801 trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags);
802 ret2 = generic_file_buffered_write(iocb, iovp, nr_segs,
803 pos, &iocb->ki_pos, count, ret);
804 /*
 805 * if we just got an ENOSPC, flush the inode now that we
 806 * aren't holding any page locks and retry *once*
807 */
808 if (ret2 == -ENOSPC && !enospc) {
809 error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
810 if (error)
811 goto out_unlock_internal;
812 enospc = 1;
813 goto write_retry;
814 }
815 ret = ret2;
816 }
817
818 current->backing_dev_info = NULL;
819
820 isize = i_size_read(inode);
821 if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize))
822 iocb->ki_pos = isize;
823
824 if (iocb->ki_pos > ip->i_size) {
825 xfs_ilock(ip, XFS_ILOCK_EXCL);
826 if (iocb->ki_pos > ip->i_size)
827 ip->i_size = iocb->ki_pos;
828 xfs_iunlock(ip, XFS_ILOCK_EXCL);
829 }
830
831 if (ret == -ENOSPC &&
832 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
833 xfs_iunlock(ip, iolock);
834 if (need_i_mutex)
835 mutex_unlock(&inode->i_mutex);
836 error = XFS_SEND_NAMESP(ip->i_mount, DM_EVENT_NOSPACE, ip,
837 DM_RIGHT_NULL, ip, DM_RIGHT_NULL, NULL, NULL,
838 0, 0, 0); /* Delay flag intentionally unused */
839 if (need_i_mutex)
840 mutex_lock(&inode->i_mutex);
841 xfs_ilock(ip, iolock);
842 if (error)
843 goto out_unlock_internal;
844 goto start;
845 }
846
847 error = -ret;
848 if (ret <= 0)
849 goto out_unlock_internal;
850
851 XFS_STATS_ADD(xs_write_bytes, ret);
852
853 /* Handle various SYNC-type writes */
854 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
855 loff_t end = pos + ret - 1;
856 int error2;
857
858 xfs_iunlock(ip, iolock);
859 if (need_i_mutex)
860 mutex_unlock(&inode->i_mutex);
861
862 error2 = filemap_write_and_wait_range(mapping, pos, end);
863 if (!error)
864 error = error2;
865 if (need_i_mutex)
866 mutex_lock(&inode->i_mutex);
867 xfs_ilock(ip, iolock);
868
869 error2 = -xfs_file_fsync(file, file->f_path.dentry,
870 (file->f_flags & __O_SYNC) ? 0 : 1);
871 if (!error)
872 error = error2;
873 }
874
875 out_unlock_internal:
876 if (ip->i_new_size) {
877 xfs_ilock(ip, XFS_ILOCK_EXCL);
878 ip->i_new_size = 0;
879 /*
880 * If this was a direct or synchronous I/O that failed (such
881 * as ENOSPC) then part of the I/O may have been written to
 882 * disk before the error occurred. In this case the on-disk
883 * file size may have been adjusted beyond the in-memory file
884 * size and now needs to be truncated back.
885 */
886 if (ip->i_d.di_size > ip->i_size)
887 ip->i_d.di_size = ip->i_size;
888 xfs_iunlock(ip, XFS_ILOCK_EXCL);
889 }
890 xfs_iunlock(ip, iolock);
891 out_unlock_mutex:
892 if (need_i_mutex)
893 mutex_unlock(&inode->i_mutex);
894 return -error;
117} 895}
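
The buffered branch above retries exactly once on ENOSPC, on the theory that flushing delayed-allocation pages can release enough reserved blocks for a second attempt. The same shape in isolation (example_write_retry_once and the attempt callback are hypothetical):

	/*
	 * Retry-once skeleton: one flush, one retry, never a loop.
	 */
	static ssize_t
	example_write_retry_once(struct xfs_inode *ip,
				 ssize_t (*attempt)(void *arg), void *arg)
	{
		ssize_t ret = attempt(arg);

		if (ret == -ENOSPC) {
			/* the real path also bails out if the flush fails */
			if (!xfs_flush_pages(ip, 0, -1, 0, FI_NONE))
				ret = attempt(arg);
		}
		return ret;
	}
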
118 896
119STATIC int 897STATIC int
@@ -160,28 +938,6 @@ xfs_file_release(
160 return -xfs_release(XFS_I(inode)); 938 return -xfs_release(XFS_I(inode));
161} 939}
162 940
163/*
164 * We ignore the datasync flag here because a datasync is effectively
165 * identical to an fsync. That is, datasync implies that we need to write
166 * only the metadata needed to be able to access the data that is written
167 * if we crash after the call completes. Hence if we are writing beyond
168 * EOF we have to log the inode size change as well, which makes it a
169 * full fsync. If we don't write beyond EOF, the inode core will be
170 * clean in memory and so we don't need to log the inode, just like
171 * fsync.
172 */
173STATIC int
174xfs_file_fsync(
175 struct file *file,
176 struct dentry *dentry,
177 int datasync)
178{
179 struct xfs_inode *ip = XFS_I(dentry->d_inode);
180
181 xfs_iflags_clear(ip, XFS_ITRUNCATED);
182 return -xfs_fsync(ip);
183}
184
185STATIC int 941STATIC int
186xfs_file_readdir( 942xfs_file_readdir(
187 struct file *filp, 943 struct file *filp,
@@ -203,9 +959,9 @@ xfs_file_readdir(
203 * 959 *
204 * Try to give it an estimate that's good enough, maybe at some 960 * Try to give it an estimate that's good enough, maybe at some
205 * point we can change the ->readdir prototype to include the 961 * point we can change the ->readdir prototype to include the
206 * buffer size. 962 * buffer size. For now we use the current glibc buffer size.
207 */ 963 */
208 bufsize = (size_t)min_t(loff_t, PAGE_SIZE, ip->i_d.di_size); 964 bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
209 965
210 error = xfs_readdir(ip, dirent, bufsize, 966 error = xfs_readdir(ip, dirent, bufsize,
211 (xfs_off_t *)&filp->f_pos, filldir); 967 (xfs_off_t *)&filp->f_pos, filldir);
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 08be36d7326c..b6918d76bc7b 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -19,6 +19,7 @@
19#include "xfs_vnodeops.h" 19#include "xfs_vnodeops.h"
20#include "xfs_bmap_btree.h" 20#include "xfs_bmap_btree.h"
21#include "xfs_inode.h" 21#include "xfs_inode.h"
22#include "xfs_trace.h"
22 23
23int fs_noerr(void) { return 0; } 24int fs_noerr(void) { return 0; }
24int fs_nosys(void) { return ENOSYS; } 25int fs_nosys(void) { return ENOSYS; }
@@ -51,6 +52,8 @@ xfs_flushinval_pages(
51 struct address_space *mapping = VFS_I(ip)->i_mapping; 52 struct address_space *mapping = VFS_I(ip)->i_mapping;
52 int ret = 0; 53 int ret = 0;
53 54
55 trace_xfs_pagecache_inval(ip, first, last);
56
54 if (mapping->nrpages) { 57 if (mapping->nrpages) {
55 xfs_iflags_clear(ip, XFS_ITRUNCATED); 58 xfs_iflags_clear(ip, XFS_ITRUNCATED);
56 ret = filemap_write_and_wait(mapping); 59 ret = filemap_write_and_wait(mapping);
@@ -76,7 +79,7 @@ xfs_flush_pages(
76 xfs_iflags_clear(ip, XFS_ITRUNCATED); 79 xfs_iflags_clear(ip, XFS_ITRUNCATED);
77 ret = -filemap_fdatawrite(mapping); 80 ret = -filemap_fdatawrite(mapping);
78 } 81 }
79 if (flags & XFS_B_ASYNC) 82 if (flags & XBF_ASYNC)
80 return ret; 83 return ret;
81 ret2 = xfs_wait_on_pages(ip, first, last); 84 ret2 = xfs_wait_on_pages(ip, first, last);
82 if (!ret) 85 if (!ret)
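For illustration, a minimal userspace sketch of the flush semantics in the hunk above: start writeback, then wait only when the caller did not pass the async flag. The flag value and helpers are stand-ins, not the real XFS buffer API.

#include <stdio.h>

#define XBF_ASYNC	(1 << 4)	/* illustrative value only */

static int start_writeback(void) { return 0; }	/* stub for filemap_fdatawrite */
static int wait_on_pages(void)   { return 0; }	/* stub for xfs_wait_on_pages */

static int flush_pages(unsigned int flags)
{
	int ret = start_writeback();

	if (flags & XBF_ASYNC)		/* async flush: don't wait for I/O */
		return ret;

	int ret2 = wait_on_pages();	/* sync flush: wait, keep first error */
	if (!ret)
		ret = ret2;
	return ret;
}

int main(void)
{
	printf("async=%d sync=%d\n", flush_pages(XBF_ASYNC), flush_pages(0));
	return 0;
}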
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 5bb523d7f37e..7b26cc2fd284 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -51,12 +51,14 @@
51#include "xfs_quota.h" 51#include "xfs_quota.h"
52#include "xfs_inode_item.h" 52#include "xfs_inode_item.h"
53#include "xfs_export.h" 53#include "xfs_export.h"
54#include "xfs_trace.h"
54 55
55#include <linux/capability.h> 56#include <linux/capability.h>
56#include <linux/dcache.h> 57#include <linux/dcache.h>
57#include <linux/mount.h> 58#include <linux/mount.h>
58#include <linux/namei.h> 59#include <linux/namei.h>
59#include <linux/pagemap.h> 60#include <linux/pagemap.h>
61#include <linux/slab.h>
60#include <linux/exportfs.h> 62#include <linux/exportfs.h>
61 63
62/* 64/*
@@ -446,12 +448,12 @@ xfs_attrlist_by_handle(
446int 448int
447xfs_attrmulti_attr_get( 449xfs_attrmulti_attr_get(
448 struct inode *inode, 450 struct inode *inode,
449 char *name, 451 unsigned char *name,
450 char __user *ubuf, 452 unsigned char __user *ubuf,
451 __uint32_t *len, 453 __uint32_t *len,
452 __uint32_t flags) 454 __uint32_t flags)
453{ 455{
454 char *kbuf; 456 unsigned char *kbuf;
455 int error = EFAULT; 457 int error = EFAULT;
456 458
457 if (*len > XATTR_SIZE_MAX) 459 if (*len > XATTR_SIZE_MAX)
@@ -475,12 +477,12 @@ xfs_attrmulti_attr_get(
475int 477int
476xfs_attrmulti_attr_set( 478xfs_attrmulti_attr_set(
477 struct inode *inode, 479 struct inode *inode,
478 char *name, 480 unsigned char *name,
479 const char __user *ubuf, 481 const unsigned char __user *ubuf,
480 __uint32_t len, 482 __uint32_t len,
481 __uint32_t flags) 483 __uint32_t flags)
482{ 484{
483 char *kbuf; 485 unsigned char *kbuf;
484 int error = EFAULT; 486 int error = EFAULT;
485 487
486 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 488 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
@@ -500,7 +502,7 @@ xfs_attrmulti_attr_set(
500int 502int
501xfs_attrmulti_attr_remove( 503xfs_attrmulti_attr_remove(
502 struct inode *inode, 504 struct inode *inode,
503 char *name, 505 unsigned char *name,
504 __uint32_t flags) 506 __uint32_t flags)
505{ 507{
506 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 508 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
@@ -518,7 +520,7 @@ xfs_attrmulti_by_handle(
518 xfs_fsop_attrmulti_handlereq_t am_hreq; 520 xfs_fsop_attrmulti_handlereq_t am_hreq;
519 struct dentry *dentry; 521 struct dentry *dentry;
520 unsigned int i, size; 522 unsigned int i, size;
521 char *attr_name; 523 unsigned char *attr_name;
522 524
523 if (!capable(CAP_SYS_ADMIN)) 525 if (!capable(CAP_SYS_ADMIN))
524 return -XFS_ERROR(EPERM); 526 return -XFS_ERROR(EPERM);
@@ -546,7 +548,7 @@ xfs_attrmulti_by_handle(
546 548
547 error = 0; 549 error = 0;
548 for (i = 0; i < am_hreq.opcount; i++) { 550 for (i = 0; i < am_hreq.opcount; i++) {
549 ops[i].am_error = strncpy_from_user(attr_name, 551 ops[i].am_error = strncpy_from_user((char *)attr_name,
550 ops[i].am_attrname, MAXNAMELEN); 552 ops[i].am_attrname, MAXNAMELEN);
551 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) 553 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
552 error = -ERANGE; 554 error = -ERANGE;
@@ -1430,6 +1432,9 @@ xfs_file_ioctl(
1430 if (!capable(CAP_SYS_ADMIN)) 1432 if (!capable(CAP_SYS_ADMIN))
1431 return -EPERM; 1433 return -EPERM;
1432 1434
1435 if (mp->m_flags & XFS_MOUNT_RDONLY)
1436 return -XFS_ERROR(EROFS);
1437
1433 if (copy_from_user(&inout, arg, sizeof(inout))) 1438 if (copy_from_user(&inout, arg, sizeof(inout)))
1434 return -XFS_ERROR(EFAULT); 1439 return -XFS_ERROR(EFAULT);
1435 1440
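A toy sketch of the gating order the added check establishes: privilege first, then the read-only test, and only then copy the user arguments. capable_sys_admin() and mount_is_readonly() are hypothetical stand-ins for capable() and the XFS_MOUNT_RDONLY test.

#include <errno.h>
#include <stdbool.h>
#include <string.h>

static bool capable_sys_admin(void) { return true; }	/* stub */
static bool mount_is_readonly(void) { return false; }	/* stub */

/* negative errno on failure, 0 on success, kernel-style */
static int handle_admin_ioctl(const void *uarg, void *kbuf, size_t len)
{
	if (!capable_sys_admin())
		return -EPERM;		/* privilege check first */
	if (mount_is_readonly())
		return -EROFS;		/* refuse before parsing arguments */
	memcpy(kbuf, uarg, len);	/* stand-in for copy_from_user() */
	return 0;
}

int main(void)
{
	char in[8] = "resv", out[8];

	return handle_admin_ioctl(in, out, sizeof(in)) ? 1 : 0;
}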
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h
index 7bd7c6afc1eb..d56173b34a2a 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl.h
@@ -45,23 +45,23 @@ xfs_readlink_by_handle(
45extern int 45extern int
46xfs_attrmulti_attr_get( 46xfs_attrmulti_attr_get(
47 struct inode *inode, 47 struct inode *inode,
48 char *name, 48 unsigned char *name,
49 char __user *ubuf, 49 unsigned char __user *ubuf,
50 __uint32_t *len, 50 __uint32_t *len,
51 __uint32_t flags); 51 __uint32_t flags);
52 52
53extern int 53extern int
54 xfs_attrmulti_attr_set( 54xfs_attrmulti_attr_set(
55 struct inode *inode, 55 struct inode *inode,
56 char *name, 56 unsigned char *name,
57 const char __user *ubuf, 57 const unsigned char __user *ubuf,
58 __uint32_t len, 58 __uint32_t len,
59 __uint32_t flags); 59 __uint32_t flags);
60 60
61extern int 61extern int
62xfs_attrmulti_attr_remove( 62xfs_attrmulti_attr_remove(
63 struct inode *inode, 63 struct inode *inode,
64 char *name, 64 unsigned char *name,
65 __uint32_t flags); 65 __uint32_t flags);
66 66
67extern struct dentry * 67extern struct dentry *
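The sketch below illustrates the convention these prototypes adopt, assuming the intent is to treat attribute names as raw bytes and cast only at the char-based string helpers (mirroring the strncpy_from_user cast earlier in this patch); attr_name_copy is a hypothetical helper, not part of the change.

#include <stdio.h>
#include <string.h>

/* names stored as unsigned bytes; cast at the string-API boundary */
static size_t attr_name_copy(unsigned char *dst, const char *src, size_t max)
{
	strncpy((char *)dst, src, max);
	dst[max - 1] = '\0';		/* always terminate */
	return strlen((char *)dst);
}

int main(void)
{
	unsigned char name[256];
	size_t n = attr_name_copy(name, "user.comment", sizeof(name));

	printf("copied %zu bytes: %s\n", n, (char *)name);
	return 0;
}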
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index eafcc7c18706..593c05b4df8d 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -18,6 +18,7 @@
18#include <linux/compat.h> 18#include <linux/compat.h>
19#include <linux/ioctl.h> 19#include <linux/ioctl.h>
20#include <linux/mount.h> 20#include <linux/mount.h>
21#include <linux/slab.h>
21#include <asm/uaccess.h> 22#include <asm/uaccess.h>
22#include "xfs.h" 23#include "xfs.h"
23#include "xfs_fs.h" 24#include "xfs_fs.h"
@@ -46,6 +47,7 @@
46#include "xfs_attr.h" 47#include "xfs_attr.h"
47#include "xfs_ioctl.h" 48#include "xfs_ioctl.h"
48#include "xfs_ioctl32.h" 49#include "xfs_ioctl32.h"
50#include "xfs_trace.h"
49 51
50#define _NATIVE_IOC(cmd, type) \ 52#define _NATIVE_IOC(cmd, type) \
51 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) 53 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
@@ -410,7 +412,7 @@ xfs_compat_attrmulti_by_handle(
410 compat_xfs_fsop_attrmulti_handlereq_t am_hreq; 412 compat_xfs_fsop_attrmulti_handlereq_t am_hreq;
411 struct dentry *dentry; 413 struct dentry *dentry;
412 unsigned int i, size; 414 unsigned int i, size;
413 char *attr_name; 415 unsigned char *attr_name;
414 416
415 if (!capable(CAP_SYS_ADMIN)) 417 if (!capable(CAP_SYS_ADMIN))
416 return -XFS_ERROR(EPERM); 418 return -XFS_ERROR(EPERM);
@@ -439,7 +441,7 @@ xfs_compat_attrmulti_by_handle(
439 441
440 error = 0; 442 error = 0;
441 for (i = 0; i < am_hreq.opcount; i++) { 443 for (i = 0; i < am_hreq.opcount; i++) {
442 ops[i].am_error = strncpy_from_user(attr_name, 444 ops[i].am_error = strncpy_from_user((char *)attr_name,
443 compat_ptr(ops[i].am_attrname), 445 compat_ptr(ops[i].am_attrname),
444 MAXNAMELEN); 446 MAXNAMELEN);
445 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) 447 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index cd42ef78f6b5..e65a7937f3a4 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -47,6 +47,7 @@
47#include "xfs_buf_item.h" 47#include "xfs_buf_item.h"
48#include "xfs_utils.h" 48#include "xfs_utils.h"
49#include "xfs_vnodeops.h" 49#include "xfs_vnodeops.h"
50#include "xfs_trace.h"
50 51
51#include <linux/capability.h> 52#include <linux/capability.h>
52#include <linux/xattr.h> 53#include <linux/xattr.h>
@@ -55,6 +56,7 @@
55#include <linux/security.h> 56#include <linux/security.h>
56#include <linux/falloc.h> 57#include <linux/falloc.h>
57#include <linux/fiemap.h> 58#include <linux/fiemap.h>
59#include <linux/slab.h>
58 60
59/* 61/*
60 * Bring the timestamps in the XFS inode uptodate. 62 * Bring the timestamps in the XFS inode uptodate.
@@ -90,6 +92,16 @@ xfs_mark_inode_dirty_sync(
90 mark_inode_dirty_sync(inode); 92 mark_inode_dirty_sync(inode);
91} 93}
92 94
95void
96xfs_mark_inode_dirty(
97 xfs_inode_t *ip)
98{
99 struct inode *inode = VFS_I(ip);
100
101 if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
102 mark_inode_dirty(inode);
103}
104
93/* 105/*
94 * Change the requested timestamp in the given inode. 106 * Change the requested timestamp in the given inode.
95 * We don't lock across timestamp updates, and we don't log them but 107 * We don't lock across timestamp updates, and we don't log them but
@@ -139,10 +151,10 @@ xfs_init_security(
139 struct xfs_inode *ip = XFS_I(inode); 151 struct xfs_inode *ip = XFS_I(inode);
140 size_t length; 152 size_t length;
141 void *value; 153 void *value;
142 char *name; 154 unsigned char *name;
143 int error; 155 int error;
144 156
145 error = security_inode_init_security(inode, dir, &name, 157 error = security_inode_init_security(inode, dir, (char **)&name,
146 &value, &length); 158 &value, &length);
147 if (error) { 159 if (error) {
148 if (error == -EOPNOTSUPP) 160 if (error == -EOPNOTSUPP)
@@ -573,8 +585,8 @@ xfs_vn_fallocate(
573 bf.l_len = len; 585 bf.l_len = len;
574 586
575 xfs_ilock(ip, XFS_IOLOCK_EXCL); 587 xfs_ilock(ip, XFS_IOLOCK_EXCL);
576 error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, 588 error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
577 0, XFS_ATTR_NOLOCK); 589 0, XFS_ATTR_NOLOCK);
578 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && 590 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
579 offset + len > i_size_read(inode)) 591 offset + len > i_size_read(inode))
580 new_size = offset + len; 592 new_size = offset + len;
@@ -585,7 +597,7 @@ xfs_vn_fallocate(
585 597
586 iattr.ia_valid = ATTR_SIZE; 598 iattr.ia_valid = ATTR_SIZE;
587 iattr.ia_size = new_size; 599 iattr.ia_size = new_size;
588 error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); 600 error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
589 } 601 }
590 602
591 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 603 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -793,7 +805,7 @@ xfs_setup_inode(
793 struct inode *inode = &ip->i_vnode; 805 struct inode *inode = &ip->i_vnode;
794 806
795 inode->i_ino = ip->i_ino; 807 inode->i_ino = ip->i_ino;
796 inode->i_state = I_NEW|I_LOCK; 808 inode->i_state = I_NEW;
797 inode_add_to_lists(ip->i_mount->m_super, inode); 809 inode_add_to_lists(ip->i_mount->m_super, inode);
798 810
799 inode->i_mode = ip->i_d.di_mode; 811 inode->i_mode = ip->i_d.di_mode;
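The two added '-' signs above follow the convention visible throughout this patch ("return -error", "-xfs_fsync(ip)"): internal XFS routines return positive error codes, and the VFS boundary negates them into negative errnos. A tiny sketch of that convention; xfs_do_something is a hypothetical stand-in.

#include <errno.h>
#include <stdio.h>

/* internal XFS-style routine: returns a positive error code, 0 on success */
static int xfs_do_something(int fail)
{
	return fail ? ENOSPC : 0;
}

/* VFS-facing wrapper: negate into the kernel's negative-errno convention */
static int vfs_entry_point(int fail)
{
	return -xfs_do_something(fail);
}

int main(void)
{
	printf("%d %d\n", vfs_entry_point(0), vfs_entry_point(1)); /* 0 and -ENOSPC */
	return 0;
}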
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 6127e24062d0..facfb323a706 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -40,7 +40,6 @@
40#include <sv.h> 40#include <sv.h>
41#include <time.h> 41#include <time.h>
42 42
43#include <support/ktrace.h>
44#include <support/debug.h> 43#include <support/debug.h>
45#include <support/uuid.h> 44#include <support/uuid.h>
46 45
@@ -89,7 +88,6 @@
89#include <xfs_super.h> 88#include <xfs_super.h>
90#include <xfs_globals.h> 89#include <xfs_globals.h>
91#include <xfs_fs_subr.h> 90#include <xfs_fs_subr.h>
92#include <xfs_lrw.h>
93#include <xfs_buf.h> 91#include <xfs_buf.h>
94 92
95/* 93/*
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
deleted file mode 100644
index 072050f8d346..000000000000
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ /dev/null
@@ -1,922 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_bit.h"
21#include "xfs_log.h"
22#include "xfs_inum.h"
23#include "xfs_trans.h"
24#include "xfs_sb.h"
25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h"
30#include "xfs_mount.h"
31#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h"
38#include "xfs_bmap.h"
39#include "xfs_btree.h"
40#include "xfs_ialloc.h"
41#include "xfs_rtalloc.h"
42#include "xfs_error.h"
43#include "xfs_itable.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h"
46#include "xfs_inode_item.h"
47#include "xfs_buf_item.h"
48#include "xfs_utils.h"
49#include "xfs_iomap.h"
50#include "xfs_vnodeops.h"
51
52#include <linux/capability.h>
53#include <linux/writeback.h>
54
55
56#if defined(XFS_RW_TRACE)
57void
58xfs_rw_enter_trace(
59 int tag,
60 xfs_inode_t *ip,
61 void *data,
62 size_t segs,
63 loff_t offset,
64 int ioflags)
65{
66 if (ip->i_rwtrace == NULL)
67 return;
68 ktrace_enter(ip->i_rwtrace,
69 (void *)(unsigned long)tag,
70 (void *)ip,
71 (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
72 (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
73 (void *)data,
74 (void *)((unsigned long)segs),
75 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
76 (void *)((unsigned long)(offset & 0xffffffff)),
77 (void *)((unsigned long)ioflags),
78 (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)),
79 (void *)((unsigned long)(ip->i_new_size & 0xffffffff)),
80 (void *)((unsigned long)current_pid()),
81 (void *)NULL,
82 (void *)NULL,
83 (void *)NULL,
84 (void *)NULL);
85}
86
87void
88xfs_inval_cached_trace(
89 xfs_inode_t *ip,
90 xfs_off_t offset,
91 xfs_off_t len,
92 xfs_off_t first,
93 xfs_off_t last)
94{
95
96 if (ip->i_rwtrace == NULL)
97 return;
98 ktrace_enter(ip->i_rwtrace,
99 (void *)(__psint_t)XFS_INVAL_CACHED,
100 (void *)ip,
101 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
102 (void *)((unsigned long)(offset & 0xffffffff)),
103 (void *)((unsigned long)((len >> 32) & 0xffffffff)),
104 (void *)((unsigned long)(len & 0xffffffff)),
105 (void *)((unsigned long)((first >> 32) & 0xffffffff)),
106 (void *)((unsigned long)(first & 0xffffffff)),
107 (void *)((unsigned long)((last >> 32) & 0xffffffff)),
108 (void *)((unsigned long)(last & 0xffffffff)),
109 (void *)((unsigned long)current_pid()),
110 (void *)NULL,
111 (void *)NULL,
112 (void *)NULL,
113 (void *)NULL,
114 (void *)NULL);
115}
116#endif
117
118/*
119 * xfs_iozero
120 *
121 * xfs_iozero clears the specified range of the buffer supplied,
122 * and marks all the affected blocks as valid and modified. If
123 * an affected block is not allocated, it will be allocated. If
124 * an affected block is not completely overwritten, and is not
125 * valid before the operation, it will be read from disk before
126 * being partially zeroed.
127 */
128STATIC int
129xfs_iozero(
130 struct xfs_inode *ip, /* inode */
131 loff_t pos, /* offset in file */
132 size_t count) /* size of data to zero */
133{
134 struct page *page;
135 struct address_space *mapping;
136 int status;
137
138 mapping = VFS_I(ip)->i_mapping;
139 do {
140 unsigned offset, bytes;
141 void *fsdata;
142
143 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
144 bytes = PAGE_CACHE_SIZE - offset;
145 if (bytes > count)
146 bytes = count;
147
148 status = pagecache_write_begin(NULL, mapping, pos, bytes,
149 AOP_FLAG_UNINTERRUPTIBLE,
150 &page, &fsdata);
151 if (status)
152 break;
153
154 zero_user(page, offset, bytes);
155
156 status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
157 page, fsdata);
158 WARN_ON(status <= 0); /* can't return less than zero! */
159 pos += bytes;
160 count -= bytes;
161 status = 0;
162 } while (count);
163
164 return (-status);
165}
166
167ssize_t /* bytes read, or (-) error */
168xfs_read(
169 xfs_inode_t *ip,
170 struct kiocb *iocb,
171 const struct iovec *iovp,
172 unsigned int segs,
173 loff_t *offset,
174 int ioflags)
175{
176 struct file *file = iocb->ki_filp;
177 struct inode *inode = file->f_mapping->host;
178 xfs_mount_t *mp = ip->i_mount;
179 size_t size = 0;
180 ssize_t ret = 0;
181 xfs_fsize_t n;
182 unsigned long seg;
183
184
185 XFS_STATS_INC(xs_read_calls);
186
187 /* START copy & waste from filemap.c */
188 for (seg = 0; seg < segs; seg++) {
189 const struct iovec *iv = &iovp[seg];
190
191 /*
192 * If any segment has a negative length, or the cumulative
193 * length ever wraps negative then return -EINVAL.
194 */
195 size += iv->iov_len;
196 if (unlikely((ssize_t)(size|iv->iov_len) < 0))
197 return XFS_ERROR(-EINVAL);
198 }
199 /* END copy & waste from filemap.c */
200
201 if (unlikely(ioflags & IO_ISDIRECT)) {
202 xfs_buftarg_t *target =
203 XFS_IS_REALTIME_INODE(ip) ?
204 mp->m_rtdev_targp : mp->m_ddev_targp;
205 if ((*offset & target->bt_smask) ||
206 (size & target->bt_smask)) {
207 if (*offset == ip->i_size) {
208 return (0);
209 }
210 return -XFS_ERROR(EINVAL);
211 }
212 }
213
214 n = XFS_MAXIOFFSET(mp) - *offset;
215 if ((n <= 0) || (size == 0))
216 return 0;
217
218 if (n < size)
219 size = n;
220
221 if (XFS_FORCED_SHUTDOWN(mp))
222 return -EIO;
223
224 if (unlikely(ioflags & IO_ISDIRECT))
225 mutex_lock(&inode->i_mutex);
226 xfs_ilock(ip, XFS_IOLOCK_SHARED);
227
228 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
229 int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
230 int iolock = XFS_IOLOCK_SHARED;
231
232 ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size,
233 dmflags, &iolock);
234 if (ret) {
235 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
236 if (unlikely(ioflags & IO_ISDIRECT))
237 mutex_unlock(&inode->i_mutex);
238 return ret;
239 }
240 }
241
242 if (unlikely(ioflags & IO_ISDIRECT)) {
243 if (inode->i_mapping->nrpages)
244 ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
245 -1, FI_REMAPF_LOCKED);
246 mutex_unlock(&inode->i_mutex);
247 if (ret) {
248 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
249 return ret;
250 }
251 }
252
253 xfs_rw_enter_trace(XFS_READ_ENTER, ip,
254 (void *)iovp, segs, *offset, ioflags);
255
256 iocb->ki_pos = *offset;
257 ret = generic_file_aio_read(iocb, iovp, segs, *offset);
258 if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
259 ret = wait_on_sync_kiocb(iocb);
260 if (ret > 0)
261 XFS_STATS_ADD(xs_read_bytes, ret);
262
263 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
264 return ret;
265}
266
267ssize_t
268xfs_splice_read(
269 xfs_inode_t *ip,
270 struct file *infilp,
271 loff_t *ppos,
272 struct pipe_inode_info *pipe,
273 size_t count,
274 int flags,
275 int ioflags)
276{
277 xfs_mount_t *mp = ip->i_mount;
278 ssize_t ret;
279
280 XFS_STATS_INC(xs_read_calls);
281 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
282 return -EIO;
283
284 xfs_ilock(ip, XFS_IOLOCK_SHARED);
285
286 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
287 int iolock = XFS_IOLOCK_SHARED;
288 int error;
289
290 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
291 FILP_DELAY_FLAG(infilp), &iolock);
292 if (error) {
293 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
294 return -error;
295 }
296 }
297 xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, ip,
298 pipe, count, *ppos, ioflags);
299 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
300 if (ret > 0)
301 XFS_STATS_ADD(xs_read_bytes, ret);
302
303 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
304 return ret;
305}
306
307ssize_t
308xfs_splice_write(
309 xfs_inode_t *ip,
310 struct pipe_inode_info *pipe,
311 struct file *outfilp,
312 loff_t *ppos,
313 size_t count,
314 int flags,
315 int ioflags)
316{
317 xfs_mount_t *mp = ip->i_mount;
318 ssize_t ret;
319 struct inode *inode = outfilp->f_mapping->host;
320 xfs_fsize_t isize, new_size;
321
322 XFS_STATS_INC(xs_write_calls);
323 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
324 return -EIO;
325
326 xfs_ilock(ip, XFS_IOLOCK_EXCL);
327
328 if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
329 int iolock = XFS_IOLOCK_EXCL;
330 int error;
331
332 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
333 FILP_DELAY_FLAG(outfilp), &iolock);
334 if (error) {
335 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
336 return -error;
337 }
338 }
339
340 new_size = *ppos + count;
341
342 xfs_ilock(ip, XFS_ILOCK_EXCL);
343 if (new_size > ip->i_size)
344 ip->i_new_size = new_size;
345 xfs_iunlock(ip, XFS_ILOCK_EXCL);
346
347 xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, ip,
348 pipe, count, *ppos, ioflags);
349 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
350 if (ret > 0)
351 XFS_STATS_ADD(xs_write_bytes, ret);
352
353 isize = i_size_read(inode);
354 if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
355 *ppos = isize;
356
357 if (*ppos > ip->i_size) {
358 xfs_ilock(ip, XFS_ILOCK_EXCL);
359 if (*ppos > ip->i_size)
360 ip->i_size = *ppos;
361 xfs_iunlock(ip, XFS_ILOCK_EXCL);
362 }
363
364 if (ip->i_new_size) {
365 xfs_ilock(ip, XFS_ILOCK_EXCL);
366 ip->i_new_size = 0;
367 if (ip->i_d.di_size > ip->i_size)
368 ip->i_d.di_size = ip->i_size;
369 xfs_iunlock(ip, XFS_ILOCK_EXCL);
370 }
371 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
372 return ret;
373}
374
375/*
376 * This routine is called to handle zeroing any space in the last
377 * block of the file that is beyond the EOF. We do this since the
378 * size is being increased without writing anything to that block
379 * and we don't want anyone to read the garbage on the disk.
380 */
381STATIC int /* error (positive) */
382xfs_zero_last_block(
383 xfs_inode_t *ip,
384 xfs_fsize_t offset,
385 xfs_fsize_t isize)
386{
387 xfs_fileoff_t last_fsb;
388 xfs_mount_t *mp = ip->i_mount;
389 int nimaps;
390 int zero_offset;
391 int zero_len;
392 int error = 0;
393 xfs_bmbt_irec_t imap;
394
395 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
396
397 zero_offset = XFS_B_FSB_OFFSET(mp, isize);
398 if (zero_offset == 0) {
399 /*
400 * There are no extra bytes in the last block on disk to
401 * zero, so return.
402 */
403 return 0;
404 }
405
406 last_fsb = XFS_B_TO_FSBT(mp, isize);
407 nimaps = 1;
408 error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
409 &nimaps, NULL, NULL);
410 if (error) {
411 return error;
412 }
413 ASSERT(nimaps > 0);
414 /*
415 * If the block underlying isize is just a hole, then there
416 * is nothing to zero.
417 */
418 if (imap.br_startblock == HOLESTARTBLOCK) {
419 return 0;
420 }
421 /*
422 * Zero the part of the last block beyond the EOF, and write it
423 * out sync. We need to drop the ilock while we do this so we
424 * don't deadlock when the buffer cache calls back to us.
425 */
426 xfs_iunlock(ip, XFS_ILOCK_EXCL);
427
428 zero_len = mp->m_sb.sb_blocksize - zero_offset;
429 if (isize + zero_len > offset)
430 zero_len = offset - isize;
431 error = xfs_iozero(ip, isize, zero_len);
432
433 xfs_ilock(ip, XFS_ILOCK_EXCL);
434 ASSERT(error >= 0);
435 return error;
436}
437
438/*
439 * Zero any on disk space between the current EOF and the new,
440 * larger EOF. This handles the normal case of zeroing the remainder
441 * of the last block in the file and the unusual case of zeroing blocks
442 * out beyond the size of the file. This second case only happens
443 * with fixed size extents and when the system crashes before the inode
444 * size was updated but after blocks were allocated. If fill is set,
445 * then any holes in the range are filled and zeroed. If not, the holes
446 * are left alone as holes.
447 */
448
449int /* error (positive) */
450xfs_zero_eof(
451 xfs_inode_t *ip,
452 xfs_off_t offset, /* starting I/O offset */
453 xfs_fsize_t isize) /* current inode size */
454{
455 xfs_mount_t *mp = ip->i_mount;
456 xfs_fileoff_t start_zero_fsb;
457 xfs_fileoff_t end_zero_fsb;
458 xfs_fileoff_t zero_count_fsb;
459 xfs_fileoff_t last_fsb;
460 xfs_fileoff_t zero_off;
461 xfs_fsize_t zero_len;
462 int nimaps;
463 int error = 0;
464 xfs_bmbt_irec_t imap;
465
466 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
467 ASSERT(offset > isize);
468
469 /*
470 * First handle zeroing the block on which isize resides.
471 * We only zero a part of that block so it is handled specially.
472 */
473 error = xfs_zero_last_block(ip, offset, isize);
474 if (error) {
475 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
476 return error;
477 }
478
479 /*
480 * Calculate the range between the new size and the old
481 * where blocks needing to be zeroed may exist. To get the
482 * block where the last byte in the file currently resides,
483 * we need to subtract one from the size and truncate back
484 * to a block boundary. We subtract 1 in case the size is
485 * exactly on a block boundary.
486 */
487 last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
488 start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
489 end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
490 ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
491 if (last_fsb == end_zero_fsb) {
492 /*
493 * The size was only incremented on its last block.
494 * We took care of that above, so just return.
495 */
496 return 0;
497 }
498
499 ASSERT(start_zero_fsb <= end_zero_fsb);
500 while (start_zero_fsb <= end_zero_fsb) {
501 nimaps = 1;
502 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
503 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
504 0, NULL, 0, &imap, &nimaps, NULL, NULL);
505 if (error) {
506 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
507 return error;
508 }
509 ASSERT(nimaps > 0);
510
511 if (imap.br_state == XFS_EXT_UNWRITTEN ||
512 imap.br_startblock == HOLESTARTBLOCK) {
513 /*
514 * This loop handles initializing pages that were
515 * partially initialized by the code below this
516 * loop. It basically zeroes the part of the page
517 * that sits on a hole and sets the page as P_HOLE
518 * and calls remapf if it is a mapped file.
519 */
520 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
521 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
522 continue;
523 }
524
525 /*
526 * There are blocks we need to zero.
527 * Drop the inode lock while we're doing the I/O.
528 * We'll still have the iolock to protect us.
529 */
530 xfs_iunlock(ip, XFS_ILOCK_EXCL);
531
532 zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
533 zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
534
535 if ((zero_off + zero_len) > offset)
536 zero_len = offset - zero_off;
537
538 error = xfs_iozero(ip, zero_off, zero_len);
539 if (error) {
540 goto out_lock;
541 }
542
543 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
544 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
545
546 xfs_ilock(ip, XFS_ILOCK_EXCL);
547 }
548
549 return 0;
550
551out_lock:
552 xfs_ilock(ip, XFS_ILOCK_EXCL);
553 ASSERT(error >= 0);
554 return error;
555}
556
557ssize_t /* bytes written, or (-) error */
558xfs_write(
559 struct xfs_inode *xip,
560 struct kiocb *iocb,
561 const struct iovec *iovp,
562 unsigned int nsegs,
563 loff_t *offset,
564 int ioflags)
565{
566 struct file *file = iocb->ki_filp;
567 struct address_space *mapping = file->f_mapping;
568 struct inode *inode = mapping->host;
569 unsigned long segs = nsegs;
570 xfs_mount_t *mp;
571 ssize_t ret = 0, error = 0;
572 xfs_fsize_t isize, new_size;
573 int iolock;
574 int eventsent = 0;
575 size_t ocount = 0, count;
576 loff_t pos;
577 int need_i_mutex;
578
579 XFS_STATS_INC(xs_write_calls);
580
581 error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ);
582 if (error)
583 return error;
584
585 count = ocount;
586 pos = *offset;
587
588 if (count == 0)
589 return 0;
590
591 mp = xip->i_mount;
592
593 xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);
594
595 if (XFS_FORCED_SHUTDOWN(mp))
596 return -EIO;
597
598relock:
599 if (ioflags & IO_ISDIRECT) {
600 iolock = XFS_IOLOCK_SHARED;
601 need_i_mutex = 0;
602 } else {
603 iolock = XFS_IOLOCK_EXCL;
604 need_i_mutex = 1;
605 mutex_lock(&inode->i_mutex);
606 }
607
608 xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
609
610start:
611 error = -generic_write_checks(file, &pos, &count,
612 S_ISBLK(inode->i_mode));
613 if (error) {
614 xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
615 goto out_unlock_mutex;
616 }
617
618 if ((DM_EVENT_ENABLED(xip, DM_EVENT_WRITE) &&
619 !(ioflags & IO_INVIS) && !eventsent)) {
620 int dmflags = FILP_DELAY_FLAG(file);
621
622 if (need_i_mutex)
623 dmflags |= DM_FLAGS_IMUX;
624
625 xfs_iunlock(xip, XFS_ILOCK_EXCL);
626 error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip,
627 pos, count, dmflags, &iolock);
628 if (error) {
629 goto out_unlock_internal;
630 }
631 xfs_ilock(xip, XFS_ILOCK_EXCL);
632 eventsent = 1;
633
634 /*
635 * The iolock was dropped and reacquired in XFS_SEND_DATA
636 * so we have to recheck the size when appending.
637 * We will only "goto start;" once, since having sent the
638 * event prevents another call to XFS_SEND_DATA, which is
639 * what allows the size to change in the first place.
640 */
641 if ((file->f_flags & O_APPEND) && pos != xip->i_size)
642 goto start;
643 }
644
645 if (ioflags & IO_ISDIRECT) {
646 xfs_buftarg_t *target =
647 XFS_IS_REALTIME_INODE(xip) ?
648 mp->m_rtdev_targp : mp->m_ddev_targp;
649
650 if ((pos & target->bt_smask) || (count & target->bt_smask)) {
651 xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
652 return XFS_ERROR(-EINVAL);
653 }
654
655 if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) {
656 xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
657 iolock = XFS_IOLOCK_EXCL;
658 need_i_mutex = 1;
659 mutex_lock(&inode->i_mutex);
660 xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
661 goto start;
662 }
663 }
664
665 new_size = pos + count;
666 if (new_size > xip->i_size)
667 xip->i_new_size = new_size;
668
669 if (likely(!(ioflags & IO_INVIS)))
670 file_update_time(file);
671
672 /*
673 * If the offset is beyond the size of the file, we have a couple
674 * of things to do. First, if there is already space allocated
675 * we need to either create holes or zero the disk or ...
676 *
677 * If there is a page where the previous size lands, we need
678 * to zero it out up to the new size.
679 */
680
681 if (pos > xip->i_size) {
682 error = xfs_zero_eof(xip, pos, xip->i_size);
683 if (error) {
684 xfs_iunlock(xip, XFS_ILOCK_EXCL);
685 goto out_unlock_internal;
686 }
687 }
688 xfs_iunlock(xip, XFS_ILOCK_EXCL);
689
690 /*
691 * If we're writing the file then make sure to clear the
692 * setuid and setgid bits if the process is not being run
693 * by root. This keeps people from modifying setuid and
694 * setgid binaries.
695 */
696
697 if (((xip->i_d.di_mode & S_ISUID) ||
698 ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) ==
699 (S_ISGID | S_IXGRP))) &&
700 !capable(CAP_FSETID)) {
701 error = xfs_write_clear_setuid(xip);
702 if (likely(!error))
703 error = -file_remove_suid(file);
704 if (unlikely(error)) {
705 goto out_unlock_internal;
706 }
707 }
708
709 /* We can write back this queue in page reclaim */
710 current->backing_dev_info = mapping->backing_dev_info;
711
712 if ((ioflags & IO_ISDIRECT)) {
713 if (mapping->nrpages) {
714 WARN_ON(need_i_mutex == 0);
715 xfs_inval_cached_trace(xip, pos, -1,
716 (pos & PAGE_CACHE_MASK), -1);
717 error = xfs_flushinval_pages(xip,
718 (pos & PAGE_CACHE_MASK),
719 -1, FI_REMAPF_LOCKED);
720 if (error)
721 goto out_unlock_internal;
722 }
723
724 if (need_i_mutex) {
725 /* demote the lock now the cached pages are gone */
726 xfs_ilock_demote(xip, XFS_IOLOCK_EXCL);
727 mutex_unlock(&inode->i_mutex);
728
729 iolock = XFS_IOLOCK_SHARED;
730 need_i_mutex = 0;
731 }
732
733 xfs_rw_enter_trace(XFS_DIOWR_ENTER, xip, (void *)iovp, segs,
734 *offset, ioflags);
735 ret = generic_file_direct_write(iocb, iovp,
736 &segs, pos, offset, count, ocount);
737
738 /*
739 * direct-io write to a hole: fall through to buffered I/O
740 * for completing the rest of the request.
741 */
742 if (ret >= 0 && ret != count) {
743 XFS_STATS_ADD(xs_write_bytes, ret);
744
745 pos += ret;
746 count -= ret;
747
748 ioflags &= ~IO_ISDIRECT;
749 xfs_iunlock(xip, iolock);
750 goto relock;
751 }
752 } else {
753 int enospc = 0;
754 ssize_t ret2 = 0;
755
756write_retry:
757 xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs,
758 *offset, ioflags);
759 ret2 = generic_file_buffered_write(iocb, iovp, segs,
760 pos, offset, count, ret);
761 /*
762 * if we just got an ENOSPC, flush the inode now we
763 * aren't holding any page locks and retry *once*
764 */
765 if (ret2 == -ENOSPC && !enospc) {
766 error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE);
767 if (error)
768 goto out_unlock_internal;
769 enospc = 1;
770 goto write_retry;
771 }
772 ret = ret2;
773 }
774
775 current->backing_dev_info = NULL;
776
777 if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
778 ret = wait_on_sync_kiocb(iocb);
779
780 isize = i_size_read(inode);
781 if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
782 *offset = isize;
783
784 if (*offset > xip->i_size) {
785 xfs_ilock(xip, XFS_ILOCK_EXCL);
786 if (*offset > xip->i_size)
787 xip->i_size = *offset;
788 xfs_iunlock(xip, XFS_ILOCK_EXCL);
789 }
790
791 if (ret == -ENOSPC &&
792 DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
793 xfs_iunlock(xip, iolock);
794 if (need_i_mutex)
795 mutex_unlock(&inode->i_mutex);
796 error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip,
797 DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL,
798 0, 0, 0); /* Delay flag intentionally unused */
799 if (need_i_mutex)
800 mutex_lock(&inode->i_mutex);
801 xfs_ilock(xip, iolock);
802 if (error)
803 goto out_unlock_internal;
804 goto start;
805 }
806
807 error = -ret;
808 if (ret <= 0)
809 goto out_unlock_internal;
810
811 XFS_STATS_ADD(xs_write_bytes, ret);
812
813 /* Handle various SYNC-type writes */
814 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
815 loff_t end = pos + ret - 1;
816 int error2;
817
818 xfs_iunlock(xip, iolock);
819 if (need_i_mutex)
820 mutex_unlock(&inode->i_mutex);
821
822 error2 = filemap_write_and_wait_range(mapping, pos, end);
823 if (!error)
824 error = error2;
825 if (need_i_mutex)
826 mutex_lock(&inode->i_mutex);
827 xfs_ilock(xip, iolock);
828
829 error2 = xfs_fsync(xip);
830 if (!error)
831 error = error2;
832 }
833
834 out_unlock_internal:
835 if (xip->i_new_size) {
836 xfs_ilock(xip, XFS_ILOCK_EXCL);
837 xip->i_new_size = 0;
838 /*
839 * If this was a direct or synchronous I/O that failed (such
840 * as ENOSPC) then part of the I/O may have been written to
841 * disk before the error occurred. In this case the on-disk
842 * file size may have been adjusted beyond the in-memory file
843 * size and now needs to be truncated back.
844 */
845 if (xip->i_d.di_size > xip->i_size)
846 xip->i_d.di_size = xip->i_size;
847 xfs_iunlock(xip, XFS_ILOCK_EXCL);
848 }
849 xfs_iunlock(xip, iolock);
850 out_unlock_mutex:
851 if (need_i_mutex)
852 mutex_unlock(&inode->i_mutex);
853 return -error;
854}
855
856/*
857 * All xfs metadata buffers except log state machine buffers
858 * get this attached as their b_bdstrat callback function.
859 * This is so that we can catch a buffer
860 * after prematurely unpinning it to forcibly shutdown the filesystem.
861 */
862int
863xfs_bdstrat_cb(struct xfs_buf *bp)
864{
865 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
866 xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
867 /*
868 * Metadata write that didn't get logged but
869 * written delayed anyway. These aren't associated
870 * with a transaction, and can be ignored.
871 */
872 if (XFS_BUF_IODONE_FUNC(bp) == NULL &&
873 (XFS_BUF_ISREAD(bp)) == 0)
874 return (xfs_bioerror_relse(bp));
875 else
876 return (xfs_bioerror(bp));
877 }
878
879 xfs_buf_iorequest(bp);
880 return 0;
881}
882
883/*
884 * Wrapper around bdstrat so that we can stop data from going to disk in case
885 * we are shutting down the filesystem. Typically user data goes through this
886 * path; one of the exceptions is the superblock.
887 */
888void
889xfsbdstrat(
890 struct xfs_mount *mp,
891 struct xfs_buf *bp)
892{
893 ASSERT(mp);
894 if (!XFS_FORCED_SHUTDOWN(mp)) {
895 xfs_buf_iorequest(bp);
896 return;
897 }
898
899 xfs_buftrace("XFSBDSTRAT IOERROR", bp);
900 xfs_bioerror_relse(bp);
901}
902
903/*
904 * If the underlying (data/log/rt) device is readonly, there are some
905 * operations that cannot proceed.
906 */
907int
908xfs_dev_is_read_only(
909 xfs_mount_t *mp,
910 char *message)
911{
912 if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
913 xfs_readonly_buftarg(mp->m_logdev_targp) ||
914 (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
915 cmn_err(CE_NOTE,
916 "XFS: %s required on read-only device.", message);
917 cmn_err(CE_NOTE,
918 "XFS: write access unavailable, cannot proceed.");
919 return EROFS;
920 }
921 return 0;
922}
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
deleted file mode 100644
index e6be37dbd0e9..000000000000
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ /dev/null
@@ -1,77 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_LRW_H__
19#define __XFS_LRW_H__
20
21struct xfs_mount;
22struct xfs_inode;
23struct xfs_bmbt_irec;
24struct xfs_buf;
25struct xfs_iomap;
26
27#if defined(XFS_RW_TRACE)
28/*
29 * Defines for the trace mechanisms in xfs_lrw.c.
30 */
31#define XFS_RW_KTRACE_SIZE 128
32
33#define XFS_READ_ENTER 1
34#define XFS_WRITE_ENTER 2
35#define XFS_IOMAP_READ_ENTER 3
36#define XFS_IOMAP_WRITE_ENTER 4
37#define XFS_IOMAP_READ_MAP 5
38#define XFS_IOMAP_WRITE_MAP 6
39#define XFS_IOMAP_WRITE_NOSPACE 7
40#define XFS_ITRUNC_START 8
41#define XFS_ITRUNC_FINISH1 9
42#define XFS_ITRUNC_FINISH2 10
43#define XFS_CTRUNC1 11
44#define XFS_CTRUNC2 12
45#define XFS_CTRUNC3 13
46#define XFS_CTRUNC4 14
47#define XFS_CTRUNC5 15
48#define XFS_CTRUNC6 16
49#define XFS_BUNMAP 17
50#define XFS_INVAL_CACHED 18
51#define XFS_DIORD_ENTER 19
52#define XFS_DIOWR_ENTER 20
53#define XFS_WRITEPAGE_ENTER 22
54#define XFS_RELEASEPAGE_ENTER 23
55#define XFS_INVALIDPAGE_ENTER 24
56#define XFS_IOMAP_ALLOC_ENTER 25
57#define XFS_IOMAP_ALLOC_MAP 26
58#define XFS_IOMAP_UNWRITTEN 27
59#define XFS_SPLICE_READ_ENTER 28
60#define XFS_SPLICE_WRITE_ENTER 29
61extern void xfs_rw_enter_trace(int, struct xfs_inode *,
62 void *, size_t, loff_t, int);
63extern void xfs_inval_cached_trace(struct xfs_inode *,
64 xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t);
65#else
66#define xfs_rw_enter_trace(tag, ip, data, size, offset, ioflags)
67#define xfs_inval_cached_trace(ip, offset, len, first, last)
68#endif
69
70/* errors from xfsbdstrat() must be extracted from the buffer */
71extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
72extern int xfs_bdstrat_cb(struct xfs_buf *);
73extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
74
75extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
76
77#endif /* __XFS_LRW_H__ */
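The comment above ("errors from xfsbdstrat() must be extracted from the buffer") describes a returns-void submission API. A toy stand-in illustrating that convention; the struct and values here are hypothetical, the real code keeps the error in the xfs_buf and completes it via the I/O-done path.

#include <errno.h>
#include <stdio.h>

struct buf_stub {
	int b_error;			/* completion status lives on the buffer */
};

static void bdstrat(struct buf_stub *bp, int fs_shutdown)
{
	/* returns void: on a shut-down filesystem, park EIO on the buffer
	 * rather than handing an error back to the caller */
	bp->b_error = fs_shutdown ? EIO : 0;
}

int main(void)
{
	struct buf_stub bp = { 0 };

	bdstrat(&bp, 1);
	if (bp.b_error)			/* caller extracts the error afterwards */
		fprintf(stderr, "I/O error %d on buffer\n", bp.b_error);
	return 0;
}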
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 3d4a0c84d634..1947514ce1ad 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -44,20 +44,6 @@ xfs_quota_type(int type)
44} 44}
45 45
46STATIC int 46STATIC int
47xfs_fs_quota_sync(
48 struct super_block *sb,
49 int type)
50{
51 struct xfs_mount *mp = XFS_M(sb);
52
53 if (sb->s_flags & MS_RDONLY)
54 return -EROFS;
55 if (!XFS_IS_QUOTA_RUNNING(mp))
56 return -ENOSYS;
57 return -xfs_sync_data(mp, 0);
58}
59
60STATIC int
61xfs_fs_get_xstate( 47xfs_fs_get_xstate(
62 struct super_block *sb, 48 struct super_block *sb,
63 struct fs_quota_stat *fqs) 49 struct fs_quota_stat *fqs)
@@ -82,8 +68,6 @@ xfs_fs_set_xstate(
82 return -EROFS; 68 return -EROFS;
83 if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) 69 if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
84 return -ENOSYS; 70 return -ENOSYS;
85 if (!capable(CAP_SYS_ADMIN))
86 return -EPERM;
87 71
88 if (uflags & XFS_QUOTA_UDQ_ACCT) 72 if (uflags & XFS_QUOTA_UDQ_ACCT)
89 flags |= XFS_UQUOTA_ACCT; 73 flags |= XFS_UQUOTA_ACCT;
@@ -144,14 +128,11 @@ xfs_fs_set_xquota(
144 return -ENOSYS; 128 return -ENOSYS;
145 if (!XFS_IS_QUOTA_ON(mp)) 129 if (!XFS_IS_QUOTA_ON(mp))
146 return -ESRCH; 130 return -ESRCH;
147 if (!capable(CAP_SYS_ADMIN))
148 return -EPERM;
149 131
150 return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); 132 return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
151} 133}
152 134
153const struct quotactl_ops xfs_quotactl_operations = { 135const struct quotactl_ops xfs_quotactl_operations = {
154 .quota_sync = xfs_fs_quota_sync,
155 .get_xstate = xfs_fs_get_xstate, 136 .get_xstate = xfs_fs_get_xstate,
156 .set_xstate = xfs_fs_set_xstate, 137 .set_xstate = xfs_fs_set_xstate,
157 .get_xquota = xfs_fs_get_xquota, 138 .get_xquota = xfs_fs_get_xquota,
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 18a4b8e11df2..29f1edca76de 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -15,6 +15,7 @@
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18
18#include "xfs.h" 19#include "xfs.h"
19#include "xfs_bit.h" 20#include "xfs_bit.h"
20#include "xfs_log.h" 21#include "xfs_log.h"
@@ -52,14 +53,15 @@
52#include "xfs_trans_priv.h" 53#include "xfs_trans_priv.h"
53#include "xfs_filestream.h" 54#include "xfs_filestream.h"
54#include "xfs_da_btree.h" 55#include "xfs_da_btree.h"
55#include "xfs_dir2_trace.h"
56#include "xfs_extfree_item.h" 56#include "xfs_extfree_item.h"
57#include "xfs_mru_cache.h" 57#include "xfs_mru_cache.h"
58#include "xfs_inode_item.h" 58#include "xfs_inode_item.h"
59#include "xfs_sync.h" 59#include "xfs_sync.h"
60#include "xfs_trace.h"
60 61
61#include <linux/namei.h> 62#include <linux/namei.h>
62#include <linux/init.h> 63#include <linux/init.h>
64#include <linux/slab.h>
63#include <linux/mount.h> 65#include <linux/mount.h>
64#include <linux/mempool.h> 66#include <linux/mempool.h>
65#include <linux/writeback.h> 67#include <linux/writeback.h>
@@ -876,12 +878,11 @@ xfsaild(
876{ 878{
877 struct xfs_ail *ailp = data; 879 struct xfs_ail *ailp = data;
878 xfs_lsn_t last_pushed_lsn = 0; 880 xfs_lsn_t last_pushed_lsn = 0;
879 long tout = 0; 881 long tout = 0; /* milliseconds */
880 882
881 while (!kthread_should_stop()) { 883 while (!kthread_should_stop()) {
882 if (tout) 884 schedule_timeout_interruptible(tout ?
883 schedule_timeout_interruptible(msecs_to_jiffies(tout)); 885 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
884 tout = 1000;
885 886
886 /* swsusp */ 887 /* swsusp */
887 try_to_freeze(); 888 try_to_freeze();
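A small sketch of the timeout selection the xfsaild hunk above introduces: a zero tout now means "no work pending, sleep until woken" instead of the old once-per-second poll. HZ=100 is an assumption for the jiffies conversion.

#include <limits.h>
#include <stdio.h>

#define MAX_SCHEDULE_TIMEOUT	LONG_MAX	/* kernel: block until woken */

static long msecs_to_jiffies(long ms)
{
	return ms / 10;				/* assumes HZ=100 */
}

static long pick_timeout(long tout_ms)
{
	/* tout == 0: idle, so block indefinitely rather than polling */
	return tout_ms ? msecs_to_jiffies(tout_ms) : MAX_SCHEDULE_TIMEOUT;
}

int main(void)
{
	printf("%ld %ld\n", pick_timeout(50), pick_timeout(0));
	return 0;
}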
@@ -930,13 +931,37 @@ xfs_fs_alloc_inode(
930 */ 931 */
931STATIC void 932STATIC void
932xfs_fs_destroy_inode( 933xfs_fs_destroy_inode(
933 struct inode *inode) 934 struct inode *inode)
934{ 935{
935 xfs_inode_t *ip = XFS_I(inode); 936 struct xfs_inode *ip = XFS_I(inode);
937
938 xfs_itrace_entry(ip);
936 939
937 XFS_STATS_INC(vn_reclaim); 940 XFS_STATS_INC(vn_reclaim);
938 if (xfs_reclaim(ip)) 941
939 panic("%s: cannot reclaim 0x%p\n", __func__, inode); 942 /* bad inode, get out here ASAP */
943 if (is_bad_inode(inode))
944 goto out_reclaim;
945
946 xfs_ioend_wait(ip);
947
948 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
949
950 /*
951 * We should never get here with one of the reclaim flags already set.
952 */
953 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
954 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
955
956 /*
957 * We always use background reclaim here because even if the
958 * inode is clean, it still may be under IO and hence we have
959 * to take the flush lock. The background reclaim path handles
960 * this more efficiently than we can here, so simply let background
961 * reclaim tear down all inodes.
962 */
963out_reclaim:
964 xfs_inode_set_reclaim_tag(ip);
940} 965}
941 966
942/* 967/*
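A toy illustration of the deferral described in the comment above: destroy_inode only tags the inode, and a background pass (which can safely block on the flush lock) does the teardown. The queue and worker here are hypothetical stand-ins, not the real radix-tree reclaim tags.

#include <stdio.h>

#define MAX_INODES 16

struct inode_stub {
	int ino;
	int reclaimable;
};

static struct inode_stub *reclaim_queue[MAX_INODES];	/* hypothetical queue */
static int reclaim_count;

static void set_reclaim_tag(struct inode_stub *ip)
{
	ip->reclaimable = 1;			/* destroy path only tags... */
	reclaim_queue[reclaim_count++] = ip;	/* ...and defers the teardown */
}

static void reclaim_worker(void)
{
	int i;

	/* background context: safe to block on the flush lock, do I/O, etc. */
	for (i = 0; i < reclaim_count; i++)
		printf("reclaiming inode %d\n", reclaim_queue[i]->ino);
	reclaim_count = 0;
}

int main(void)
{
	struct inode_stub a = { 42, 0 }, b = { 43, 0 };

	set_reclaim_tag(&a);
	set_reclaim_tag(&b);
	reclaim_worker();
	return 0;
}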
@@ -973,7 +998,6 @@ xfs_fs_inode_init_once(
973 998
974 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, 999 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
975 "xfsino", ip->i_ino); 1000 "xfsino", ip->i_ino);
976 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
977} 1001}
978 1002
979/* 1003/*
@@ -998,59 +1022,108 @@ xfs_fs_dirty_inode(
998 XFS_I(inode)->i_update_core = 1; 1022 XFS_I(inode)->i_update_core = 1;
999} 1023}
1000 1024
1001/* 1025STATIC int
1002 * Attempt to flush the inode, this will actually fail 1026xfs_log_inode(
1003 * if the inode is pinned, but we dirty the inode again 1027 struct xfs_inode *ip)
1004 * at the point when it is unpinned after a log write, 1028{
1005 * since this is when the inode itself becomes flushable. 1029 struct xfs_mount *mp = ip->i_mount;
1006 */ 1030 struct xfs_trans *tp;
1031 int error;
1032
1033 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1034 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
1035 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
1036
1037 if (error) {
1038 xfs_trans_cancel(tp, 0);
1039 /* we need to return with the lock held shared */
1040 xfs_ilock(ip, XFS_ILOCK_SHARED);
1041 return error;
1042 }
1043
1044 xfs_ilock(ip, XFS_ILOCK_EXCL);
1045
1046 /*
1047 * Note - it's possible that we might have pushed ourselves out of the
1048 * way during trans_reserve which would flush the inode. But there's
1049 * no guarantee that the inode buffer has actually gone out yet (it's
1050 * delwri). Plus the buffer could be pinned anyway if it's part of
1051 * an inode in another recent transaction. So we play it safe and
1052 * fire off the transaction anyway.
1053 */
1054 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1055 xfs_trans_ihold(tp, ip);
1056 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1057 xfs_trans_set_sync(tp);
1058 error = xfs_trans_commit(tp, 0);
1059 xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
1060
1061 return error;
1062}
1063
1007STATIC int 1064STATIC int
1008xfs_fs_write_inode( 1065xfs_fs_write_inode(
1009 struct inode *inode, 1066 struct inode *inode,
1010 int sync) 1067 struct writeback_control *wbc)
1011{ 1068{
1012 struct xfs_inode *ip = XFS_I(inode); 1069 struct xfs_inode *ip = XFS_I(inode);
1013 struct xfs_mount *mp = ip->i_mount; 1070 struct xfs_mount *mp = ip->i_mount;
1014 int error = 0; 1071 int error = EAGAIN;
1015 1072
1016 xfs_itrace_entry(ip); 1073 xfs_itrace_entry(ip);
1017 1074
1018 if (XFS_FORCED_SHUTDOWN(mp)) 1075 if (XFS_FORCED_SHUTDOWN(mp))
1019 return XFS_ERROR(EIO); 1076 return XFS_ERROR(EIO);
1020 1077
1021 if (sync) { 1078 if (wbc->sync_mode == WB_SYNC_ALL) {
1022 error = xfs_wait_on_pages(ip, 0, -1); 1079 /*
1023 if (error) 1080 * Make sure the inode has hit stable storage. By using the
1081 * log and the fsync transactions we reduce the IOs we have
1082 * to do here from two (log and inode) to just the log.
1083 *
1084 * Note: We still need to do a delwri write of the inode after
1085 * this to flush it to the backing buffer so that bulkstat
1086 * works properly if this is the first time the inode has been
1087 * written. Because we hold the ilock atomically over the
1088 * transaction commit and the inode flush we are guaranteed
1089 * that the inode is not pinned when it returns. If the flush
1090 * lock is already held, then the inode has already been
1091 * flushed once and we don't need to flush it again. Hence
1092 * the code will only flush the inode if it isn't already
1093 * being flushed.
1094 */
1095 xfs_ilock(ip, XFS_ILOCK_SHARED);
1096 if (ip->i_update_core) {
1097 error = xfs_log_inode(ip);
1098 if (error)
1099 goto out_unlock;
1100 }
1101 } else {
1102 /*
1103 * We make this non-blocking if the inode is contended, returning
1104 * EAGAIN to indicate to the caller that they did not succeed.
1105 * This prevents the flush path from blocking on inodes inside
1106 * another operation right now; they get caught later by xfs_sync.
1107 */
1108 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
1024 goto out; 1109 goto out;
1025 } 1110 }
1026 1111
1027 /* 1112 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
1028 * Bypass inodes which have already been cleaned by 1113 goto out_unlock;
1029 * the inode flush clustering code inside xfs_iflush
1030 */
1031 if (xfs_inode_clean(ip))
1032 goto out;
1033 1114
1034 /* 1115 /*
1035 * We make this non-blocking if the inode is contended, return 1116 * Now we have the flush lock and the inode is not pinned, we can check
1036 * EAGAIN to indicate to the caller that they did not succeed. 1117 * if the inode is really clean as we know that there are no pending
1037 * This prevents the flush path from blocking on inodes inside 1118 * transaction completions, it is not waiting on the delayed write
1038 * another operation right now, they get caught later by xfs_sync. 1119 * queue and there is no IO in progress.
1039 */ 1120 */
1040 if (sync) { 1121 if (xfs_inode_clean(ip)) {
1041 xfs_ilock(ip, XFS_ILOCK_SHARED); 1122 xfs_ifunlock(ip);
1042 xfs_iflock(ip); 1123 error = 0;
1043 1124 goto out_unlock;
1044 error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
1045 } else {
1046 error = EAGAIN;
1047 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
1048 goto out;
1049 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
1050 goto out_unlock;
1051
1052 error = xfs_iflush(ip, XFS_IFLUSH_ASYNC_NOBLOCK);
1053 } 1125 }
1126 error = xfs_iflush(ip, 0);
1054 1127
1055 out_unlock: 1128 out_unlock:
1056 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1129 xfs_iunlock(ip, XFS_ILOCK_SHARED);
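A compact userspace sketch of the non-blocking branch above, using pthreads stand-ins for the inode and flush locks: if either trylock fails, give up with EAGAIN and let the periodic sync catch the inode later. Illustrative only; build with -pthread.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct inode_stub {
	pthread_mutex_t ilock;	/* stand-in for XFS_ILOCK_SHARED */
	pthread_mutex_t iflock;	/* stand-in for the inode flush lock */
	int dirty;
};

static int write_inode_nonblocking(struct inode_stub *ip)
{
	if (pthread_mutex_trylock(&ip->ilock))
		return EAGAIN;			/* contended: retry later */
	if (pthread_mutex_trylock(&ip->iflock)) {
		pthread_mutex_unlock(&ip->ilock);
		return EAGAIN;
	}
	if (ip->dirty)
		printf("flushing inode\n");	/* stand-in for xfs_iflush(ip, 0) */
	ip->dirty = 0;
	pthread_mutex_unlock(&ip->iflock);
	pthread_mutex_unlock(&ip->ilock);
	return 0;
}

int main(void)
{
	struct inode_stub ip = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, 1
	};

	return write_inode_nonblocking(&ip);
}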
@@ -1075,6 +1148,20 @@ xfs_fs_clear_inode(
1075 XFS_STATS_INC(vn_remove); 1148 XFS_STATS_INC(vn_remove);
1076 XFS_STATS_DEC(vn_active); 1149 XFS_STATS_DEC(vn_active);
1077 1150
1151 /*
1152 * The iolock is used by the file system to coordinate reads,
1153 * writes, and block truncates. Up to this point the lock
1154 * protected concurrent accesses by users of the inode. But
1155 * from here forward we're doing some final processing of the
1156 * inode because we're done with it, and although we reuse the
1157 * iolock for protection it is really a distinct lock class
1158 * (in the lockdep sense) from before. To keep lockdep happy
1159 * (and basically indicate what we are doing), we explicitly
1160 * re-init the iolock here.
1161 */
1162 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
1163 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
1164
1078 xfs_inactive(ip); 1165 xfs_inactive(ip);
1079} 1166}
1080 1167
@@ -1092,8 +1179,6 @@ xfs_fs_put_super(
1092 struct super_block *sb) 1179 struct super_block *sb)
1093{ 1180{
1094 struct xfs_mount *mp = XFS_M(sb); 1181 struct xfs_mount *mp = XFS_M(sb);
1095 struct xfs_inode *rip = mp->m_rootip;
1096 int unmount_event_flags = 0;
1097 1182
1098 xfs_syncd_stop(mp); 1183 xfs_syncd_stop(mp);
1099 1184
@@ -1109,20 +1194,7 @@ xfs_fs_put_super(
1109 xfs_sync_attr(mp, 0); 1194 xfs_sync_attr(mp, 0);
1110 } 1195 }
1111 1196
1112#ifdef HAVE_DMAPI 1197 XFS_SEND_PREUNMOUNT(mp);
1113 if (mp->m_flags & XFS_MOUNT_DMAPI) {
1114 unmount_event_flags =
1115 (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ?
1116 0 : DM_FLAGS_UNWANTED;
1117 /*
1118 * Ignore error from dmapi here, first unmount is not allowed
1119 * to fail anyway, and second we wouldn't want to fail a
1120 * unmount because of dmapi.
1121 */
1122 XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
1123 NULL, NULL, 0, 0, unmount_event_flags);
1124 }
1125#endif
1126 1198
1127 /* 1199 /*
1128 * Blow away any referenced inode in the filestreams cache. 1200 * Blow away any referenced inode in the filestreams cache.
@@ -1133,13 +1205,11 @@ xfs_fs_put_super(
1133 1205
1134 XFS_bflush(mp->m_ddev_targp); 1206 XFS_bflush(mp->m_ddev_targp);
1135 1207
1136 if (mp->m_flags & XFS_MOUNT_DMAPI) { 1208 XFS_SEND_UNMOUNT(mp);
1137 XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
1138 unmount_event_flags);
1139 }
1140 1209
1141 xfs_unmountfs(mp); 1210 xfs_unmountfs(mp);
1142 xfs_freesb(mp); 1211 xfs_freesb(mp);
1212 xfs_inode_shrinker_unregister(mp);
1143 xfs_icsb_destroy_counters(mp); 1213 xfs_icsb_destroy_counters(mp);
1144 xfs_close_devices(mp); 1214 xfs_close_devices(mp);
1145 xfs_dmops_put(mp); 1215 xfs_dmops_put(mp);
@@ -1237,6 +1307,29 @@ xfs_fs_statfs(
1237 return 0; 1307 return 0;
1238} 1308}
1239 1309
1310STATIC void
1311xfs_save_resvblks(struct xfs_mount *mp)
1312{
1313 __uint64_t resblks = 0;
1314
1315 mp->m_resblks_save = mp->m_resblks;
1316 xfs_reserve_blocks(mp, &resblks, NULL);
1317}
1318
1319STATIC void
1320xfs_restore_resvblks(struct xfs_mount *mp)
1321{
1322 __uint64_t resblks;
1323
1324 if (mp->m_resblks_save) {
1325 resblks = mp->m_resblks_save;
1326 mp->m_resblks_save = 0;
1327 } else
1328 resblks = xfs_default_resblks(mp);
1329
1330 xfs_reserve_blocks(mp, &resblks, NULL);
1331}
1332
1240STATIC int 1333STATIC int
1241xfs_fs_remount( 1334xfs_fs_remount(
1242 struct super_block *sb, 1335 struct super_block *sb,
@@ -1316,11 +1409,27 @@ xfs_fs_remount(
1316 } 1409 }
1317 mp->m_update_flags = 0; 1410 mp->m_update_flags = 0;
1318 } 1411 }
1412
1413 /*
1414 * Fill out the reserve pool if it is empty. Use the stashed
1415 * value if it is non-zero, otherwise go with the default.
1416 */
1417 xfs_restore_resvblks(mp);
1319 } 1418 }
1320 1419
1321 /* rw -> ro */ 1420 /* rw -> ro */
1322 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { 1421 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
1422 /*
1423 * After we have synced the data but before we sync the
1424 * metadata, we need to free up the reserve block pool so that
1425 * the used block count in the superblock on disk is correct at
1426 * the end of the remount. Stash the current reserve pool size
1427 * so that if we get remounted rw, we can return it to the same
1428 * size.
1429 */
1430
1323 xfs_quiesce_data(mp); 1431 xfs_quiesce_data(mp);
1432 xfs_save_resvblks(mp);
1324 xfs_quiesce_attr(mp); 1433 xfs_quiesce_attr(mp);
1325 mp->m_flags |= XFS_MOUNT_RDONLY; 1434 mp->m_flags |= XFS_MOUNT_RDONLY;
1326 } 1435 }
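A minimal sketch of the stash-and-restore pattern these hunks add, with hypothetical fields: rw-to-ro (and freeze) stashes the reserve pool size and empties the pool so the on-disk used-block count is exact; ro-to-rw (and unfreeze) refills from the stash, falling back to a default. The 8192-block default is an assumption for illustration.

#include <stdint.h>
#include <stdio.h>

struct mount_stub {
	uint64_t resblks;	/* current reserve pool size */
	uint64_t resblks_save;	/* stashed size while ro/frozen */
};

static uint64_t default_resblks(void)
{
	return 8192;		/* assumed default for illustration */
}

static void save_resvblks(struct mount_stub *mp)
{
	mp->resblks_save = mp->resblks;
	mp->resblks = 0;	/* empty the pool: on-disk counts stay exact */
}

static void restore_resvblks(struct mount_stub *mp)
{
	mp->resblks = mp->resblks_save ? mp->resblks_save : default_resblks();
	mp->resblks_save = 0;
}

int main(void)
{
	struct mount_stub mp = { 4096, 0 };

	save_resvblks(&mp);	/* rw -> ro, or freeze */
	restore_resvblks(&mp);	/* ro -> rw, or unfreeze */
	printf("pool restored to %llu blocks\n",
	       (unsigned long long)mp.resblks);
	return 0;
}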
@@ -1339,11 +1448,22 @@ xfs_fs_freeze(
1339{ 1448{
1340 struct xfs_mount *mp = XFS_M(sb); 1449 struct xfs_mount *mp = XFS_M(sb);
1341 1450
1451 xfs_save_resvblks(mp);
1342 xfs_quiesce_attr(mp); 1452 xfs_quiesce_attr(mp);
1343 return -xfs_fs_log_dummy(mp); 1453 return -xfs_fs_log_dummy(mp);
1344} 1454}
1345 1455
1346STATIC int 1456STATIC int
1457xfs_fs_unfreeze(
1458 struct super_block *sb)
1459{
1460 struct xfs_mount *mp = XFS_M(sb);
1461
1462 xfs_restore_resvblks(mp);
1463 return 0;
1464}
1465
1466STATIC int
1347xfs_fs_show_options( 1467xfs_fs_show_options(
1348 struct seq_file *m, 1468 struct seq_file *m,
1349 struct vfsmount *mnt) 1469 struct vfsmount *mnt)
@@ -1503,9 +1623,9 @@ xfs_fs_fill_super(
1503 if (error) 1623 if (error)
1504 goto fail_vnrele; 1624 goto fail_vnrele;
1505 1625
1506 kfree(mtpt); 1626 xfs_inode_shrinker_register(mp);
1507 1627
1508 xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); 1628 kfree(mtpt);
1509 return 0; 1629 return 0;
1510 1630
1511 out_filestream_unmount: 1631 out_filestream_unmount:
@@ -1567,6 +1687,7 @@ static const struct super_operations xfs_super_operations = {
1567 .put_super = xfs_fs_put_super, 1687 .put_super = xfs_fs_put_super,
1568 .sync_fs = xfs_fs_sync_fs, 1688 .sync_fs = xfs_fs_sync_fs,
1569 .freeze_fs = xfs_fs_freeze, 1689 .freeze_fs = xfs_fs_freeze,
1690 .unfreeze_fs = xfs_fs_unfreeze,
1570 .statfs = xfs_fs_statfs, 1691 .statfs = xfs_fs_statfs,
1571 .remount_fs = xfs_fs_remount, 1692 .remount_fs = xfs_fs_remount,
1572 .show_options = xfs_fs_show_options, 1693 .show_options = xfs_fs_show_options,
@@ -1581,94 +1702,6 @@ static struct file_system_type xfs_fs_type = {
1581}; 1702};
1582 1703
1583STATIC int __init 1704STATIC int __init
1584xfs_alloc_trace_bufs(void)
1585{
1586#ifdef XFS_ALLOC_TRACE
1587 xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_MAYFAIL);
1588 if (!xfs_alloc_trace_buf)
1589 goto out;
1590#endif
1591#ifdef XFS_BMAP_TRACE
1592 xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_MAYFAIL);
1593 if (!xfs_bmap_trace_buf)
1594 goto out_free_alloc_trace;
1595#endif
1596#ifdef XFS_BTREE_TRACE
1597 xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE,
1598 KM_MAYFAIL);
1599 if (!xfs_allocbt_trace_buf)
1600 goto out_free_bmap_trace;
1601
1602 xfs_inobt_trace_buf = ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL);
1603 if (!xfs_inobt_trace_buf)
1604 goto out_free_allocbt_trace;
1605
1606 xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
1607 if (!xfs_bmbt_trace_buf)
1608 goto out_free_inobt_trace;
1609#endif
1610#ifdef XFS_ATTR_TRACE
1611 xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
1612 if (!xfs_attr_trace_buf)
1613 goto out_free_bmbt_trace;
1614#endif
1615#ifdef XFS_DIR2_TRACE
1616 xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_MAYFAIL);
1617 if (!xfs_dir2_trace_buf)
1618 goto out_free_attr_trace;
1619#endif
1620
1621 return 0;
1622
1623#ifdef XFS_DIR2_TRACE
1624 out_free_attr_trace:
1625#endif
1626#ifdef XFS_ATTR_TRACE
1627 ktrace_free(xfs_attr_trace_buf);
1628 out_free_bmbt_trace:
1629#endif
1630#ifdef XFS_BTREE_TRACE
1631 ktrace_free(xfs_bmbt_trace_buf);
1632 out_free_inobt_trace:
1633 ktrace_free(xfs_inobt_trace_buf);
1634 out_free_allocbt_trace:
1635 ktrace_free(xfs_allocbt_trace_buf);
1636 out_free_bmap_trace:
1637#endif
1638#ifdef XFS_BMAP_TRACE
1639 ktrace_free(xfs_bmap_trace_buf);
1640 out_free_alloc_trace:
1641#endif
1642#ifdef XFS_ALLOC_TRACE
1643 ktrace_free(xfs_alloc_trace_buf);
1644 out:
1645#endif
1646 return -ENOMEM;
1647}
1648
1649STATIC void
1650xfs_free_trace_bufs(void)
1651{
1652#ifdef XFS_DIR2_TRACE
1653 ktrace_free(xfs_dir2_trace_buf);
1654#endif
1655#ifdef XFS_ATTR_TRACE
1656 ktrace_free(xfs_attr_trace_buf);
1657#endif
1658#ifdef XFS_BTREE_TRACE
1659 ktrace_free(xfs_bmbt_trace_buf);
1660 ktrace_free(xfs_inobt_trace_buf);
1661 ktrace_free(xfs_allocbt_trace_buf);
1662#endif
1663#ifdef XFS_BMAP_TRACE
1664 ktrace_free(xfs_bmap_trace_buf);
1665#endif
1666#ifdef XFS_ALLOC_TRACE
1667 ktrace_free(xfs_alloc_trace_buf);
1668#endif
1669}
1670
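All of the #ifdef'd ktrace ring-buffer allocation and teardown above is deleted outright: the old ad-hoc tracing is superseded by the standard tracepoints added in xfs_trace.c/xfs_trace.h later in this patch. A call site shrinks to a single generated stub that compiles to a no-op unless the event is enabled, e.g. (using the xfs_buf_get event defined below):

	trace_xfs_buf_get(bp, flags, _RET_IP_);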
1671STATIC int __init
1672xfs_init_zones(void) 1705xfs_init_zones(void)
1673{ 1706{
1674 1707
@@ -1809,7 +1842,6 @@ init_xfs_fs(void)
1809 printk(KERN_INFO XFS_VERSION_STRING " with " 1842 printk(KERN_INFO XFS_VERSION_STRING " with "
1810 XFS_BUILD_OPTIONS " enabled\n"); 1843 XFS_BUILD_OPTIONS " enabled\n");
1811 1844
1812 ktrace_init(64);
1813 xfs_ioend_init(); 1845 xfs_ioend_init();
1814 xfs_dir_startup(); 1846 xfs_dir_startup();
1815 1847
@@ -1817,13 +1849,9 @@ init_xfs_fs(void)
1817 if (error) 1849 if (error)
1818 goto out; 1850 goto out;
1819 1851
1820 error = xfs_alloc_trace_bufs();
1821 if (error)
1822 goto out_destroy_zones;
1823
1824 error = xfs_mru_cache_init(); 1852 error = xfs_mru_cache_init();
1825 if (error) 1853 if (error)
1826 goto out_free_trace_buffers; 1854 goto out_destroy_zones;
1827 1855
1828 error = xfs_filestream_init(); 1856 error = xfs_filestream_init();
1829 if (error) 1857 if (error)
@@ -1842,6 +1870,7 @@ init_xfs_fs(void)
1842 goto out_cleanup_procfs; 1870 goto out_cleanup_procfs;
1843 1871
1844 vfs_initquota(); 1872 vfs_initquota();
1873 xfs_inode_shrinker_init();
1845 1874
1846 error = register_filesystem(&xfs_fs_type); 1875 error = register_filesystem(&xfs_fs_type);
1847 if (error) 1876 if (error)
@@ -1858,8 +1887,6 @@ init_xfs_fs(void)
1858 xfs_filestream_uninit(); 1887 xfs_filestream_uninit();
1859 out_mru_cache_uninit: 1888 out_mru_cache_uninit:
1860 xfs_mru_cache_uninit(); 1889 xfs_mru_cache_uninit();
1861 out_free_trace_buffers:
1862 xfs_free_trace_bufs();
1863 out_destroy_zones: 1890 out_destroy_zones:
1864 xfs_destroy_zones(); 1891 xfs_destroy_zones();
1865 out: 1892 out:
@@ -1871,14 +1898,13 @@ exit_xfs_fs(void)
1871{ 1898{
1872 vfs_exitquota(); 1899 vfs_exitquota();
1873 unregister_filesystem(&xfs_fs_type); 1900 unregister_filesystem(&xfs_fs_type);
1901 xfs_inode_shrinker_destroy();
1874 xfs_sysctl_unregister(); 1902 xfs_sysctl_unregister();
1875 xfs_cleanup_procfs(); 1903 xfs_cleanup_procfs();
1876 xfs_buf_terminate(); 1904 xfs_buf_terminate();
1877 xfs_filestream_uninit(); 1905 xfs_filestream_uninit();
1878 xfs_mru_cache_uninit(); 1906 xfs_mru_cache_uninit();
1879 xfs_free_trace_bufs();
1880 xfs_destroy_zones(); 1907 xfs_destroy_zones();
1881 ktrace_uninit();
1882} 1908}
1883 1909
1884module_init(init_xfs_fs); 1910module_init(init_xfs_fs);
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 18175ebd58ed..233d4b9881b1 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -56,12 +56,6 @@ extern void xfs_qm_exit(void);
56# define XFS_BIGFS_STRING 56# define XFS_BIGFS_STRING
57#endif 57#endif
58 58
59#ifdef CONFIG_XFS_TRACE
60# define XFS_TRACE_STRING "tracing, "
61#else
62# define XFS_TRACE_STRING
63#endif
64
65#ifdef CONFIG_XFS_DMAPI 59#ifdef CONFIG_XFS_DMAPI
66# define XFS_DMAPI_STRING "dmapi support, " 60# define XFS_DMAPI_STRING "dmapi support, "
67#else 61#else
@@ -78,7 +72,6 @@ extern void xfs_qm_exit(void);
78 XFS_SECURITY_STRING \ 72 XFS_SECURITY_STRING \
79 XFS_REALTIME_STRING \ 73 XFS_REALTIME_STRING \
80 XFS_BIGFS_STRING \ 74 XFS_BIGFS_STRING \
81 XFS_TRACE_STRING \
82 XFS_DMAPI_STRING \ 75 XFS_DMAPI_STRING \
83 XFS_DBG_STRING /* DBG must be last */ 76 XFS_DBG_STRING /* DBG must be last */
84 77
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 961df0a22c78..a427c638d909 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -44,6 +44,7 @@
44#include "xfs_inode_item.h" 44#include "xfs_inode_item.h"
45#include "xfs_rw.h" 45#include "xfs_rw.h"
46#include "xfs_quota.h" 46#include "xfs_quota.h"
47#include "xfs_trace.h"
47 48
48#include <linux/kthread.h> 49#include <linux/kthread.h>
49#include <linux/freezer.h> 50#include <linux/freezer.h>
@@ -64,7 +65,6 @@ xfs_inode_ag_lookup(
64 * as the tree is sparse and a gang lookup walks to find 65 * as the tree is sparse and a gang lookup walks to find
65 * the number of objects requested. 66 * the number of objects requested.
66 */ 67 */
67 read_lock(&pag->pag_ici_lock);
68 if (tag == XFS_ICI_NO_TAG) { 68 if (tag == XFS_ICI_NO_TAG) {
69 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 69 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
70 (void **)&ip, *first_index, 1); 70 (void **)&ip, *first_index, 1);
@@ -73,7 +73,7 @@ xfs_inode_ag_lookup(
73 (void **)&ip, *first_index, 1, tag); 73 (void **)&ip, *first_index, 1, tag);
74 } 74 }
75 if (!nr_found) 75 if (!nr_found)
76 goto unlock; 76 return NULL;
77 77
78 /* 78 /*
79 * Update the index for the next lookup. Catch overflows 79 * Update the index for the next lookup. Catch overflows
@@ -83,25 +83,21 @@ xfs_inode_ag_lookup(
83 */ 83 */
84 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 84 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
85 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 85 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
86 goto unlock; 86 return NULL;
87
88 return ip; 87 return ip;
89
90unlock:
91 read_unlock(&pag->pag_ici_lock);
92 return NULL;
93} 88}
94 89
95STATIC int 90STATIC int
96xfs_inode_ag_walk( 91xfs_inode_ag_walk(
97 struct xfs_mount *mp, 92 struct xfs_mount *mp,
98 xfs_agnumber_t ag, 93 struct xfs_perag *pag,
99 int (*execute)(struct xfs_inode *ip, 94 int (*execute)(struct xfs_inode *ip,
100 struct xfs_perag *pag, int flags), 95 struct xfs_perag *pag, int flags),
101 int flags, 96 int flags,
102 int tag) 97 int tag,
98 int exclusive,
99 int *nr_to_scan)
103{ 100{
104 struct xfs_perag *pag = &mp->m_perag[ag];
105 uint32_t first_index; 101 uint32_t first_index;
106 int last_error = 0; 102 int last_error = 0;
107 int skipped; 103 int skipped;
@@ -113,10 +109,20 @@ restart:
113 int error = 0; 109 int error = 0;
114 xfs_inode_t *ip; 110 xfs_inode_t *ip;
115 111
112 if (exclusive)
113 write_lock(&pag->pag_ici_lock);
114 else
115 read_lock(&pag->pag_ici_lock);
116 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); 116 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
117 if (!ip) 117 if (!ip) {
118 if (exclusive)
119 write_unlock(&pag->pag_ici_lock);
120 else
121 read_unlock(&pag->pag_ici_lock);
118 break; 122 break;
123 }
119 124
125 /* execute releases pag->pag_ici_lock */
120 error = execute(ip, pag, flags); 126 error = execute(ip, pag, flags);
121 if (error == EAGAIN) { 127 if (error == EAGAIN) {
122 skipped++; 128 skipped++;
@@ -124,20 +130,17 @@ restart:
124 } 130 }
125 if (error) 131 if (error)
126 last_error = error; 132 last_error = error;
127 /* 133
128 * bail out if the filesystem is corrupted. 134 /* bail out if the filesystem is corrupted. */
129 */
130 if (error == EFSCORRUPTED) 135 if (error == EFSCORRUPTED)
131 break; 136 break;
132 137
133 } while (1); 138 } while ((*nr_to_scan)--);
134 139
135 if (skipped) { 140 if (skipped) {
136 delay(1); 141 delay(1);
137 goto restart; 142 goto restart;
138 } 143 }
139
140 xfs_put_perag(mp, pag);
141 return last_error; 144 return last_error;
142} 145}
143 146
@@ -147,22 +150,37 @@ xfs_inode_ag_iterator(
147 int (*execute)(struct xfs_inode *ip, 150 int (*execute)(struct xfs_inode *ip,
148 struct xfs_perag *pag, int flags), 151 struct xfs_perag *pag, int flags),
149 int flags, 152 int flags,
150 int tag) 153 int tag,
154 int exclusive,
155 int *nr_to_scan)
151{ 156{
152 int error = 0; 157 int error = 0;
153 int last_error = 0; 158 int last_error = 0;
154 xfs_agnumber_t ag; 159 xfs_agnumber_t ag;
160 int nr;
155 161
162 nr = nr_to_scan ? *nr_to_scan : INT_MAX;
156 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 163 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
157 if (!mp->m_perag[ag].pag_ici_init) 164 struct xfs_perag *pag;
165
166 pag = xfs_perag_get(mp, ag);
167 if (!pag->pag_ici_init) {
168 xfs_perag_put(pag);
158 continue; 169 continue;
159 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); 170 }
171 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
172 exclusive, &nr);
173 xfs_perag_put(pag);
160 if (error) { 174 if (error) {
161 last_error = error; 175 last_error = error;
162 if (error == EFSCORRUPTED) 176 if (error == EFSCORRUPTED)
163 break; 177 break;
164 } 178 }
179 if (nr <= 0)
180 break;
165 } 181 }
182 if (nr_to_scan)
183 *nr_to_scan = nr;
166 return XFS_ERROR(last_error); 184 return XFS_ERROR(last_error);
167} 185}
168 186
@@ -173,30 +191,31 @@ xfs_sync_inode_valid(
173 struct xfs_perag *pag) 191 struct xfs_perag *pag)
174{ 192{
175 struct inode *inode = VFS_I(ip); 193 struct inode *inode = VFS_I(ip);
194 int error = EFSCORRUPTED;
176 195
177 /* nothing to sync during shutdown */ 196 /* nothing to sync during shutdown */
178 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 197 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
179 read_unlock(&pag->pag_ici_lock); 198 goto out_unlock;
180 return EFSCORRUPTED;
181 }
182 199
183 /* 200 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
184 * If we can't get a reference on the inode, it must be in reclaim. 201 error = ENOENT;
185 * Leave it for the reclaim code to flush. Also avoid inodes that 202 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
186 * haven't been fully initialised. 203 goto out_unlock;
187 */ 204
 188 if (!igrab(inode)) { 205 /* If we can't grab the inode, it must be on its way to reclaim. */
189 read_unlock(&pag->pag_ici_lock); 206 if (!igrab(inode))
190 return ENOENT; 207 goto out_unlock;
191 }
192 read_unlock(&pag->pag_ici_lock);
193 208
194 if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { 209 if (is_bad_inode(inode)) {
195 IRELE(ip); 210 IRELE(ip);
196 return ENOENT; 211 goto out_unlock;
197 } 212 }
198 213
199 return 0; 214 /* inode is valid */
215 error = 0;
216out_unlock:
217 read_unlock(&pag->pag_ici_lock);
218 return error;
200} 219}
201 220
202STATIC int 221STATIC int
@@ -223,7 +242,7 @@ xfs_sync_inode_data(
223 } 242 }
224 243
225 error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? 244 error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
226 0 : XFS_B_ASYNC, FI_NONE); 245 0 : XBF_ASYNC, FI_NONE);
227 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 246 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
228 247
229 out_wait: 248 out_wait:
@@ -259,8 +278,7 @@ xfs_sync_inode_attr(
259 goto out_unlock; 278 goto out_unlock;
260 } 279 }
261 280
262 error = xfs_iflush(ip, (flags & SYNC_WAIT) ? 281 error = xfs_iflush(ip, flags);
263 XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI);
264 282
265 out_unlock: 283 out_unlock:
266 xfs_iunlock(ip, XFS_ILOCK_SHARED); 284 xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -281,14 +299,11 @@ xfs_sync_data(
281 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); 299 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
282 300
283 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, 301 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
284 XFS_ICI_NO_TAG); 302 XFS_ICI_NO_TAG, 0, NULL);
285 if (error) 303 if (error)
286 return XFS_ERROR(error); 304 return XFS_ERROR(error);
287 305
288 xfs_log_force(mp, 0, 306 xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
289 (flags & SYNC_WAIT) ?
290 XFS_LOG_FORCE | XFS_LOG_SYNC :
291 XFS_LOG_FORCE);
292 return 0; 307 return 0;
293} 308}
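This hunk also introduces the simplified xfs_log_force() calling convention used throughout the rest of the patch: the LSN argument and the XFS_LOG_FORCE flag are gone, leaving the mount and an optional XFS_LOG_SYNC. In short:

	xfs_log_force(mp, XFS_LOG_SYNC);	/* force the log and wait */
	xfs_log_force(mp, 0);			/* start the force, don't wait */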
294 309
@@ -303,7 +318,7 @@ xfs_sync_attr(
303 ASSERT((flags & ~SYNC_WAIT) == 0); 318 ASSERT((flags & ~SYNC_WAIT) == 0);
304 319
305 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, 320 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
306 XFS_ICI_NO_TAG); 321 XFS_ICI_NO_TAG, 0, NULL);
307} 322}
308 323
309STATIC int 324STATIC int
@@ -314,10 +329,6 @@ xfs_commit_dummy_trans(
314 struct xfs_inode *ip = mp->m_rootip; 329 struct xfs_inode *ip = mp->m_rootip;
315 struct xfs_trans *tp; 330 struct xfs_trans *tp;
316 int error; 331 int error;
317 int log_flags = XFS_LOG_FORCE;
318
319 if (flags & SYNC_WAIT)
320 log_flags |= XFS_LOG_SYNC;
321 332
322 /* 333 /*
323 * Put a dummy transaction in the log to tell recovery 334 * Put a dummy transaction in the log to tell recovery
@@ -339,11 +350,11 @@ xfs_commit_dummy_trans(
339 xfs_iunlock(ip, XFS_ILOCK_EXCL); 350 xfs_iunlock(ip, XFS_ILOCK_EXCL);
340 351
341 /* the log force ensures this transaction is pushed to disk */ 352 /* the log force ensures this transaction is pushed to disk */
342 xfs_log_force(mp, 0, log_flags); 353 xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
343 return error; 354 return error;
344} 355}
345 356
346int 357STATIC int
347xfs_sync_fsdata( 358xfs_sync_fsdata(
348 struct xfs_mount *mp, 359 struct xfs_mount *mp,
349 int flags) 360 int flags)
@@ -359,7 +370,7 @@ xfs_sync_fsdata(
359 if (flags & SYNC_TRYLOCK) { 370 if (flags & SYNC_TRYLOCK) {
360 ASSERT(!(flags & SYNC_WAIT)); 371 ASSERT(!(flags & SYNC_WAIT));
361 372
362 bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); 373 bp = xfs_getsb(mp, XBF_TRYLOCK);
363 if (!bp) 374 if (!bp)
364 goto out; 375 goto out;
365 376
@@ -379,7 +390,7 @@ xfs_sync_fsdata(
379 * become pinned in between there and here. 390 * become pinned in between there and here.
380 */ 391 */
381 if (XFS_BUF_ISPINNED(bp)) 392 if (XFS_BUF_ISPINNED(bp))
382 xfs_log_force(mp, 0, XFS_LOG_FORCE); 393 xfs_log_force(mp, 0);
383 } 394 }
384 395
385 396
@@ -440,9 +451,6 @@ xfs_quiesce_data(
440 xfs_sync_data(mp, SYNC_WAIT); 451 xfs_sync_data(mp, SYNC_WAIT);
441 xfs_qm_sync(mp, SYNC_WAIT); 452 xfs_qm_sync(mp, SYNC_WAIT);
442 453
443 /* drop inode references pinned by filestreams */
444 xfs_filestream_flush(mp);
445
446 /* write superblock and hoover up shutdown errors */ 454 /* write superblock and hoover up shutdown errors */
447 error = xfs_sync_fsdata(mp, SYNC_WAIT); 455 error = xfs_sync_fsdata(mp, SYNC_WAIT);
448 456
@@ -459,16 +467,18 @@ xfs_quiesce_fs(
459{ 467{
460 int count = 0, pincount; 468 int count = 0, pincount;
461 469
470 xfs_reclaim_inodes(mp, 0);
462 xfs_flush_buftarg(mp->m_ddev_targp, 0); 471 xfs_flush_buftarg(mp->m_ddev_targp, 0);
463 xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
464 472
465 /* 473 /*
466 * This loop must run at least twice. The first instance of the loop 474 * This loop must run at least twice. The first instance of the loop
 467 * will flush most metadata but that will generate more metadata 475 * will flush most metadata but that will generate more metadata
 468 * (typically directory updates), which then must be flushed and 476 * (typically directory updates), which then must be flushed and
 469 * logged before we can write the unmount record. 477 * logged before we can write the unmount record. We also do a sync
 478 * reclaim of inodes to catch any that the above delwri flush skipped.
470 */ 479 */
471 do { 480 do {
481 xfs_reclaim_inodes(mp, SYNC_WAIT);
472 xfs_sync_attr(mp, SYNC_WAIT); 482 xfs_sync_attr(mp, SYNC_WAIT);
473 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); 483 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
474 if (!pincount) { 484 if (!pincount) {
@@ -567,7 +577,7 @@ xfs_flush_inodes(
567 igrab(inode); 577 igrab(inode);
568 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); 578 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
569 wait_for_completion(&completion); 579 wait_for_completion(&completion);
570 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); 580 xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
571} 581}
572 582
573/* 583/*
@@ -583,8 +593,8 @@ xfs_sync_worker(
583 int error; 593 int error;
584 594
585 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { 595 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
586 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 596 xfs_log_force(mp, 0);
587 xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); 597 xfs_reclaim_inodes(mp, 0);
588 /* dgc: errors ignored here */ 598 /* dgc: errors ignored here */
589 error = xfs_qm_sync(mp, SYNC_TRYLOCK); 599 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
590 error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); 600 error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
@@ -605,7 +615,8 @@ xfssyncd(
605 set_freezable(); 615 set_freezable();
606 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); 616 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
607 for (;;) { 617 for (;;) {
608 timeleft = schedule_timeout_interruptible(timeleft); 618 if (list_empty(&mp->m_sync_list))
619 timeleft = schedule_timeout_interruptible(timeleft);
609 /* swsusp */ 620 /* swsusp */
610 try_to_freeze(); 621 try_to_freeze();
611 if (kthread_should_stop() && list_empty(&mp->m_sync_list)) 622 if (kthread_should_stop() && list_empty(&mp->m_sync_list))
@@ -625,8 +636,7 @@ xfssyncd(
625 list_add_tail(&mp->m_sync_work.w_list, 636 list_add_tail(&mp->m_sync_work.w_list,
626 &mp->m_sync_list); 637 &mp->m_sync_list);
627 } 638 }
628 list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list) 639 list_splice_init(&mp->m_sync_list, &tmp);
629 list_move(&work->w_list, &tmp);
630 spin_unlock(&mp->m_sync_lock); 640 spin_unlock(&mp->m_sync_lock);
631 641
632 list_for_each_entry_safe(work, n, &tmp, w_list) { 642 list_for_each_entry_safe(work, n, &tmp, w_list) {
@@ -663,67 +673,6 @@ xfs_syncd_stop(
663 kthread_stop(mp->m_sync_task); 673 kthread_stop(mp->m_sync_task);
664} 674}
665 675
666int
667xfs_reclaim_inode(
668 xfs_inode_t *ip,
669 int locked,
670 int sync_mode)
671{
672 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
673
674 /* The hash lock here protects a thread in xfs_iget_core from
675 * racing with us on linking the inode back with a vnode.
676 * Once we have the XFS_IRECLAIM flag set it will not touch
677 * us.
678 */
679 write_lock(&pag->pag_ici_lock);
680 spin_lock(&ip->i_flags_lock);
681 if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
682 !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
683 spin_unlock(&ip->i_flags_lock);
684 write_unlock(&pag->pag_ici_lock);
685 if (locked) {
686 xfs_ifunlock(ip);
687 xfs_iunlock(ip, XFS_ILOCK_EXCL);
688 }
689 return -EAGAIN;
690 }
691 __xfs_iflags_set(ip, XFS_IRECLAIM);
692 spin_unlock(&ip->i_flags_lock);
693 write_unlock(&pag->pag_ici_lock);
694 xfs_put_perag(ip->i_mount, pag);
695
696 /*
697 * If the inode is still dirty, then flush it out. If the inode
698 * is not in the AIL, then it will be OK to flush it delwri as
699 * long as xfs_iflush() does not keep any references to the inode.
700 * We leave that decision up to xfs_iflush() since it has the
701 * knowledge of whether it's OK to simply do a delwri flush of
702 * the inode or whether we need to wait until the inode is
703 * pulled from the AIL.
704 * We get the flush lock regardless, though, just to make sure
705 * we don't free it while it is being flushed.
706 */
707 if (!locked) {
708 xfs_ilock(ip, XFS_ILOCK_EXCL);
709 xfs_iflock(ip);
710 }
711
712 /*
713 * In the case of a forced shutdown we rely on xfs_iflush() to
714 * wait for the inode to be unpinned before returning an error.
715 */
716 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
717 /* synchronize with xfs_iflush_done */
718 xfs_iflock(ip);
719 xfs_ifunlock(ip);
720 }
721
722 xfs_iunlock(ip, XFS_ILOCK_EXCL);
723 xfs_ireclaim(ip);
724 return 0;
725}
726
727void 676void
728__xfs_inode_set_reclaim_tag( 677__xfs_inode_set_reclaim_tag(
729 struct xfs_perag *pag, 678 struct xfs_perag *pag,
@@ -732,6 +681,7 @@ __xfs_inode_set_reclaim_tag(
732 radix_tree_tag_set(&pag->pag_ici_root, 681 radix_tree_tag_set(&pag->pag_ici_root,
733 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), 682 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
734 XFS_ICI_RECLAIM_TAG); 683 XFS_ICI_RECLAIM_TAG);
684 pag->pag_ici_reclaimable++;
735} 685}
736 686
737/* 687/*
@@ -743,16 +693,17 @@ void
743xfs_inode_set_reclaim_tag( 693xfs_inode_set_reclaim_tag(
744 xfs_inode_t *ip) 694 xfs_inode_t *ip)
745{ 695{
746 xfs_mount_t *mp = ip->i_mount; 696 struct xfs_mount *mp = ip->i_mount;
747 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); 697 struct xfs_perag *pag;
748 698
749 read_lock(&pag->pag_ici_lock); 699 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
700 write_lock(&pag->pag_ici_lock);
750 spin_lock(&ip->i_flags_lock); 701 spin_lock(&ip->i_flags_lock);
751 __xfs_inode_set_reclaim_tag(pag, ip); 702 __xfs_inode_set_reclaim_tag(pag, ip);
752 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 703 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
753 spin_unlock(&ip->i_flags_lock); 704 spin_unlock(&ip->i_flags_lock);
754 read_unlock(&pag->pag_ici_lock); 705 write_unlock(&pag->pag_ici_lock);
755 xfs_put_perag(mp, pag); 706 xfs_perag_put(pag);
756} 707}
757 708
758void 709void
@@ -763,22 +714,148 @@ __xfs_inode_clear_reclaim_tag(
763{ 714{
764 radix_tree_tag_clear(&pag->pag_ici_root, 715 radix_tree_tag_clear(&pag->pag_ici_root,
765 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); 716 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
717 pag->pag_ici_reclaimable--;
766} 718}
767 719
720/*
721 * Inodes in different states need to be treated differently, and the return
722 * value of xfs_iflush is not sufficient to get this right. The following table
723 * lists the inode states and the reclaim actions necessary for non-blocking
724 * reclaim:
725 *
726 *
727 * inode state iflush ret required action
728 * --------------- ---------- ---------------
729 * bad - reclaim
730 * shutdown EIO unpin and reclaim
731 * clean, unpinned 0 reclaim
732 * stale, unpinned 0 reclaim
733 * clean, pinned(*) 0 requeue
734 * stale, pinned EAGAIN requeue
735 * dirty, delwri ok 0 requeue
736 * dirty, delwri blocked EAGAIN requeue
737 * dirty, sync flush 0 reclaim
738 *
739 * (*) dgc: I don't think the clean, pinned state is possible but it gets
740 * handled anyway given the order of checks implemented.
741 *
742 * As can be seen from the table, the return value of xfs_iflush() is not
743 * sufficient to correctly decide the reclaim action here. The checks in
744 * xfs_iflush() might look like duplicates, but they are not.
745 *
746 * Also, because we get the flush lock first, we know that any inode that has
747 * been flushed delwri has had the flush completed by the time we check that
748 * the inode is clean. The clean inode check needs to be done before flushing
 749 * the inode delwri, otherwise we would loop forever requeuing clean inodes,
 750 * as we cannot tell a successful delwri flush apart from a clean inode by
 751 * the return value of xfs_iflush().
752 *
753 * Note that because the inode is flushed delayed write by background
754 * writeback, the flush lock may already be held here and waiting on it can
755 * result in very long latencies. Hence for sync reclaims, where we wait on the
756 * flush lock, the caller should push out delayed write inodes first before
757 * trying to reclaim them to minimise the amount of time spent waiting. For
 758 * background reclaim, we just requeue the inode for the next pass.
759 *
760 * Hence the order of actions after gaining the locks should be:
761 * bad => reclaim
762 * shutdown => unpin and reclaim
763 * pinned, delwri => requeue
764 * pinned, sync => unpin
765 * stale => reclaim
766 * clean => reclaim
767 * dirty, delwri => flush and requeue
768 * dirty, sync => flush, wait and reclaim
769 */
768STATIC int 770STATIC int
769xfs_reclaim_inode_now( 771xfs_reclaim_inode(
770 struct xfs_inode *ip, 772 struct xfs_inode *ip,
771 struct xfs_perag *pag, 773 struct xfs_perag *pag,
772 int flags) 774 int sync_mode)
773{ 775{
774 /* ignore if already under reclaim */ 776 int error = 0;
775 if (xfs_iflags_test(ip, XFS_IRECLAIM)) { 777
776 read_unlock(&pag->pag_ici_lock); 778 /*
779 * The radix tree lock here protects a thread in xfs_iget from racing
780 * with us starting reclaim on the inode. Once we have the
781 * XFS_IRECLAIM flag set it will not touch us.
782 */
783 spin_lock(&ip->i_flags_lock);
784 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
785 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
786 /* ignore as it is already under reclaim */
787 spin_unlock(&ip->i_flags_lock);
788 write_unlock(&pag->pag_ici_lock);
777 return 0; 789 return 0;
778 } 790 }
779 read_unlock(&pag->pag_ici_lock); 791 __xfs_iflags_set(ip, XFS_IRECLAIM);
792 spin_unlock(&ip->i_flags_lock);
793 write_unlock(&pag->pag_ici_lock);
794
795 xfs_ilock(ip, XFS_ILOCK_EXCL);
796 if (!xfs_iflock_nowait(ip)) {
797 if (!(sync_mode & SYNC_WAIT))
798 goto out;
799 xfs_iflock(ip);
800 }
801
802 if (is_bad_inode(VFS_I(ip)))
803 goto reclaim;
804 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
805 xfs_iunpin_wait(ip);
806 goto reclaim;
807 }
808 if (xfs_ipincount(ip)) {
809 if (!(sync_mode & SYNC_WAIT)) {
810 xfs_ifunlock(ip);
811 goto out;
812 }
813 xfs_iunpin_wait(ip);
814 }
815 if (xfs_iflags_test(ip, XFS_ISTALE))
816 goto reclaim;
817 if (xfs_inode_clean(ip))
818 goto reclaim;
819
820 /* Now we have an inode that needs flushing */
821 error = xfs_iflush(ip, sync_mode);
822 if (sync_mode & SYNC_WAIT) {
823 xfs_iflock(ip);
824 goto reclaim;
825 }
826
827 /*
828 * When we have to flush an inode but don't have SYNC_WAIT set, we
829 * flush the inode out using a delwri buffer and wait for the next
830 * call into reclaim to find it in a clean state instead of waiting for
831 * it now. We also don't return errors here - if the error is transient
832 * then the next reclaim pass will flush the inode, and if the error
833 * is permanent then the next sync reclaim will reclaim the inode and
834 * pass on the error.
835 */
836 if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
837 xfs_fs_cmn_err(CE_WARN, ip->i_mount,
838 "inode 0x%llx background reclaim flush failed with %d",
839 (long long)ip->i_ino, error);
840 }
841out:
842 xfs_iflags_clear(ip, XFS_IRECLAIM);
843 xfs_iunlock(ip, XFS_ILOCK_EXCL);
844 /*
845 * We could return EAGAIN here to make reclaim rescan the inode tree in
846 * a short while. However, this just burns CPU time scanning the tree
847 * waiting for IO to complete and xfssyncd never goes back to the idle
848 * state. Instead, return 0 to let the next scheduled background reclaim
849 * attempt to reclaim the inode again.
850 */
851 return 0;
852
853reclaim:
854 xfs_ifunlock(ip);
855 xfs_iunlock(ip, XFS_ILOCK_EXCL);
856 xfs_ireclaim(ip);
857 return error;
780 858
781 return xfs_reclaim_inode(ip, 0, flags);
782} 859}
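With the whole table above encoded in one function, callers pick the behaviour purely through sync_mode, as the other hunks in this patch do:

	xfs_reclaim_inodes(mp, SYNC_WAIT);	/* quiesce/unmount: block until reclaimed */
	xfs_reclaim_inodes(mp, 0);		/* xfssyncd/shrinker: never block, requeue */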
783 860
784int 861int
@@ -786,6 +863,94 @@ xfs_reclaim_inodes(
786 xfs_mount_t *mp, 863 xfs_mount_t *mp,
787 int mode) 864 int mode)
788{ 865{
789 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, 866 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
790 XFS_ICI_RECLAIM_TAG); 867 XFS_ICI_RECLAIM_TAG, 1, NULL);
868}
869
870/*
871 * Shrinker infrastructure.
872 *
873 * This is all far more complex than it needs to be. It adds a global list of
 874 * mounts because shrinker callbacks receive no per-mount context. We need
 875 * the shrinker API to pass a context through to avoid the global state.
876 */
877static LIST_HEAD(xfs_mount_list);
878static struct rw_semaphore xfs_mount_list_lock;
879
880static int
881xfs_reclaim_inode_shrink(
882 int nr_to_scan,
883 gfp_t gfp_mask)
884{
885 struct xfs_mount *mp;
886 struct xfs_perag *pag;
887 xfs_agnumber_t ag;
888 int reclaimable = 0;
889
890 if (nr_to_scan) {
891 if (!(gfp_mask & __GFP_FS))
892 return -1;
893
894 down_read(&xfs_mount_list_lock);
895 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
896 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
897 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
898 if (nr_to_scan <= 0)
899 break;
900 }
901 up_read(&xfs_mount_list_lock);
902 }
903
904 down_read(&xfs_mount_list_lock);
905 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
906 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
907
908 pag = xfs_perag_get(mp, ag);
909 if (!pag->pag_ici_init) {
910 xfs_perag_put(pag);
911 continue;
912 }
913 reclaimable += pag->pag_ici_reclaimable;
914 xfs_perag_put(pag);
915 }
916 }
917 up_read(&xfs_mount_list_lock);
918 return reclaimable;
919}
920
921static struct shrinker xfs_inode_shrinker = {
922 .shrink = xfs_reclaim_inode_shrink,
923 .seeks = DEFAULT_SEEKS,
924};
925
926void __init
927xfs_inode_shrinker_init(void)
928{
929 init_rwsem(&xfs_mount_list_lock);
930 register_shrinker(&xfs_inode_shrinker);
931}
932
933void
934xfs_inode_shrinker_destroy(void)
935{
936 ASSERT(list_empty(&xfs_mount_list));
937 unregister_shrinker(&xfs_inode_shrinker);
938}
939
940void
941xfs_inode_shrinker_register(
942 struct xfs_mount *mp)
943{
944 down_write(&xfs_mount_list_lock);
945 list_add_tail(&mp->m_mplist, &xfs_mount_list);
946 up_write(&xfs_mount_list_lock);
947}
948
949void
950xfs_inode_shrinker_unregister(
951 struct xfs_mount *mp)
952{
953 down_write(&xfs_mount_list_lock);
954 list_del(&mp->m_mplist);
955 up_write(&xfs_mount_list_lock);
791} 956}
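The shrink callback follows the old (pre-shrink_control) shrinker ABI visible above: called with nr_to_scan == 0 it only reports the reclaimable count, called with a budget it reclaims, and it must return -1 when it cannot make progress under gfp_mask (no __GFP_FS). Lifecycle pairing of the four hooks, as a sketch (the fill_super and module init/exit call sites appear in the xfs_super.c hunks above; the put_super unregistration is assumed to sit in a hunk not shown here):

/*
 *   init_xfs_fs()        -> xfs_inode_shrinker_init()          module load
 *   xfs_fs_fill_super()  -> xfs_inode_shrinker_register(mp)    mount
 *   xfs_fs_put_super()   -> xfs_inode_shrinker_unregister(mp)  unmount (assumed)
 *   exit_xfs_fs()        -> xfs_inode_shrinker_destroy()       module unload
 */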
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 27920eb7a820..cdcbaaca9880 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -37,14 +37,12 @@ void xfs_syncd_stop(struct xfs_mount *mp);
37 37
38int xfs_sync_attr(struct xfs_mount *mp, int flags); 38int xfs_sync_attr(struct xfs_mount *mp, int flags);
39int xfs_sync_data(struct xfs_mount *mp, int flags); 39int xfs_sync_data(struct xfs_mount *mp, int flags);
40int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
41 40
42int xfs_quiesce_data(struct xfs_mount *mp); 41int xfs_quiesce_data(struct xfs_mount *mp);
43void xfs_quiesce_attr(struct xfs_mount *mp); 42void xfs_quiesce_attr(struct xfs_mount *mp);
44 43
45void xfs_flush_inodes(struct xfs_inode *ip); 44void xfs_flush_inodes(struct xfs_inode *ip);
46 45
47int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
48int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 46int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
49 47
50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 48void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
@@ -55,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
55int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); 53int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
56int xfs_inode_ag_iterator(struct xfs_mount *mp, 54int xfs_inode_ag_iterator(struct xfs_mount *mp,
57 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
58 int flags, int tag); 56 int flags, int tag, int write_lock, int *nr_to_scan);
57
58void xfs_inode_shrinker_init(void);
59void xfs_inode_shrinker_destroy(void);
60void xfs_inode_shrinker_register(struct xfs_mount *mp);
61void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
59 62
60#endif 63#endif
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index c5bc67c4e3bb..7bb5092d6ae4 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -55,170 +55,140 @@ xfs_stats_clear_proc_handler(
55 55
56static ctl_table xfs_table[] = { 56static ctl_table xfs_table[] = {
57 { 57 {
58 .ctl_name = XFS_SGID_INHERIT,
59 .procname = "irix_sgid_inherit", 58 .procname = "irix_sgid_inherit",
60 .data = &xfs_params.sgid_inherit.val, 59 .data = &xfs_params.sgid_inherit.val,
61 .maxlen = sizeof(int), 60 .maxlen = sizeof(int),
62 .mode = 0644, 61 .mode = 0644,
63 .proc_handler = &proc_dointvec_minmax, 62 .proc_handler = proc_dointvec_minmax,
64 .strategy = &sysctl_intvec,
65 .extra1 = &xfs_params.sgid_inherit.min, 63 .extra1 = &xfs_params.sgid_inherit.min,
66 .extra2 = &xfs_params.sgid_inherit.max 64 .extra2 = &xfs_params.sgid_inherit.max
67 }, 65 },
68 { 66 {
69 .ctl_name = XFS_SYMLINK_MODE,
70 .procname = "irix_symlink_mode", 67 .procname = "irix_symlink_mode",
71 .data = &xfs_params.symlink_mode.val, 68 .data = &xfs_params.symlink_mode.val,
72 .maxlen = sizeof(int), 69 .maxlen = sizeof(int),
73 .mode = 0644, 70 .mode = 0644,
74 .proc_handler = &proc_dointvec_minmax, 71 .proc_handler = proc_dointvec_minmax,
75 .strategy = &sysctl_intvec,
76 .extra1 = &xfs_params.symlink_mode.min, 72 .extra1 = &xfs_params.symlink_mode.min,
77 .extra2 = &xfs_params.symlink_mode.max 73 .extra2 = &xfs_params.symlink_mode.max
78 }, 74 },
79 { 75 {
80 .ctl_name = XFS_PANIC_MASK,
81 .procname = "panic_mask", 76 .procname = "panic_mask",
82 .data = &xfs_params.panic_mask.val, 77 .data = &xfs_params.panic_mask.val,
83 .maxlen = sizeof(int), 78 .maxlen = sizeof(int),
84 .mode = 0644, 79 .mode = 0644,
85 .proc_handler = &proc_dointvec_minmax, 80 .proc_handler = proc_dointvec_minmax,
86 .strategy = &sysctl_intvec,
87 .extra1 = &xfs_params.panic_mask.min, 81 .extra1 = &xfs_params.panic_mask.min,
88 .extra2 = &xfs_params.panic_mask.max 82 .extra2 = &xfs_params.panic_mask.max
89 }, 83 },
90 84
91 { 85 {
92 .ctl_name = XFS_ERRLEVEL,
93 .procname = "error_level", 86 .procname = "error_level",
94 .data = &xfs_params.error_level.val, 87 .data = &xfs_params.error_level.val,
95 .maxlen = sizeof(int), 88 .maxlen = sizeof(int),
96 .mode = 0644, 89 .mode = 0644,
97 .proc_handler = &proc_dointvec_minmax, 90 .proc_handler = proc_dointvec_minmax,
98 .strategy = &sysctl_intvec,
99 .extra1 = &xfs_params.error_level.min, 91 .extra1 = &xfs_params.error_level.min,
100 .extra2 = &xfs_params.error_level.max 92 .extra2 = &xfs_params.error_level.max
101 }, 93 },
102 { 94 {
103 .ctl_name = XFS_SYNCD_TIMER,
104 .procname = "xfssyncd_centisecs", 95 .procname = "xfssyncd_centisecs",
105 .data = &xfs_params.syncd_timer.val, 96 .data = &xfs_params.syncd_timer.val,
106 .maxlen = sizeof(int), 97 .maxlen = sizeof(int),
107 .mode = 0644, 98 .mode = 0644,
108 .proc_handler = &proc_dointvec_minmax, 99 .proc_handler = proc_dointvec_minmax,
109 .strategy = &sysctl_intvec,
110 .extra1 = &xfs_params.syncd_timer.min, 100 .extra1 = &xfs_params.syncd_timer.min,
111 .extra2 = &xfs_params.syncd_timer.max 101 .extra2 = &xfs_params.syncd_timer.max
112 }, 102 },
113 { 103 {
114 .ctl_name = XFS_INHERIT_SYNC,
115 .procname = "inherit_sync", 104 .procname = "inherit_sync",
116 .data = &xfs_params.inherit_sync.val, 105 .data = &xfs_params.inherit_sync.val,
117 .maxlen = sizeof(int), 106 .maxlen = sizeof(int),
118 .mode = 0644, 107 .mode = 0644,
119 .proc_handler = &proc_dointvec_minmax, 108 .proc_handler = proc_dointvec_minmax,
120 .strategy = &sysctl_intvec,
121 .extra1 = &xfs_params.inherit_sync.min, 109 .extra1 = &xfs_params.inherit_sync.min,
122 .extra2 = &xfs_params.inherit_sync.max 110 .extra2 = &xfs_params.inherit_sync.max
123 }, 111 },
124 { 112 {
125 .ctl_name = XFS_INHERIT_NODUMP,
126 .procname = "inherit_nodump", 113 .procname = "inherit_nodump",
127 .data = &xfs_params.inherit_nodump.val, 114 .data = &xfs_params.inherit_nodump.val,
128 .maxlen = sizeof(int), 115 .maxlen = sizeof(int),
129 .mode = 0644, 116 .mode = 0644,
130 .proc_handler = &proc_dointvec_minmax, 117 .proc_handler = proc_dointvec_minmax,
131 .strategy = &sysctl_intvec,
132 .extra1 = &xfs_params.inherit_nodump.min, 118 .extra1 = &xfs_params.inherit_nodump.min,
133 .extra2 = &xfs_params.inherit_nodump.max 119 .extra2 = &xfs_params.inherit_nodump.max
134 }, 120 },
135 { 121 {
136 .ctl_name = XFS_INHERIT_NOATIME,
137 .procname = "inherit_noatime", 122 .procname = "inherit_noatime",
138 .data = &xfs_params.inherit_noatim.val, 123 .data = &xfs_params.inherit_noatim.val,
139 .maxlen = sizeof(int), 124 .maxlen = sizeof(int),
140 .mode = 0644, 125 .mode = 0644,
141 .proc_handler = &proc_dointvec_minmax, 126 .proc_handler = proc_dointvec_minmax,
142 .strategy = &sysctl_intvec,
143 .extra1 = &xfs_params.inherit_noatim.min, 127 .extra1 = &xfs_params.inherit_noatim.min,
144 .extra2 = &xfs_params.inherit_noatim.max 128 .extra2 = &xfs_params.inherit_noatim.max
145 }, 129 },
146 { 130 {
147 .ctl_name = XFS_BUF_TIMER,
148 .procname = "xfsbufd_centisecs", 131 .procname = "xfsbufd_centisecs",
149 .data = &xfs_params.xfs_buf_timer.val, 132 .data = &xfs_params.xfs_buf_timer.val,
150 .maxlen = sizeof(int), 133 .maxlen = sizeof(int),
151 .mode = 0644, 134 .mode = 0644,
152 .proc_handler = &proc_dointvec_minmax, 135 .proc_handler = proc_dointvec_minmax,
153 .strategy = &sysctl_intvec,
154 .extra1 = &xfs_params.xfs_buf_timer.min, 136 .extra1 = &xfs_params.xfs_buf_timer.min,
155 .extra2 = &xfs_params.xfs_buf_timer.max 137 .extra2 = &xfs_params.xfs_buf_timer.max
156 }, 138 },
157 { 139 {
158 .ctl_name = XFS_BUF_AGE,
159 .procname = "age_buffer_centisecs", 140 .procname = "age_buffer_centisecs",
160 .data = &xfs_params.xfs_buf_age.val, 141 .data = &xfs_params.xfs_buf_age.val,
161 .maxlen = sizeof(int), 142 .maxlen = sizeof(int),
162 .mode = 0644, 143 .mode = 0644,
163 .proc_handler = &proc_dointvec_minmax, 144 .proc_handler = proc_dointvec_minmax,
164 .strategy = &sysctl_intvec,
165 .extra1 = &xfs_params.xfs_buf_age.min, 145 .extra1 = &xfs_params.xfs_buf_age.min,
166 .extra2 = &xfs_params.xfs_buf_age.max 146 .extra2 = &xfs_params.xfs_buf_age.max
167 }, 147 },
168 { 148 {
169 .ctl_name = XFS_INHERIT_NOSYM,
170 .procname = "inherit_nosymlinks", 149 .procname = "inherit_nosymlinks",
171 .data = &xfs_params.inherit_nosym.val, 150 .data = &xfs_params.inherit_nosym.val,
172 .maxlen = sizeof(int), 151 .maxlen = sizeof(int),
173 .mode = 0644, 152 .mode = 0644,
174 .proc_handler = &proc_dointvec_minmax, 153 .proc_handler = proc_dointvec_minmax,
175 .strategy = &sysctl_intvec,
176 .extra1 = &xfs_params.inherit_nosym.min, 154 .extra1 = &xfs_params.inherit_nosym.min,
177 .extra2 = &xfs_params.inherit_nosym.max 155 .extra2 = &xfs_params.inherit_nosym.max
178 }, 156 },
179 { 157 {
180 .ctl_name = XFS_ROTORSTEP,
181 .procname = "rotorstep", 158 .procname = "rotorstep",
182 .data = &xfs_params.rotorstep.val, 159 .data = &xfs_params.rotorstep.val,
183 .maxlen = sizeof(int), 160 .maxlen = sizeof(int),
184 .mode = 0644, 161 .mode = 0644,
185 .proc_handler = &proc_dointvec_minmax, 162 .proc_handler = proc_dointvec_minmax,
186 .strategy = &sysctl_intvec,
187 .extra1 = &xfs_params.rotorstep.min, 163 .extra1 = &xfs_params.rotorstep.min,
188 .extra2 = &xfs_params.rotorstep.max 164 .extra2 = &xfs_params.rotorstep.max
189 }, 165 },
190 { 166 {
191 .ctl_name = XFS_INHERIT_NODFRG,
192 .procname = "inherit_nodefrag", 167 .procname = "inherit_nodefrag",
193 .data = &xfs_params.inherit_nodfrg.val, 168 .data = &xfs_params.inherit_nodfrg.val,
194 .maxlen = sizeof(int), 169 .maxlen = sizeof(int),
195 .mode = 0644, 170 .mode = 0644,
196 .proc_handler = &proc_dointvec_minmax, 171 .proc_handler = proc_dointvec_minmax,
197 .strategy = &sysctl_intvec,
198 .extra1 = &xfs_params.inherit_nodfrg.min, 172 .extra1 = &xfs_params.inherit_nodfrg.min,
199 .extra2 = &xfs_params.inherit_nodfrg.max 173 .extra2 = &xfs_params.inherit_nodfrg.max
200 }, 174 },
201 { 175 {
202 .ctl_name = XFS_FILESTREAM_TIMER,
203 .procname = "filestream_centisecs", 176 .procname = "filestream_centisecs",
204 .data = &xfs_params.fstrm_timer.val, 177 .data = &xfs_params.fstrm_timer.val,
205 .maxlen = sizeof(int), 178 .maxlen = sizeof(int),
206 .mode = 0644, 179 .mode = 0644,
207 .proc_handler = &proc_dointvec_minmax, 180 .proc_handler = proc_dointvec_minmax,
208 .strategy = &sysctl_intvec,
209 .extra1 = &xfs_params.fstrm_timer.min, 181 .extra1 = &xfs_params.fstrm_timer.min,
210 .extra2 = &xfs_params.fstrm_timer.max, 182 .extra2 = &xfs_params.fstrm_timer.max,
211 }, 183 },
212 /* please keep this the last entry */ 184 /* please keep this the last entry */
213#ifdef CONFIG_PROC_FS 185#ifdef CONFIG_PROC_FS
214 { 186 {
215 .ctl_name = XFS_STATS_CLEAR,
216 .procname = "stats_clear", 187 .procname = "stats_clear",
217 .data = &xfs_params.stats_clear.val, 188 .data = &xfs_params.stats_clear.val,
218 .maxlen = sizeof(int), 189 .maxlen = sizeof(int),
219 .mode = 0644, 190 .mode = 0644,
220 .proc_handler = &xfs_stats_clear_proc_handler, 191 .proc_handler = xfs_stats_clear_proc_handler,
221 .strategy = &sysctl_intvec,
222 .extra1 = &xfs_params.stats_clear.min, 192 .extra1 = &xfs_params.stats_clear.min,
223 .extra2 = &xfs_params.stats_clear.max 193 .extra2 = &xfs_params.stats_clear.max
224 }, 194 },
@@ -229,7 +199,6 @@ static ctl_table xfs_table[] = {
229 199
230static ctl_table xfs_dir_table[] = { 200static ctl_table xfs_dir_table[] = {
231 { 201 {
232 .ctl_name = FS_XFS,
233 .procname = "xfs", 202 .procname = "xfs",
234 .mode = 0555, 203 .mode = 0555,
235 .child = xfs_table 204 .child = xfs_table
@@ -239,7 +208,6 @@ static ctl_table xfs_dir_table[] = {
239 208
240static ctl_table xfs_root_table[] = { 209static ctl_table xfs_root_table[] = {
241 { 210 {
242 .ctl_name = CTL_FS,
243 .procname = "fs", 211 .procname = "fs",
244 .mode = 0555, 212 .mode = 0555,
245 .child = xfs_dir_table 213 .child = xfs_dir_table
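Every entry loses its .ctl_name and .strategy initialisers and drops the address-of on the handler: this tracks the kernel-wide removal of the binary sysctl(2) interface, after which tables are matched by .procname alone. A minimal new-style entry (sketch with hypothetical names):

static int example_val;

static ctl_table example_table[] = {
	{
		.procname	= "example",	/* /proc/sys/.../example */
		.data		= &example_val,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{}				/* zeroed sentinel terminates the table */
};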
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
new file mode 100644
index 000000000000..5a107601e969
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -0,0 +1,59 @@
1/*
2 * Copyright (c) 2009, Christoph Hellwig
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_bit.h"
22#include "xfs_log.h"
23#include "xfs_inum.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h"
32#include "xfs_dir2_sf.h"
33#include "xfs_attr_sf.h"
34#include "xfs_dinode.h"
35#include "xfs_inode.h"
36#include "xfs_btree.h"
37#include "xfs_dmapi.h"
38#include "xfs_mount.h"
39#include "xfs_ialloc.h"
40#include "xfs_itable.h"
41#include "xfs_alloc.h"
42#include "xfs_bmap.h"
43#include "xfs_attr.h"
44#include "xfs_attr_sf.h"
45#include "xfs_attr_leaf.h"
46#include "xfs_log_priv.h"
47#include "xfs_buf_item.h"
48#include "xfs_quota.h"
49#include "xfs_iomap.h"
50#include "xfs_aops.h"
51#include "quota/xfs_dquot_item.h"
52#include "quota/xfs_dquot.h"
53
54/*
55 * We include this last to have the helpers above available for the trace
56 * event implementations.
57 */
58#define CREATE_TRACE_POINTS
59#include "xfs_trace.h"
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
new file mode 100644
index 000000000000..fcaa62f0799e
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -0,0 +1,1503 @@
1/*
2 * Copyright (c) 2009, Christoph Hellwig
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#undef TRACE_SYSTEM
19#define TRACE_SYSTEM xfs
20
21#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ)
22#define _TRACE_XFS_H
23
24#include <linux/tracepoint.h>
25
26struct xfs_agf;
27struct xfs_alloc_arg;
28struct xfs_attr_list_context;
29struct xfs_buf_log_item;
30struct xfs_da_args;
31struct xfs_da_node_entry;
32struct xfs_dquot;
33struct xlog_ticket;
34struct log;
35
36DECLARE_EVENT_CLASS(xfs_attr_list_class,
37 TP_PROTO(struct xfs_attr_list_context *ctx),
38 TP_ARGS(ctx),
39 TP_STRUCT__entry(
40 __field(dev_t, dev)
41 __field(xfs_ino_t, ino)
42 __field(u32, hashval)
43 __field(u32, blkno)
44 __field(u32, offset)
45 __field(void *, alist)
46 __field(int, bufsize)
47 __field(int, count)
48 __field(int, firstu)
49 __field(int, dupcnt)
50 __field(int, flags)
51 ),
52 TP_fast_assign(
53 __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
54 __entry->ino = ctx->dp->i_ino;
55 __entry->hashval = ctx->cursor->hashval;
56 __entry->blkno = ctx->cursor->blkno;
57 __entry->offset = ctx->cursor->offset;
58 __entry->alist = ctx->alist;
59 __entry->bufsize = ctx->bufsize;
60 __entry->count = ctx->count;
61 __entry->firstu = ctx->firstu;
62 __entry->flags = ctx->flags;
63 ),
64 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
65 "alist 0x%p size %u count %u firstu %u flags %d %s",
66 MAJOR(__entry->dev), MINOR(__entry->dev),
67 __entry->ino,
68 __entry->hashval,
69 __entry->blkno,
70 __entry->offset,
71 __entry->dupcnt,
72 __entry->alist,
73 __entry->bufsize,
74 __entry->count,
75 __entry->firstu,
76 __entry->flags,
77 __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
78 )
79)
80
81#define DEFINE_PERAG_REF_EVENT(name) \
82TRACE_EVENT(name, \
83 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
84 unsigned long caller_ip), \
85 TP_ARGS(mp, agno, refcount, caller_ip), \
86 TP_STRUCT__entry( \
87 __field(dev_t, dev) \
88 __field(xfs_agnumber_t, agno) \
89 __field(int, refcount) \
90 __field(unsigned long, caller_ip) \
91 ), \
92 TP_fast_assign( \
93 __entry->dev = mp->m_super->s_dev; \
94 __entry->agno = agno; \
95 __entry->refcount = refcount; \
96 __entry->caller_ip = caller_ip; \
97 ), \
98 TP_printk("dev %d:%d agno %u refcount %d caller %pf", \
99 MAJOR(__entry->dev), MINOR(__entry->dev), \
100 __entry->agno, \
101 __entry->refcount, \
102 (char *)__entry->caller_ip) \
103);
104
105DEFINE_PERAG_REF_EVENT(xfs_perag_get)
106DEFINE_PERAG_REF_EVENT(xfs_perag_put)
107
108#define DEFINE_ATTR_LIST_EVENT(name) \
109DEFINE_EVENT(xfs_attr_list_class, name, \
110 TP_PROTO(struct xfs_attr_list_context *ctx), \
111 TP_ARGS(ctx))
112DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
113DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
114DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
115DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end);
116DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full);
117DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
118DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
119DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
120
121TRACE_EVENT(xfs_attr_list_node_descend,
122 TP_PROTO(struct xfs_attr_list_context *ctx,
123 struct xfs_da_node_entry *btree),
124 TP_ARGS(ctx, btree),
125 TP_STRUCT__entry(
126 __field(dev_t, dev)
127 __field(xfs_ino_t, ino)
128 __field(u32, hashval)
129 __field(u32, blkno)
130 __field(u32, offset)
131 __field(void *, alist)
132 __field(int, bufsize)
133 __field(int, count)
134 __field(int, firstu)
135 __field(int, dupcnt)
136 __field(int, flags)
137 __field(u32, bt_hashval)
138 __field(u32, bt_before)
139 ),
140 TP_fast_assign(
141 __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
142 __entry->ino = ctx->dp->i_ino;
143 __entry->hashval = ctx->cursor->hashval;
144 __entry->blkno = ctx->cursor->blkno;
145 __entry->offset = ctx->cursor->offset;
146 __entry->alist = ctx->alist;
147 __entry->bufsize = ctx->bufsize;
148 __entry->count = ctx->count;
149 __entry->firstu = ctx->firstu;
150 __entry->flags = ctx->flags;
151 __entry->bt_hashval = be32_to_cpu(btree->hashval);
152 __entry->bt_before = be32_to_cpu(btree->before);
153 ),
154 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
155 "alist 0x%p size %u count %u firstu %u flags %d %s "
156 "node hashval %u, node before %u",
157 MAJOR(__entry->dev), MINOR(__entry->dev),
158 __entry->ino,
159 __entry->hashval,
160 __entry->blkno,
161 __entry->offset,
162 __entry->dupcnt,
163 __entry->alist,
164 __entry->bufsize,
165 __entry->count,
166 __entry->firstu,
167 __entry->flags,
168 __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS),
169 __entry->bt_hashval,
170 __entry->bt_before)
171);
172
173TRACE_EVENT(xfs_iext_insert,
174 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx,
175 struct xfs_bmbt_irec *r, int state, unsigned long caller_ip),
176 TP_ARGS(ip, idx, r, state, caller_ip),
177 TP_STRUCT__entry(
178 __field(dev_t, dev)
179 __field(xfs_ino_t, ino)
180 __field(xfs_extnum_t, idx)
181 __field(xfs_fileoff_t, startoff)
182 __field(xfs_fsblock_t, startblock)
183 __field(xfs_filblks_t, blockcount)
184 __field(xfs_exntst_t, state)
185 __field(int, bmap_state)
186 __field(unsigned long, caller_ip)
187 ),
188 TP_fast_assign(
189 __entry->dev = VFS_I(ip)->i_sb->s_dev;
190 __entry->ino = ip->i_ino;
191 __entry->idx = idx;
192 __entry->startoff = r->br_startoff;
193 __entry->startblock = r->br_startblock;
194 __entry->blockcount = r->br_blockcount;
195 __entry->state = r->br_state;
196 __entry->bmap_state = state;
197 __entry->caller_ip = caller_ip;
198 ),
199 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
200 "offset %lld block %lld count %lld flag %d caller %pf",
201 MAJOR(__entry->dev), MINOR(__entry->dev),
202 __entry->ino,
203 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
204 (long)__entry->idx,
205 __entry->startoff,
206 (__int64_t)__entry->startblock,
207 __entry->blockcount,
208 __entry->state,
209 (char *)__entry->caller_ip)
210);
211
212DECLARE_EVENT_CLASS(xfs_bmap_class,
213 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
214 unsigned long caller_ip),
215 TP_ARGS(ip, idx, state, caller_ip),
216 TP_STRUCT__entry(
217 __field(dev_t, dev)
218 __field(xfs_ino_t, ino)
219 __field(xfs_extnum_t, idx)
220 __field(xfs_fileoff_t, startoff)
221 __field(xfs_fsblock_t, startblock)
222 __field(xfs_filblks_t, blockcount)
223 __field(xfs_exntst_t, state)
224 __field(int, bmap_state)
225 __field(unsigned long, caller_ip)
226 ),
227 TP_fast_assign(
228 struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ?
229 ip->i_afp : &ip->i_df;
230 struct xfs_bmbt_irec r;
231
232 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
233 __entry->dev = VFS_I(ip)->i_sb->s_dev;
234 __entry->ino = ip->i_ino;
235 __entry->idx = idx;
236 __entry->startoff = r.br_startoff;
237 __entry->startblock = r.br_startblock;
238 __entry->blockcount = r.br_blockcount;
239 __entry->state = r.br_state;
240 __entry->bmap_state = state;
241 __entry->caller_ip = caller_ip;
242 ),
243 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
244 "offset %lld block %lld count %lld flag %d caller %pf",
245 MAJOR(__entry->dev), MINOR(__entry->dev),
246 __entry->ino,
247 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
248 (long)__entry->idx,
249 __entry->startoff,
250 (__int64_t)__entry->startblock,
251 __entry->blockcount,
252 __entry->state,
253 (char *)__entry->caller_ip)
254)
255
256#define DEFINE_BMAP_EVENT(name) \
257DEFINE_EVENT(xfs_bmap_class, name, \
258 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
259 unsigned long caller_ip), \
260 TP_ARGS(ip, idx, state, caller_ip))
261DEFINE_BMAP_EVENT(xfs_iext_remove);
262DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
263DEFINE_BMAP_EVENT(xfs_bmap_post_update);
264DEFINE_BMAP_EVENT(xfs_extlist);
265
266DECLARE_EVENT_CLASS(xfs_buf_class,
267 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
268 TP_ARGS(bp, caller_ip),
269 TP_STRUCT__entry(
270 __field(dev_t, dev)
271 __field(xfs_daddr_t, bno)
272 __field(size_t, buffer_length)
273 __field(int, hold)
274 __field(int, pincount)
275 __field(unsigned, lockval)
276 __field(unsigned, flags)
277 __field(unsigned long, caller_ip)
278 ),
279 TP_fast_assign(
280 __entry->dev = bp->b_target->bt_dev;
281 __entry->bno = bp->b_bn;
282 __entry->buffer_length = bp->b_buffer_length;
283 __entry->hold = atomic_read(&bp->b_hold);
284 __entry->pincount = atomic_read(&bp->b_pin_count);
285 __entry->lockval = xfs_buf_lock_value(bp);
286 __entry->flags = bp->b_flags;
287 __entry->caller_ip = caller_ip;
288 ),
289 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
290 "lock %d flags %s caller %pf",
291 MAJOR(__entry->dev), MINOR(__entry->dev),
292 (unsigned long long)__entry->bno,
293 __entry->buffer_length,
294 __entry->hold,
295 __entry->pincount,
296 __entry->lockval,
297 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
298 (void *)__entry->caller_ip)
299)
300
301#define DEFINE_BUF_EVENT(name) \
302DEFINE_EVENT(xfs_buf_class, name, \
303 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
304 TP_ARGS(bp, caller_ip))
305DEFINE_BUF_EVENT(xfs_buf_init);
306DEFINE_BUF_EVENT(xfs_buf_free);
307DEFINE_BUF_EVENT(xfs_buf_hold);
308DEFINE_BUF_EVENT(xfs_buf_rele);
309DEFINE_BUF_EVENT(xfs_buf_pin);
310DEFINE_BUF_EVENT(xfs_buf_unpin);
311DEFINE_BUF_EVENT(xfs_buf_iodone);
312DEFINE_BUF_EVENT(xfs_buf_iorequest);
313DEFINE_BUF_EVENT(xfs_buf_bawrite);
314DEFINE_BUF_EVENT(xfs_buf_bdwrite);
315DEFINE_BUF_EVENT(xfs_buf_lock);
316DEFINE_BUF_EVENT(xfs_buf_lock_done);
317DEFINE_BUF_EVENT(xfs_buf_cond_lock);
318DEFINE_BUF_EVENT(xfs_buf_unlock);
319DEFINE_BUF_EVENT(xfs_buf_ordered_retry);
320DEFINE_BUF_EVENT(xfs_buf_iowait);
321DEFINE_BUF_EVENT(xfs_buf_iowait_done);
322DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
323DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
324DEFINE_BUF_EVENT(xfs_buf_delwri_split);
325DEFINE_BUF_EVENT(xfs_buf_get_noaddr);
326DEFINE_BUF_EVENT(xfs_bdstrat_shut);
327DEFINE_BUF_EVENT(xfs_buf_item_relse);
328DEFINE_BUF_EVENT(xfs_buf_item_iodone);
329DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
330DEFINE_BUF_EVENT(xfs_buf_error_relse);
331DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
332DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
333
334/* not really buffer traces, but the buf provides useful information */
335DEFINE_BUF_EVENT(xfs_btree_corrupt);
336DEFINE_BUF_EVENT(xfs_da_btree_corrupt);
337DEFINE_BUF_EVENT(xfs_reset_dqcounts);
338DEFINE_BUF_EVENT(xfs_inode_item_push);
339
340/* pass flags explicitly */
341DECLARE_EVENT_CLASS(xfs_buf_flags_class,
342 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
343 TP_ARGS(bp, flags, caller_ip),
344 TP_STRUCT__entry(
345 __field(dev_t, dev)
346 __field(xfs_daddr_t, bno)
347 __field(size_t, buffer_length)
348 __field(int, hold)
349 __field(int, pincount)
350 __field(unsigned, lockval)
351 __field(unsigned, flags)
352 __field(unsigned long, caller_ip)
353 ),
354 TP_fast_assign(
355 __entry->dev = bp->b_target->bt_dev;
356 __entry->bno = bp->b_bn;
357 __entry->buffer_length = bp->b_buffer_length;
358 __entry->flags = flags;
359 __entry->hold = atomic_read(&bp->b_hold);
360 __entry->pincount = atomic_read(&bp->b_pin_count);
361 __entry->lockval = xfs_buf_lock_value(bp);
362 __entry->caller_ip = caller_ip;
363 ),
364 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
365 "lock %d flags %s caller %pf",
366 MAJOR(__entry->dev), MINOR(__entry->dev),
367 (unsigned long long)__entry->bno,
368 __entry->buffer_length,
369 __entry->hold,
370 __entry->pincount,
371 __entry->lockval,
372 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
373 (void *)__entry->caller_ip)
374)
375
376#define DEFINE_BUF_FLAGS_EVENT(name) \
377DEFINE_EVENT(xfs_buf_flags_class, name, \
378 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
379 TP_ARGS(bp, flags, caller_ip))
380DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
381DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
382DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
383
384TRACE_EVENT(xfs_buf_ioerror,
385 TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip),
386 TP_ARGS(bp, error, caller_ip),
387 TP_STRUCT__entry(
388 __field(dev_t, dev)
389 __field(xfs_daddr_t, bno)
390 __field(size_t, buffer_length)
391 __field(unsigned, flags)
392 __field(int, hold)
393 __field(int, pincount)
394 __field(unsigned, lockval)
395 __field(int, error)
396 __field(unsigned long, caller_ip)
397 ),
398 TP_fast_assign(
399 __entry->dev = bp->b_target->bt_dev;
400 __entry->bno = bp->b_bn;
401 __entry->buffer_length = bp->b_buffer_length;
402 __entry->hold = atomic_read(&bp->b_hold);
403 __entry->pincount = atomic_read(&bp->b_pin_count);
404 __entry->lockval = xfs_buf_lock_value(bp);
405 __entry->error = error;
406 __entry->flags = bp->b_flags;
407 __entry->caller_ip = caller_ip;
408 ),
409 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
410 "lock %d error %d flags %s caller %pf",
411 MAJOR(__entry->dev), MINOR(__entry->dev),
412 (unsigned long long)__entry->bno,
413 __entry->buffer_length,
414 __entry->hold,
415 __entry->pincount,
416 __entry->lockval,
417 __entry->error,
418 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
419 (void *)__entry->caller_ip)
420);
421
422DECLARE_EVENT_CLASS(xfs_buf_item_class,
423 TP_PROTO(struct xfs_buf_log_item *bip),
424 TP_ARGS(bip),
425 TP_STRUCT__entry(
426 __field(dev_t, dev)
427 __field(xfs_daddr_t, buf_bno)
428 __field(size_t, buf_len)
429 __field(int, buf_hold)
430 __field(int, buf_pincount)
431 __field(int, buf_lockval)
432 __field(unsigned, buf_flags)
433 __field(unsigned, bli_recur)
434 __field(int, bli_refcount)
435 __field(unsigned, bli_flags)
436 __field(void *, li_desc)
437 __field(unsigned, li_flags)
438 ),
439 TP_fast_assign(
440 __entry->dev = bip->bli_buf->b_target->bt_dev;
441 __entry->bli_flags = bip->bli_flags;
442 __entry->bli_recur = bip->bli_recur;
443 __entry->bli_refcount = atomic_read(&bip->bli_refcount);
444 __entry->buf_bno = bip->bli_buf->b_bn;
445 __entry->buf_len = bip->bli_buf->b_buffer_length;
446 __entry->buf_flags = bip->bli_buf->b_flags;
447 __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
448 __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
449 __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf);
450 __entry->li_desc = bip->bli_item.li_desc;
451 __entry->li_flags = bip->bli_item.li_flags;
452 ),
453 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
454 "lock %d flags %s recur %d refcount %d bliflags %s "
455 "lidesc 0x%p liflags %s",
456 MAJOR(__entry->dev), MINOR(__entry->dev),
457 (unsigned long long)__entry->buf_bno,
458 __entry->buf_len,
459 __entry->buf_hold,
460 __entry->buf_pincount,
461 __entry->buf_lockval,
462 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
463 __entry->bli_recur,
464 __entry->bli_refcount,
465 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
466 __entry->li_desc,
467 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
468)
469
470#define DEFINE_BUF_ITEM_EVENT(name) \
471DEFINE_EVENT(xfs_buf_item_class, name, \
472 TP_PROTO(struct xfs_buf_log_item *bip), \
473 TP_ARGS(bip))
474DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
475DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
476DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
477DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
478DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
479DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
480DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
481DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock);
482DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
483DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
484DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
485DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
486DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf);
487DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
488DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
489DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
490DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur);
491DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
492DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
493DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
494DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
495DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
496DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
497DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
498DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
499
500DECLARE_EVENT_CLASS(xfs_lock_class,
501 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
502 unsigned long caller_ip),
503 TP_ARGS(ip, lock_flags, caller_ip),
504 TP_STRUCT__entry(
505 __field(dev_t, dev)
506 __field(xfs_ino_t, ino)
507 __field(int, lock_flags)
508 __field(unsigned long, caller_ip)
509 ),
510 TP_fast_assign(
511 __entry->dev = VFS_I(ip)->i_sb->s_dev;
512 __entry->ino = ip->i_ino;
513 __entry->lock_flags = lock_flags;
514 __entry->caller_ip = caller_ip;
515 ),
516 TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
517 MAJOR(__entry->dev), MINOR(__entry->dev),
518 __entry->ino,
519 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
520 (void *)__entry->caller_ip)
521)
522
523#define DEFINE_LOCK_EVENT(name) \
524DEFINE_EVENT(xfs_lock_class, name, \
525 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
526 unsigned long caller_ip), \
527 TP_ARGS(ip, lock_flags, caller_ip))
528DEFINE_LOCK_EVENT(xfs_ilock);
529DEFINE_LOCK_EVENT(xfs_ilock_nowait);
530DEFINE_LOCK_EVENT(xfs_ilock_demote);
531DEFINE_LOCK_EVENT(xfs_iunlock);
532
533DECLARE_EVENT_CLASS(xfs_iget_class,
534 TP_PROTO(struct xfs_inode *ip),
535 TP_ARGS(ip),
536 TP_STRUCT__entry(
537 __field(dev_t, dev)
538 __field(xfs_ino_t, ino)
539 ),
540 TP_fast_assign(
541 __entry->dev = VFS_I(ip)->i_sb->s_dev;
542 __entry->ino = ip->i_ino;
543 ),
544 TP_printk("dev %d:%d ino 0x%llx",
545 MAJOR(__entry->dev), MINOR(__entry->dev),
546 __entry->ino)
547)
548
549#define DEFINE_IGET_EVENT(name) \
550DEFINE_EVENT(xfs_iget_class, name, \
551 TP_PROTO(struct xfs_inode *ip), \
552 TP_ARGS(ip))
553DEFINE_IGET_EVENT(xfs_iget_skip);
554DEFINE_IGET_EVENT(xfs_iget_reclaim);
555DEFINE_IGET_EVENT(xfs_iget_found);
556DEFINE_IGET_EVENT(xfs_iget_alloc);
557
558DECLARE_EVENT_CLASS(xfs_inode_class,
559 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
560 TP_ARGS(ip, caller_ip),
561 TP_STRUCT__entry(
562 __field(dev_t, dev)
563 __field(xfs_ino_t, ino)
564 __field(int, count)
565 __field(unsigned long, caller_ip)
566 ),
567 TP_fast_assign(
568 __entry->dev = VFS_I(ip)->i_sb->s_dev;
569 __entry->ino = ip->i_ino;
570 __entry->count = atomic_read(&VFS_I(ip)->i_count);
571 __entry->caller_ip = caller_ip;
572 ),
573 TP_printk("dev %d:%d ino 0x%llx count %d caller %pf",
574 MAJOR(__entry->dev), MINOR(__entry->dev),
575 __entry->ino,
576 __entry->count,
577 (void *)__entry->caller_ip)
578)
579
580#define DEFINE_INODE_EVENT(name) \
581DEFINE_EVENT(xfs_inode_class, name, \
582 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
583 TP_ARGS(ip, caller_ip))
584DEFINE_INODE_EVENT(xfs_ihold);
585DEFINE_INODE_EVENT(xfs_irele);
586/* the old xfs_itrace_entry tracer - to be replaced by something in the VFS */
587DEFINE_INODE_EVENT(xfs_inode);
588#define xfs_itrace_entry(ip) \
589 trace_xfs_inode(ip, _THIS_IP_)
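
A sketch of the shim in use (example_entry() is hypothetical; existing xfs_itrace_entry() call sites compile unchanged against the new tracer):

int
example_entry(struct xfs_inode *ip)		/* hypothetical caller */
{
	xfs_itrace_entry(ip);	/* emits trace_xfs_inode(ip, _THIS_IP_) */
	return 0;
}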
590
591DECLARE_EVENT_CLASS(xfs_dquot_class,
592 TP_PROTO(struct xfs_dquot *dqp),
593 TP_ARGS(dqp),
594 TP_STRUCT__entry(
595 __field(dev_t, dev)
596 __field(u32, id)
597 __field(unsigned, flags)
598 __field(unsigned, nrefs)
599 __field(unsigned long long, res_bcount)
600 __field(unsigned long long, bcount)
601 __field(unsigned long long, icount)
602 __field(unsigned long long, blk_hardlimit)
603 __field(unsigned long long, blk_softlimit)
604 __field(unsigned long long, ino_hardlimit)
605 __field(unsigned long long, ino_softlimit)
606 ),
607 TP_fast_assign(
608 __entry->dev = dqp->q_mount->m_super->s_dev;
609 __entry->id = be32_to_cpu(dqp->q_core.d_id);
610 __entry->flags = dqp->dq_flags;
611 __entry->nrefs = dqp->q_nrefs;
612 __entry->res_bcount = dqp->q_res_bcount;
613 __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
614 __entry->icount = be64_to_cpu(dqp->q_core.d_icount);
615 __entry->blk_hardlimit =
616 be64_to_cpu(dqp->q_core.d_blk_hardlimit);
617 __entry->blk_softlimit =
618 be64_to_cpu(dqp->q_core.d_blk_softlimit);
619 __entry->ino_hardlimit =
620 be64_to_cpu(dqp->q_core.d_ino_hardlimit);
621 __entry->ino_softlimit =
622 be64_to_cpu(dqp->q_core.d_ino_softlimit);
623 ),
624 TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
625 "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
626 "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx",
627 MAJOR(__entry->dev), MINOR(__entry->dev),
628 __entry->id,
629 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
630 __entry->nrefs,
631 __entry->res_bcount,
632 __entry->bcount,
633 __entry->blk_hardlimit,
634 __entry->blk_softlimit,
635 __entry->icount,
636 __entry->ino_hardlimit,
637 __entry->ino_softlimit)
638)
639
640#define DEFINE_DQUOT_EVENT(name) \
641DEFINE_EVENT(xfs_dquot_class, name, \
642 TP_PROTO(struct xfs_dquot *dqp), \
643 TP_ARGS(dqp))
644DEFINE_DQUOT_EVENT(xfs_dqadjust);
645DEFINE_DQUOT_EVENT(xfs_dqshake_dirty);
646DEFINE_DQUOT_EVENT(xfs_dqshake_unlink);
647DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
648DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
649DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
650DEFINE_DQUOT_EVENT(xfs_dqattach_found);
651DEFINE_DQUOT_EVENT(xfs_dqattach_get);
652DEFINE_DQUOT_EVENT(xfs_dqinit);
653DEFINE_DQUOT_EVENT(xfs_dqreuse);
654DEFINE_DQUOT_EVENT(xfs_dqalloc);
655DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
656DEFINE_DQUOT_EVENT(xfs_dqread);
657DEFINE_DQUOT_EVENT(xfs_dqread_fail);
658DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
659DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
660DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
661DEFINE_DQUOT_EVENT(xfs_dqlookup_move);
662DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
663DEFINE_DQUOT_EVENT(xfs_dqget_hit);
664DEFINE_DQUOT_EVENT(xfs_dqget_miss);
665DEFINE_DQUOT_EVENT(xfs_dqput);
666DEFINE_DQUOT_EVENT(xfs_dqput_wait);
667DEFINE_DQUOT_EVENT(xfs_dqput_free);
668DEFINE_DQUOT_EVENT(xfs_dqrele);
669DEFINE_DQUOT_EVENT(xfs_dqflush);
670DEFINE_DQUOT_EVENT(xfs_dqflush_force);
671DEFINE_DQUOT_EVENT(xfs_dqflush_done);
672/* not really iget events, but we re-use the format */
673DEFINE_IGET_EVENT(xfs_dquot_dqalloc);
674DEFINE_IGET_EVENT(xfs_dquot_dqdetach);
675
676DECLARE_EVENT_CLASS(xfs_loggrant_class,
677 TP_PROTO(struct log *log, struct xlog_ticket *tic),
678 TP_ARGS(log, tic),
679 TP_STRUCT__entry(
680 __field(dev_t, dev)
681 __field(unsigned, trans_type)
682 __field(char, ocnt)
683 __field(char, cnt)
684 __field(int, curr_res)
685 __field(int, unit_res)
686 __field(unsigned int, flags)
687 __field(void *, reserve_headq)
688 __field(void *, write_headq)
689 __field(int, grant_reserve_cycle)
690 __field(int, grant_reserve_bytes)
691 __field(int, grant_write_cycle)
692 __field(int, grant_write_bytes)
693 __field(int, curr_cycle)
694 __field(int, curr_block)
695 __field(xfs_lsn_t, tail_lsn)
696 ),
697 TP_fast_assign(
698 __entry->dev = log->l_mp->m_super->s_dev;
699 __entry->trans_type = tic->t_trans_type;
700 __entry->ocnt = tic->t_ocnt;
701 __entry->cnt = tic->t_cnt;
702 __entry->curr_res = tic->t_curr_res;
703 __entry->unit_res = tic->t_unit_res;
704 __entry->flags = tic->t_flags;
705 __entry->reserve_headq = log->l_reserve_headq;
706 __entry->write_headq = log->l_write_headq;
707 __entry->grant_reserve_cycle = log->l_grant_reserve_cycle;
708 __entry->grant_reserve_bytes = log->l_grant_reserve_bytes;
709 __entry->grant_write_cycle = log->l_grant_write_cycle;
710 __entry->grant_write_bytes = log->l_grant_write_bytes;
711 __entry->curr_cycle = log->l_curr_cycle;
712 __entry->curr_block = log->l_curr_block;
713 __entry->tail_lsn = log->l_tail_lsn;
714 ),
715 TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
716 "t_unit_res %u t_flags %s reserve_headq 0x%p "
717 "write_headq 0x%p grant_reserve_cycle %d "
718 "grant_reserve_bytes %d grant_write_cycle %d "
719 "grant_write_bytes %d curr_cycle %d curr_block %d "
720 "tail_cycle %d tail_block %d",
721 MAJOR(__entry->dev), MINOR(__entry->dev),
722 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
723 __entry->ocnt,
724 __entry->cnt,
725 __entry->curr_res,
726 __entry->unit_res,
727 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
728 __entry->reserve_headq,
729 __entry->write_headq,
730 __entry->grant_reserve_cycle,
731 __entry->grant_reserve_bytes,
732 __entry->grant_write_cycle,
733 __entry->grant_write_bytes,
734 __entry->curr_cycle,
735 __entry->curr_block,
736 CYCLE_LSN(__entry->tail_lsn),
737 BLOCK_LSN(__entry->tail_lsn)
738 )
739)
740
741#define DEFINE_LOGGRANT_EVENT(name) \
742DEFINE_EVENT(xfs_loggrant_class, name, \
743 TP_PROTO(struct log *log, struct xlog_ticket *tic), \
744 TP_ARGS(log, tic))
745DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
746DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
747DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
748DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
749DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
750DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
751DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
752DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
753DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
754DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
755DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
756DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
757DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
758DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
759DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
760DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
761DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
762DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
763DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
764DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
765DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
766DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
767DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
768DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
769
770#define DEFINE_RW_EVENT(name) \
771TRACE_EVENT(name, \
772 TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
773 TP_ARGS(ip, count, offset, flags), \
774 TP_STRUCT__entry( \
775 __field(dev_t, dev) \
776 __field(xfs_ino_t, ino) \
777 __field(xfs_fsize_t, size) \
778 __field(xfs_fsize_t, new_size) \
779 __field(loff_t, offset) \
780 __field(size_t, count) \
781 __field(int, flags) \
782 ), \
783 TP_fast_assign( \
784 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
785 __entry->ino = ip->i_ino; \
786 __entry->size = ip->i_d.di_size; \
787 __entry->new_size = ip->i_new_size; \
788 __entry->offset = offset; \
789 __entry->count = count; \
790 __entry->flags = flags; \
791 ), \
792 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
793 "offset 0x%llx count 0x%zx ioflags %s", \
794 MAJOR(__entry->dev), MINOR(__entry->dev), \
795 __entry->ino, \
796 __entry->size, \
797 __entry->new_size, \
798 __entry->offset, \
799 __entry->count, \
800 __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) \
801)
802DEFINE_RW_EVENT(xfs_file_read);
803DEFINE_RW_EVENT(xfs_file_buffered_write);
804DEFINE_RW_EVENT(xfs_file_direct_write);
805DEFINE_RW_EVENT(xfs_file_splice_read);
806DEFINE_RW_EVENT(xfs_file_splice_write);
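
The file I/O paths hand the size, position and ioflags straight through to these events. A hedged sketch of the calling convention (example_trace_read() is hypothetical; the argument order follows the TP_PROTO above):

static void
example_trace_read(struct xfs_inode *ip, size_t size, loff_t pos, int ioflags)
{
	/* count, offset and flags land in the TP_fast_assign() fields above */
	trace_xfs_file_read(ip, size, pos, ioflags);
}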
807
808
809#define DEFINE_PAGE_EVENT(name) \
810TRACE_EVENT(name, \
811 TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \
812 TP_ARGS(inode, page, off), \
813 TP_STRUCT__entry( \
814 __field(dev_t, dev) \
815 __field(xfs_ino_t, ino) \
816 __field(pgoff_t, pgoff) \
817 __field(loff_t, size) \
818 __field(unsigned long, offset) \
819 __field(int, delalloc) \
820 __field(int, unmapped) \
821 __field(int, unwritten) \
822 ), \
823 TP_fast_assign( \
824 int delalloc = -1, unmapped = -1, unwritten = -1; \
825 \
826 if (page_has_buffers(page)) \
827 xfs_count_page_state(page, &delalloc, \
828 &unmapped, &unwritten); \
829 __entry->dev = inode->i_sb->s_dev; \
830 __entry->ino = XFS_I(inode)->i_ino; \
831 __entry->pgoff = page_offset(page); \
832 __entry->size = i_size_read(inode); \
833 __entry->offset = off; \
834 __entry->delalloc = delalloc; \
835 __entry->unmapped = unmapped; \
836 __entry->unwritten = unwritten; \
837 ), \
838 TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " \
839 "delalloc %d unmapped %d unwritten %d", \
840 MAJOR(__entry->dev), MINOR(__entry->dev), \
841 __entry->ino, \
842 __entry->pgoff, \
843 __entry->size, \
844 __entry->offset, \
845 __entry->delalloc, \
846 __entry->unmapped, \
847 __entry->unwritten) \
848)
849DEFINE_PAGE_EVENT(xfs_writepage);
850DEFINE_PAGE_EVENT(xfs_releasepage);
851DEFINE_PAGE_EVENT(xfs_invalidatepage);
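
A sketch of a page-event call site (example_trace_writepage() is hypothetical; off = 0 is plausible here because TP_fast_assign() derives the delalloc/unmapped/unwritten counts from the page buffers itself):

static void
example_trace_writepage(struct inode *inode, struct page *page)
{
	/* page state is counted inside the event via xfs_count_page_state() */
	trace_xfs_writepage(inode, page, 0);
}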
852
853#define DEFINE_IOMAP_EVENT(name) \
854TRACE_EVENT(name, \
855 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
856 int flags, struct xfs_bmbt_irec *irec), \
857 TP_ARGS(ip, offset, count, flags, irec), \
858 TP_STRUCT__entry( \
859 __field(dev_t, dev) \
860 __field(xfs_ino_t, ino) \
861 __field(loff_t, size) \
862 __field(loff_t, new_size) \
863 __field(loff_t, offset) \
864 __field(size_t, count) \
865 __field(int, flags) \
866 __field(xfs_fileoff_t, startoff) \
867 __field(xfs_fsblock_t, startblock) \
868 __field(xfs_filblks_t, blockcount) \
869 ), \
870 TP_fast_assign( \
871 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
872 __entry->ino = ip->i_ino; \
873 __entry->size = ip->i_d.di_size; \
874 __entry->new_size = ip->i_new_size; \
875 __entry->offset = offset; \
876 __entry->count = count; \
877 __entry->flags = flags; \
878 __entry->startoff = irec ? irec->br_startoff : 0; \
879 __entry->startblock = irec ? irec->br_startblock : 0; \
880 __entry->blockcount = irec ? irec->br_blockcount : 0; \
881 ), \
882 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
883 "offset 0x%llx count %zd flags %s " \
884 "startoff 0x%llx startblock %lld blockcount 0x%llx", \
885 MAJOR(__entry->dev), MINOR(__entry->dev), \
886 __entry->ino, \
887 __entry->size, \
888 __entry->new_size, \
889 __entry->offset, \
890 __entry->count, \
891 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \
892 __entry->startoff, \
893 (__int64_t)__entry->startblock, \
894 __entry->blockcount) \
895)
896DEFINE_IOMAP_EVENT(xfs_iomap_enter);
897DEFINE_IOMAP_EVENT(xfs_iomap_found);
898DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
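
Because irec is NULL-guarded in TP_fast_assign() above, one event class covers both the "no mapping yet" and "mapping found" cases. A hypothetical sketch:

static void
example_trace_iomap(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
		    int flags, struct xfs_bmbt_irec *imap)
{
	trace_xfs_iomap_enter(ip, offset, count, flags, NULL);	/* no extent yet */
	if (imap)
		trace_xfs_iomap_found(ip, offset, count, flags, imap);
}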
899
900#define DEFINE_SIMPLE_IO_EVENT(name) \
901TRACE_EVENT(name, \
902 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \
903 TP_ARGS(ip, offset, count), \
904 TP_STRUCT__entry( \
905 __field(dev_t, dev) \
906 __field(xfs_ino_t, ino) \
907 __field(loff_t, size) \
908 __field(loff_t, new_size) \
909 __field(loff_t, offset) \
910 __field(size_t, count) \
911 ), \
912 TP_fast_assign( \
913 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
914 __entry->ino = ip->i_ino; \
915 __entry->size = ip->i_d.di_size; \
916 __entry->new_size = ip->i_new_size; \
917 __entry->offset = offset; \
918 __entry->count = count; \
919 ), \
920 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
921 "offset 0x%llx count %zd", \
922 MAJOR(__entry->dev), MINOR(__entry->dev), \
923 __entry->ino, \
924 __entry->size, \
925 __entry->new_size, \
926 __entry->offset, \
927 __entry->count) \
928)
929DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
930DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
931
932
933TRACE_EVENT(xfs_itruncate_start,
934 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size, int flag,
935 xfs_off_t toss_start, xfs_off_t toss_finish),
936 TP_ARGS(ip, new_size, flag, toss_start, toss_finish),
937 TP_STRUCT__entry(
938 __field(dev_t, dev)
939 __field(xfs_ino_t, ino)
940 __field(xfs_fsize_t, size)
941 __field(xfs_fsize_t, new_size)
942 __field(xfs_off_t, toss_start)
943 __field(xfs_off_t, toss_finish)
944 __field(int, flag)
945 ),
946 TP_fast_assign(
947 __entry->dev = VFS_I(ip)->i_sb->s_dev;
948 __entry->ino = ip->i_ino;
949 __entry->size = ip->i_d.di_size;
950 __entry->new_size = new_size;
951 __entry->toss_start = toss_start;
952 __entry->toss_finish = toss_finish;
953 __entry->flag = flag;
954 ),
955 TP_printk("dev %d:%d ino 0x%llx %s size 0x%llx new_size 0x%llx "
956 "toss start 0x%llx toss finish 0x%llx",
957 MAJOR(__entry->dev), MINOR(__entry->dev),
958 __entry->ino,
959 __print_flags(__entry->flag, "|", XFS_ITRUNC_FLAGS),
960 __entry->size,
961 __entry->new_size,
962 __entry->toss_start,
963 __entry->toss_finish)
964);
965
966DECLARE_EVENT_CLASS(xfs_itrunc_class,
967 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
968 TP_ARGS(ip, new_size),
969 TP_STRUCT__entry(
970 __field(dev_t, dev)
971 __field(xfs_ino_t, ino)
972 __field(xfs_fsize_t, size)
973 __field(xfs_fsize_t, new_size)
974 ),
975 TP_fast_assign(
976 __entry->dev = VFS_I(ip)->i_sb->s_dev;
977 __entry->ino = ip->i_ino;
978 __entry->size = ip->i_d.di_size;
979 __entry->new_size = new_size;
980 ),
981 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
982 MAJOR(__entry->dev), MINOR(__entry->dev),
983 __entry->ino,
984 __entry->size,
985 __entry->new_size)
986)
987
988#define DEFINE_ITRUNC_EVENT(name) \
989DEFINE_EVENT(xfs_itrunc_class, name, \
990 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
991 TP_ARGS(ip, new_size))
992DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start);
993DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end);
994
995TRACE_EVENT(xfs_pagecache_inval,
996 TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
997 TP_ARGS(ip, start, finish),
998 TP_STRUCT__entry(
999 __field(dev_t, dev)
1000 __field(xfs_ino_t, ino)
1001 __field(xfs_fsize_t, size)
1002 __field(xfs_off_t, start)
1003 __field(xfs_off_t, finish)
1004 ),
1005 TP_fast_assign(
1006 __entry->dev = VFS_I(ip)->i_sb->s_dev;
1007 __entry->ino = ip->i_ino;
1008 __entry->size = ip->i_d.di_size;
1009 __entry->start = start;
1010 __entry->finish = finish;
1011 ),
1012 TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx",
1013 MAJOR(__entry->dev), MINOR(__entry->dev),
1014 __entry->ino,
1015 __entry->size,
1016 __entry->start,
1017 __entry->finish)
1018);
1019
1020TRACE_EVENT(xfs_bunmap,
1021 TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len,
1022 int flags, unsigned long caller_ip),
1023 TP_ARGS(ip, bno, len, flags, caller_ip),
1024 TP_STRUCT__entry(
1025 __field(dev_t, dev)
1026 __field(xfs_ino_t, ino)
1027 __field(xfs_fsize_t, size)
1028 __field(xfs_fileoff_t, bno)
1029 __field(xfs_filblks_t, len)
1030 __field(unsigned long, caller_ip)
1031 __field(int, flags)
1032 ),
1033 TP_fast_assign(
1034 __entry->dev = VFS_I(ip)->i_sb->s_dev;
1035 __entry->ino = ip->i_ino;
1036 __entry->size = ip->i_d.di_size;
1037 __entry->bno = bno;
1038 __entry->len = len;
1039 __entry->caller_ip = caller_ip;
1040 __entry->flags = flags;
1041 ),
1042 TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx "
1043 "flags %s caller %pf",
1044 MAJOR(__entry->dev), MINOR(__entry->dev),
1045 __entry->ino,
1046 __entry->size,
1047 __entry->bno,
1048 __entry->len,
1049 __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS),
1050 (void *)__entry->caller_ip)
1051
1052);
1053
1054TRACE_EVENT(xfs_alloc_busy,
1055 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
1056 xfs_extlen_t len, int slot),
1057 TP_ARGS(mp, agno, agbno, len, slot),
1058 TP_STRUCT__entry(
1059 __field(dev_t, dev)
1060 __field(xfs_agnumber_t, agno)
1061 __field(xfs_agblock_t, agbno)
1062 __field(xfs_extlen_t, len)
1063 __field(int, slot)
1064 ),
1065 TP_fast_assign(
1066 __entry->dev = mp->m_super->s_dev;
1067 __entry->agno = agno;
1068 __entry->agbno = agbno;
1069 __entry->len = len;
1070 __entry->slot = slot;
1071 ),
1072 TP_printk("dev %d:%d agno %u agbno %u len %u slot %d",
1073 MAJOR(__entry->dev), MINOR(__entry->dev),
1074 __entry->agno,
1075 __entry->agbno,
1076 __entry->len,
1077 __entry->slot)
1078
1079);
1080
1081#define XFS_BUSY_STATES \
1082 { 0, "found" }, \
1083 { 1, "missing" }
1084
1085TRACE_EVENT(xfs_alloc_unbusy,
1086 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
1087 int slot, int found),
1088 TP_ARGS(mp, agno, slot, found),
1089 TP_STRUCT__entry(
1090 __field(dev_t, dev)
1091 __field(xfs_agnumber_t, agno)
1092 __field(int, slot)
1093 __field(int, found)
1094 ),
1095 TP_fast_assign(
1096 __entry->dev = mp->m_super->s_dev;
1097 __entry->agno = agno;
1098 __entry->slot = slot;
1099 __entry->found = found;
1100 ),
1101 TP_printk("dev %d:%d agno %u slot %d %s",
1102 MAJOR(__entry->dev), MINOR(__entry->dev),
1103 __entry->agno,
1104 __entry->slot,
1105 __print_symbolic(__entry->found, XFS_BUSY_STATES))
1106);
1107
1108TRACE_EVENT(xfs_alloc_busysearch,
1109 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
1110 xfs_extlen_t len, xfs_lsn_t lsn),
1111 TP_ARGS(mp, agno, agbno, len, lsn),
1112 TP_STRUCT__entry(
1113 __field(dev_t, dev)
1114 __field(xfs_agnumber_t, agno)
1115 __field(xfs_agblock_t, agbno)
1116 __field(xfs_extlen_t, len)
1117 __field(xfs_lsn_t, lsn)
1118 ),
1119 TP_fast_assign(
1120 __entry->dev = mp->m_super->s_dev;
1121 __entry->agno = agno;
1122 __entry->agbno = agbno;
1123 __entry->len = len;
1124 __entry->lsn = lsn;
1125 ),
1126 TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx",
1127 MAJOR(__entry->dev), MINOR(__entry->dev),
1128 __entry->agno,
1129 __entry->agbno,
1130 __entry->len,
1131 __entry->lsn)
1132);
1133
1134TRACE_EVENT(xfs_agf,
1135 TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
1136 unsigned long caller_ip),
1137 TP_ARGS(mp, agf, flags, caller_ip),
1138 TP_STRUCT__entry(
1139 __field(dev_t, dev)
1140 __field(xfs_agnumber_t, agno)
1141 __field(int, flags)
1142 __field(__u32, length)
1143 __field(__u32, bno_root)
1144 __field(__u32, cnt_root)
1145 __field(__u32, bno_level)
1146 __field(__u32, cnt_level)
1147 __field(__u32, flfirst)
1148 __field(__u32, fllast)
1149 __field(__u32, flcount)
1150 __field(__u32, freeblks)
1151 __field(__u32, longest)
1152 __field(unsigned long, caller_ip)
1153 ),
1154 TP_fast_assign(
1155 __entry->dev = mp->m_super->s_dev;
1156 __entry->agno = be32_to_cpu(agf->agf_seqno);
1157 __entry->flags = flags;
1158 __entry->length = be32_to_cpu(agf->agf_length);
1159 __entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
1160 __entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
1161 __entry->bno_level =
1162 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
1163 __entry->cnt_level =
1164 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
1165 __entry->flfirst = be32_to_cpu(agf->agf_flfirst);
1166 __entry->fllast = be32_to_cpu(agf->agf_fllast);
1167 __entry->flcount = be32_to_cpu(agf->agf_flcount);
1168 __entry->freeblks = be32_to_cpu(agf->agf_freeblks);
1169 __entry->longest = be32_to_cpu(agf->agf_longest);
1170 __entry->caller_ip = caller_ip;
1171 ),
1172 TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
1173 "levels b %u c %u flfirst %u fllast %u flcount %u "
1174 "freeblks %u longest %u caller %pf",
1175 MAJOR(__entry->dev), MINOR(__entry->dev),
1176 __entry->agno,
1177 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
1178 __entry->length,
1179 __entry->bno_root,
1180 __entry->cnt_root,
1181 __entry->bno_level,
1182 __entry->cnt_level,
1183 __entry->flfirst,
1184 __entry->fllast,
1185 __entry->flcount,
1186 __entry->freeblks,
1187 __entry->longest,
1188 (void *)__entry->caller_ip)
1189);
1190
1191TRACE_EVENT(xfs_free_extent,
1192 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
1193 xfs_extlen_t len, bool isfl, int haveleft, int haveright),
1194 TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright),
1195 TP_STRUCT__entry(
1196 __field(dev_t, dev)
1197 __field(xfs_agnumber_t, agno)
1198 __field(xfs_agblock_t, agbno)
1199 __field(xfs_extlen_t, len)
1200 __field(int, isfl)
1201 __field(int, haveleft)
1202 __field(int, haveright)
1203 ),
1204 TP_fast_assign(
1205 __entry->dev = mp->m_super->s_dev;
1206 __entry->agno = agno;
1207 __entry->agbno = agbno;
1208 __entry->len = len;
1209 __entry->isfl = isfl;
1210 __entry->haveleft = haveleft;
1211 __entry->haveright = haveright;
1212 ),
1213 TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s",
1214 MAJOR(__entry->dev), MINOR(__entry->dev),
1215 __entry->agno,
1216 __entry->agbno,
1217 __entry->len,
1218 __entry->isfl,
1219 __entry->haveleft ?
1220 (__entry->haveright ? "both" : "left") :
1221 (__entry->haveright ? "right" : "none"))
1222
1223);
1224
1225DECLARE_EVENT_CLASS(xfs_alloc_class,
1226 TP_PROTO(struct xfs_alloc_arg *args),
1227 TP_ARGS(args),
1228 TP_STRUCT__entry(
1229 __field(dev_t, dev)
1230 __field(xfs_agnumber_t, agno)
1231 __field(xfs_agblock_t, agbno)
1232 __field(xfs_extlen_t, minlen)
1233 __field(xfs_extlen_t, maxlen)
1234 __field(xfs_extlen_t, mod)
1235 __field(xfs_extlen_t, prod)
1236 __field(xfs_extlen_t, minleft)
1237 __field(xfs_extlen_t, total)
1238 __field(xfs_extlen_t, alignment)
1239 __field(xfs_extlen_t, minalignslop)
1240 __field(xfs_extlen_t, len)
1241 __field(short, type)
1242 __field(short, otype)
1243 __field(char, wasdel)
1244 __field(char, wasfromfl)
1245 __field(char, isfl)
1246 __field(char, userdata)
1247 __field(xfs_fsblock_t, firstblock)
1248 ),
1249 TP_fast_assign(
1250 __entry->dev = args->mp->m_super->s_dev;
1251 __entry->agno = args->agno;
1252 __entry->agbno = args->agbno;
1253 __entry->minlen = args->minlen;
1254 __entry->maxlen = args->maxlen;
1255 __entry->mod = args->mod;
1256 __entry->prod = args->prod;
1257 __entry->minleft = args->minleft;
1258 __entry->total = args->total;
1259 __entry->alignment = args->alignment;
1260 __entry->minalignslop = args->minalignslop;
1261 __entry->len = args->len;
1262 __entry->type = args->type;
1263 __entry->otype = args->otype;
1264 __entry->wasdel = args->wasdel;
1265 __entry->wasfromfl = args->wasfromfl;
1266 __entry->isfl = args->isfl;
1267 __entry->userdata = args->userdata;
1268 __entry->firstblock = args->firstblock;
1269 ),
1270 TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
1271 "prod %u minleft %u total %u alignment %u minalignslop %u "
1272 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
1273 "userdata %d firstblock 0x%llx",
1274 MAJOR(__entry->dev), MINOR(__entry->dev),
1275 __entry->agno,
1276 __entry->agbno,
1277 __entry->minlen,
1278 __entry->maxlen,
1279 __entry->mod,
1280 __entry->prod,
1281 __entry->minleft,
1282 __entry->total,
1283 __entry->alignment,
1284 __entry->minalignslop,
1285 __entry->len,
1286 __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
1287 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
1288 __entry->wasdel,
1289 __entry->wasfromfl,
1290 __entry->isfl,
1291 __entry->userdata,
1292 __entry->firstblock)
1293)
1294
1295#define DEFINE_ALLOC_EVENT(name) \
1296DEFINE_EVENT(xfs_alloc_class, name, \
1297 TP_PROTO(struct xfs_alloc_arg *args), \
1298 TP_ARGS(args))
1299DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
1300DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
1301DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
1302DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
1303DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
1304DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
1305DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
1306DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
1307DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
1308DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
1309DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
1310DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
1311DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
1312DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
1313DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
1314DEFINE_ALLOC_EVENT(xfs_alloc_small_error);
1315DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs);
1316DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix);
1317DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
1318DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
1319DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
1320
1321DECLARE_EVENT_CLASS(xfs_dir2_class,
1322 TP_PROTO(struct xfs_da_args *args),
1323 TP_ARGS(args),
1324 TP_STRUCT__entry(
1325 __field(dev_t, dev)
1326 __field(xfs_ino_t, ino)
1327 __dynamic_array(char, name, args->namelen)
1328 __field(int, namelen)
1329 __field(xfs_dahash_t, hashval)
1330 __field(xfs_ino_t, inumber)
1331 __field(int, op_flags)
1332 ),
1333 TP_fast_assign(
1334 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1335 __entry->ino = args->dp->i_ino;
1336 if (args->namelen)
1337 memcpy(__get_str(name), args->name, args->namelen);
1338 __entry->namelen = args->namelen;
1339 __entry->hashval = args->hashval;
1340 __entry->inumber = args->inumber;
1341 __entry->op_flags = args->op_flags;
1342 ),
1343 TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
1344 "inumber 0x%llx op_flags %s",
1345 MAJOR(__entry->dev), MINOR(__entry->dev),
1346 __entry->ino,
1347 __entry->namelen,
1348 __entry->namelen ? __get_str(name) : NULL,
1349 __entry->namelen,
1350 __entry->hashval,
1351 __entry->inumber,
1352 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
1353)
1354
1355#define DEFINE_DIR2_EVENT(name) \
1356DEFINE_EVENT(xfs_dir2_class, name, \
1357 TP_PROTO(struct xfs_da_args *args), \
1358 TP_ARGS(args))
1359DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
1360DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
1361DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
1362DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
1363DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
1364DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
1365DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
1366DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
1367DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
1368DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
1369DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
1370DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
1371DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
1372DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
1373DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
1374DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
1375DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
1376DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
1377DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
1378DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
1379DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
1380DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
1381DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
1382DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
1383DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
1384
1385DECLARE_EVENT_CLASS(xfs_dir2_space_class,
1386 TP_PROTO(struct xfs_da_args *args, int idx),
1387 TP_ARGS(args, idx),
1388 TP_STRUCT__entry(
1389 __field(dev_t, dev)
1390 __field(xfs_ino_t, ino)
1391 __field(int, op_flags)
1392 __field(int, idx)
1393 ),
1394 TP_fast_assign(
1395 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1396 __entry->ino = args->dp->i_ino;
1397 __entry->op_flags = args->op_flags;
1398 __entry->idx = idx;
1399 ),
1400 TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
1401 MAJOR(__entry->dev), MINOR(__entry->dev),
1402 __entry->ino,
1403 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
1404 __entry->idx)
1405)
1406
1407#define DEFINE_DIR2_SPACE_EVENT(name) \
1408DEFINE_EVENT(xfs_dir2_space_class, name, \
1409 TP_PROTO(struct xfs_da_args *args, int idx), \
1410 TP_ARGS(args, idx))
1411DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
1412DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
1413DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
1414DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
1415
1416TRACE_EVENT(xfs_dir2_leafn_moveents,
1417 TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
1418 TP_ARGS(args, src_idx, dst_idx, count),
1419 TP_STRUCT__entry(
1420 __field(dev_t, dev)
1421 __field(xfs_ino_t, ino)
1422 __field(int, op_flags)
1423 __field(int, src_idx)
1424 __field(int, dst_idx)
1425 __field(int, count)
1426 ),
1427 TP_fast_assign(
1428 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1429 __entry->ino = args->dp->i_ino;
1430 __entry->op_flags = args->op_flags;
1431 __entry->src_idx = src_idx;
1432 __entry->dst_idx = dst_idx;
1433 __entry->count = count;
1434 ),
1435 TP_printk("dev %d:%d ino 0x%llx op_flags %s "
1436 "src_idx %d dst_idx %d count %d",
1437 MAJOR(__entry->dev), MINOR(__entry->dev),
1438 __entry->ino,
1439 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
1440 __entry->src_idx,
1441 __entry->dst_idx,
1442 __entry->count)
1443);
1444
1445#define XFS_SWAPEXT_INODES \
1446 { 0, "target" }, \
1447 { 1, "temp" }
1448
1449#define XFS_INODE_FORMAT_STR \
1450 { 0, "invalid" }, \
1451 { 1, "local" }, \
1452 { 2, "extent" }, \
1453 { 3, "btree" }
1454
1455DECLARE_EVENT_CLASS(xfs_swap_extent_class,
1456 TP_PROTO(struct xfs_inode *ip, int which),
1457 TP_ARGS(ip, which),
1458 TP_STRUCT__entry(
1459 __field(dev_t, dev)
1460 __field(int, which)
1461 __field(xfs_ino_t, ino)
1462 __field(int, format)
1463 __field(int, nex)
1464 __field(int, max_nex)
1465 __field(int, broot_size)
1466 __field(int, fork_off)
1467 ),
1468 TP_fast_assign(
1469 __entry->dev = VFS_I(ip)->i_sb->s_dev;
1470 __entry->which = which;
1471 __entry->ino = ip->i_ino;
1472 __entry->format = ip->i_d.di_format;
1473 __entry->nex = ip->i_d.di_nextents;
1474 __entry->max_nex = ip->i_df.if_ext_max;
1475 __entry->broot_size = ip->i_df.if_broot_bytes;
1476 __entry->fork_off = XFS_IFORK_BOFF(ip);
1477 ),
1478 TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
1479 "Max in-fork extents %d, broot size %d, fork offset %d",
1480 MAJOR(__entry->dev), MINOR(__entry->dev),
1481 __entry->ino,
1482 __print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
1483 __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
1484 __entry->nex,
1485 __entry->max_nex,
1486 __entry->broot_size,
1487 __entry->fork_off)
1488)
1489
1490#define DEFINE_SWAPEXT_EVENT(name) \
1491DEFINE_EVENT(xfs_swap_extent_class, name, \
1492 TP_PROTO(struct xfs_inode *ip, int which), \
1493 TP_ARGS(ip, which))
1494
1495DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
1496DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
1497
1498#endif /* _TRACE_XFS_H */
1499
1500#undef TRACE_INCLUDE_PATH
1501#define TRACE_INCLUDE_PATH .
1502#define TRACE_INCLUDE_FILE xfs_trace
1503#include <trace/define_trace.h>
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index ad7fbead4c97..7c220b4227bc 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -36,10 +36,13 @@ struct attrlist_cursor_kern;
 /*
  * Flags for read/write calls - same values as IRIX
  */
-#define IO_ISAIO	0x00001	/* don't wait for completion */
 #define IO_ISDIRECT	0x00004	/* bypass page cache */
 #define IO_INVIS	0x00020	/* don't update inode timestamps */
 
+#define XFS_IO_FLAGS \
+	{ IO_ISDIRECT,	"DIRECT" }, \
+	{ IO_INVIS,	"INVIS"}
+
 /*
  * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
  */
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 497c7fb75cc1..fa01b9daba6b 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -30,10 +30,10 @@
 
 
 static int
-__xfs_xattr_get(struct inode *inode, const char *name,
+xfs_xattr_get(struct dentry *dentry, const char *name,
 		void *value, size_t size, int xflags)
 {
-	struct xfs_inode *ip = XFS_I(inode);
+	struct xfs_inode *ip = XFS_I(dentry->d_inode);
 	int error, asize = size;
 
 	if (strcmp(name, "") == 0)
@@ -45,17 +45,17 @@ __xfs_xattr_get(struct inode *inode, const char *name,
 		value = NULL;
 	}
 
-	error = -xfs_attr_get(ip, name, value, &asize, xflags);
+	error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
 	if (error)
 		return error;
 	return asize;
 }
 
 static int
-__xfs_xattr_set(struct inode *inode, const char *name, const void *value,
+xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
 		size_t size, int flags, int xflags)
 {
-	struct xfs_inode *ip = XFS_I(inode);
+	struct xfs_inode *ip = XFS_I(dentry->d_inode);
 
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
@@ -67,79 +67,39 @@ __xfs_xattr_set(struct inode *inode, const char *name, const void *value,
 	xflags |= ATTR_REPLACE;
 
 	if (!value)
-		return -xfs_attr_remove(ip, name, xflags);
-	return -xfs_attr_set(ip, name, (void *)value, size, xflags);
-}
-
-static int
-xfs_xattr_user_get(struct inode *inode, const char *name,
-		void *value, size_t size)
-{
-	return __xfs_xattr_get(inode, name, value, size, 0);
-}
-
-static int
-xfs_xattr_user_set(struct inode *inode, const char *name,
-		const void *value, size_t size, int flags)
-{
-	return __xfs_xattr_set(inode, name, value, size, flags, 0);
+		return -xfs_attr_remove(ip, (unsigned char *)name, xflags);
+	return -xfs_attr_set(ip, (unsigned char *)name,
+				(void *)value, size, xflags);
 }
 
 static struct xattr_handler xfs_xattr_user_handler = {
 	.prefix	= XATTR_USER_PREFIX,
-	.get	= xfs_xattr_user_get,
-	.set	= xfs_xattr_user_set,
+	.flags	= 0, /* no flags implies user namespace */
+	.get	= xfs_xattr_get,
+	.set	= xfs_xattr_set,
 };
 
-
-static int
-xfs_xattr_trusted_get(struct inode *inode, const char *name,
-		void *value, size_t size)
-{
-	return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT);
-}
-
-static int
-xfs_xattr_trusted_set(struct inode *inode, const char *name,
-		const void *value, size_t size, int flags)
-{
-	return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT);
-}
-
 static struct xattr_handler xfs_xattr_trusted_handler = {
 	.prefix	= XATTR_TRUSTED_PREFIX,
-	.get	= xfs_xattr_trusted_get,
-	.set	= xfs_xattr_trusted_set,
+	.flags	= ATTR_ROOT,
+	.get	= xfs_xattr_get,
+	.set	= xfs_xattr_set,
 };
 
-
-static int
-xfs_xattr_secure_get(struct inode *inode, const char *name,
-		void *value, size_t size)
-{
-	return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE);
-}
-
-static int
-xfs_xattr_secure_set(struct inode *inode, const char *name,
-		const void *value, size_t size, int flags)
-{
-	return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE);
-}
-
 static struct xattr_handler xfs_xattr_security_handler = {
 	.prefix	= XATTR_SECURITY_PREFIX,
-	.get	= xfs_xattr_secure_get,
-	.set	= xfs_xattr_secure_set,
+	.flags	= ATTR_SECURE,
+	.get	= xfs_xattr_get,
+	.set	= xfs_xattr_set,
 };
 
-
 struct xattr_handler *xfs_xattr_handlers[] = {
 	&xfs_xattr_user_handler,
 	&xfs_xattr_trusted_handler,
 	&xfs_xattr_security_handler,
 #ifdef CONFIG_XFS_POSIX_ACL
-	&xfs_xattr_system_handler,
+	&xfs_xattr_acl_access_handler,
+	&xfs_xattr_acl_default_handler,
 #endif
 	NULL
 };
@@ -165,8 +125,13 @@ static const char *xfs_xattr_prefix(int flags)
 }
 
 static int
-xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags,
-		char *name, int namelen, int valuelen, char *value)
+xfs_xattr_put_listent(
+	struct xfs_attr_list_context *context,
+	int		flags,
+	unsigned char	*name,
+	int		namelen,
+	int		valuelen,
+	unsigned char	*value)
 {
 	unsigned int prefix_len = xfs_xattr_prefix_len(flags);
 	char *offset;
@@ -189,7 +154,7 @@ xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags,
 	offset = (char *)context->alist + context->count;
 	strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
 	offset += prefix_len;
-	strncpy(offset, name, namelen);			/* real name */
+	strncpy(offset, (char *)name, namelen);		/* real name */
 	offset += namelen;
 	*offset = '\0';
 	context->count += prefix_len + namelen + 1;
@@ -197,8 +162,13 @@ xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags,
 }
 
 static int
-xfs_xattr_put_listent_sizes(struct xfs_attr_list_context *context, int flags,
-		char *name, int namelen, int valuelen, char *value)
+xfs_xattr_put_listent_sizes(
+	struct xfs_attr_list_context *context,
+	int		flags,
+	unsigned char	*name,
+	int		namelen,
+	int		valuelen,
+	unsigned char	*value)
 {
 	context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
 	return 0;
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 2f3f2229eaaf..5f79dd78626b 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -47,6 +47,7 @@
47#include "xfs_trans_space.h" 47#include "xfs_trans_space.h"
48#include "xfs_trans_priv.h" 48#include "xfs_trans_priv.h"
49#include "xfs_qm.h" 49#include "xfs_qm.h"
50#include "xfs_trace.h"
50 51
51 52
52/* 53/*
@@ -112,10 +113,7 @@ xfs_qm_dqinit(
112 init_completion(&dqp->q_flush); 113 init_completion(&dqp->q_flush);
113 complete(&dqp->q_flush); 114 complete(&dqp->q_flush);
114 115
115#ifdef XFS_DQUOT_TRACE 116 trace_xfs_dqinit(dqp);
116 dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_NOFS);
117 xfs_dqtrace_entry(dqp, "DQINIT");
118#endif
119 } else { 117 } else {
120 /* 118 /*
121 * Only the q_core portion was zeroed in dqreclaim_one(). 119 * Only the q_core portion was zeroed in dqreclaim_one().
@@ -136,10 +134,7 @@ xfs_qm_dqinit(
136 dqp->q_hash = NULL; 134 dqp->q_hash = NULL;
137 ASSERT(dqp->dq_flnext == dqp->dq_flprev); 135 ASSERT(dqp->dq_flnext == dqp->dq_flprev);
138 136
139#ifdef XFS_DQUOT_TRACE 137 trace_xfs_dqreuse(dqp);
140 ASSERT(dqp->q_trace);
141 xfs_dqtrace_entry(dqp, "DQRECLAIMED_INIT");
142#endif
143 } 138 }
144 139
145 /* 140 /*
@@ -167,13 +162,8 @@ xfs_qm_dqdestroy(
167 162
168 mutex_destroy(&dqp->q_qlock); 163 mutex_destroy(&dqp->q_qlock);
169 sv_destroy(&dqp->q_pinwait); 164 sv_destroy(&dqp->q_pinwait);
170
171#ifdef XFS_DQUOT_TRACE
172 if (dqp->q_trace)
173 ktrace_free(dqp->q_trace);
174 dqp->q_trace = NULL;
175#endif
176 kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); 165 kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
166
177 atomic_dec(&xfs_Gqm->qm_totaldquots); 167 atomic_dec(&xfs_Gqm->qm_totaldquots);
178} 168}
179 169
@@ -195,49 +185,6 @@ xfs_qm_dqinit_core(
195 d->dd_diskdq.d_flags = type; 185 d->dd_diskdq.d_flags = type;
196} 186}
197 187
198
199#ifdef XFS_DQUOT_TRACE
200/*
201 * Dquot tracing for debugging.
202 */
203/* ARGSUSED */
204void
205__xfs_dqtrace_entry(
206 xfs_dquot_t *dqp,
207 char *func,
208 void *retaddr,
209 xfs_inode_t *ip)
210{
211 xfs_dquot_t *udqp = NULL;
212 xfs_ino_t ino = 0;
213
214 ASSERT(dqp->q_trace);
215 if (ip) {
216 ino = ip->i_ino;
217 udqp = ip->i_udquot;
218 }
219 ktrace_enter(dqp->q_trace,
220 (void *)(__psint_t)DQUOT_KTRACE_ENTRY,
221 (void *)func,
222 (void *)(__psint_t)dqp->q_nrefs,
223 (void *)(__psint_t)dqp->dq_flags,
224 (void *)(__psint_t)dqp->q_res_bcount,
225 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_bcount),
226 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_icount),
227 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_hardlimit),
228 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_softlimit),
229 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_hardlimit),
230 (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_softlimit),
231 (void *)(__psint_t)be32_to_cpu(dqp->q_core.d_id),
232 (void *)(__psint_t)current_pid(),
233 (void *)(__psint_t)ino,
234 (void *)(__psint_t)retaddr,
235 (void *)(__psint_t)udqp);
236 return;
237}
238#endif
239
240
241/* 188/*
242 * If default limits are in force, push them into the dquot now. 189 * If default limits are in force, push them into the dquot now.
243 * We overwrite the dquot limits only if they are zero and this 190 * We overwrite the dquot limits only if they are zero and this
@@ -425,7 +372,8 @@ xfs_qm_dqalloc(
425 xfs_trans_t *tp = *tpp; 372 xfs_trans_t *tp = *tpp;
426 373
427 ASSERT(tp != NULL); 374 ASSERT(tp != NULL);
428 xfs_dqtrace_entry(dqp, "DQALLOC"); 375
376 trace_xfs_dqalloc(dqp);
429 377
430 /* 378 /*
431 * Initialize the bmap freelist prior to calling bmapi code. 379 * Initialize the bmap freelist prior to calling bmapi code.
@@ -612,7 +560,8 @@ xfs_qm_dqtobp(
612 * (in which case we already have the buf). 560 * (in which case we already have the buf).
613 */ 561 */
614 if (! newdquot) { 562 if (! newdquot) {
615 xfs_dqtrace_entry(dqp, "DQTOBP READBUF"); 563 trace_xfs_dqtobp_read(dqp);
564
616 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 565 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
617 dqp->q_blkno, 566 dqp->q_blkno,
618 XFS_QI_DQCHUNKLEN(mp), 567 XFS_QI_DQCHUNKLEN(mp),
@@ -670,11 +619,12 @@ xfs_qm_dqread(
670 619
671 ASSERT(tpp); 620 ASSERT(tpp);
672 621
622 trace_xfs_dqread(dqp);
623
673 /* 624 /*
674 * get a pointer to the on-disk dquot and the buffer containing it 625 * get a pointer to the on-disk dquot and the buffer containing it
675 * dqp already knows its own type (GROUP/USER). 626 * dqp already knows its own type (GROUP/USER).
676 */ 627 */
677 xfs_dqtrace_entry(dqp, "DQREAD");
678 if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) { 628 if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
679 return (error); 629 return (error);
680 } 630 }
@@ -763,7 +713,7 @@ xfs_qm_idtodq(
763 * or if the dquot didn't exist on disk and we ask to 713 * or if the dquot didn't exist on disk and we ask to
764 * allocate (ENOENT). 714 * allocate (ENOENT).
765 */ 715 */
766 xfs_dqtrace_entry(dqp, "DQREAD FAIL"); 716 trace_xfs_dqread_fail(dqp);
767 cancelflags |= XFS_TRANS_ABORT; 717 cancelflags |= XFS_TRANS_ABORT;
768 goto error0; 718 goto error0;
769 } 719 }
@@ -817,7 +767,8 @@ xfs_qm_dqlookup(
817 * id can't be modified without the hashlock anyway. 767 * id can't be modified without the hashlock anyway.
818 */ 768 */
819 if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) { 769 if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
820 xfs_dqtrace_entry(dqp, "DQFOUND BY LOOKUP"); 770 trace_xfs_dqlookup_found(dqp);
771
821 /* 772 /*
822 * All in core dquots must be on the dqlist of mp 773 * All in core dquots must be on the dqlist of mp
823 */ 774 */
@@ -827,7 +778,7 @@ xfs_qm_dqlookup(
827 if (dqp->q_nrefs == 0) { 778 if (dqp->q_nrefs == 0) {
828 ASSERT (XFS_DQ_IS_ON_FREELIST(dqp)); 779 ASSERT (XFS_DQ_IS_ON_FREELIST(dqp));
829 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) { 780 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
830 xfs_dqtrace_entry(dqp, "DQLOOKUP: WANT"); 781 trace_xfs_dqlookup_want(dqp);
831 782
832 /* 783 /*
833 * We may have raced with dqreclaim_one() 784 * We may have raced with dqreclaim_one()
@@ -857,8 +808,7 @@ xfs_qm_dqlookup(
857 /* 808 /*
858 * take it off the freelist 809 * take it off the freelist
859 */ 810 */
860 xfs_dqtrace_entry(dqp, 811 trace_xfs_dqlookup_freelist(dqp);
861 "DQLOOKUP: TAKEOFF FL");
862 XQM_FREELIST_REMOVE(dqp); 812 XQM_FREELIST_REMOVE(dqp);
863 /* xfs_qm_freelist_print(&(xfs_Gqm-> 813 /* xfs_qm_freelist_print(&(xfs_Gqm->
864 qm_dqfreelist), 814 qm_dqfreelist),
@@ -878,8 +828,7 @@ xfs_qm_dqlookup(
878 */ 828 */
879 ASSERT(mutex_is_locked(&qh->qh_lock)); 829 ASSERT(mutex_is_locked(&qh->qh_lock));
880 if (dqp->HL_PREVP != &qh->qh_next) { 830 if (dqp->HL_PREVP != &qh->qh_next) {
881 xfs_dqtrace_entry(dqp, 831 trace_xfs_dqlookup_move(dqp);
882 "DQLOOKUP: HASH MOVETOFRONT");
883 if ((d = dqp->HL_NEXT)) 832 if ((d = dqp->HL_NEXT))
884 d->HL_PREVP = dqp->HL_PREVP; 833 d->HL_PREVP = dqp->HL_PREVP;
885 *(dqp->HL_PREVP) = d; 834 *(dqp->HL_PREVP) = d;
@@ -889,7 +838,7 @@ xfs_qm_dqlookup(
889 dqp->HL_PREVP = &qh->qh_next; 838 dqp->HL_PREVP = &qh->qh_next;
890 qh->qh_next = dqp; 839 qh->qh_next = dqp;
891 } 840 }
892 xfs_dqtrace_entry(dqp, "LOOKUP END"); 841 trace_xfs_dqlookup_done(dqp);
893 *O_dqpp = dqp; 842 *O_dqpp = dqp;
894 ASSERT(mutex_is_locked(&qh->qh_lock)); 843 ASSERT(mutex_is_locked(&qh->qh_lock));
895 return (0); 844 return (0);
@@ -971,7 +920,7 @@ xfs_qm_dqget(
971 ASSERT(*O_dqpp); 920 ASSERT(*O_dqpp);
972 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); 921 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
973 mutex_unlock(&h->qh_lock); 922 mutex_unlock(&h->qh_lock);
974 xfs_dqtrace_entry(*O_dqpp, "DQGET DONE (FROM CACHE)"); 923 trace_xfs_dqget_hit(*O_dqpp);
975 return (0); /* success */ 924 return (0); /* success */
976 } 925 }
977 XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses); 926 XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
@@ -1104,7 +1053,7 @@ xfs_qm_dqget(
1104 mutex_unlock(&h->qh_lock); 1053 mutex_unlock(&h->qh_lock);
1105 dqret: 1054 dqret:
1106 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); 1055 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
1107 xfs_dqtrace_entry(dqp, "DQGET DONE"); 1056 trace_xfs_dqget_miss(dqp);
1108 *O_dqpp = dqp; 1057 *O_dqpp = dqp;
1109 return (0); 1058 return (0);
1110} 1059}
@@ -1124,7 +1073,8 @@ xfs_qm_dqput(
1124 1073
1125 ASSERT(dqp->q_nrefs > 0); 1074 ASSERT(dqp->q_nrefs > 0);
1126 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1075 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1127 xfs_dqtrace_entry(dqp, "DQPUT"); 1076
1077 trace_xfs_dqput(dqp);
1128 1078
1129 if (dqp->q_nrefs != 1) { 1079 if (dqp->q_nrefs != 1) {
1130 dqp->q_nrefs--; 1080 dqp->q_nrefs--;
@@ -1137,7 +1087,7 @@ xfs_qm_dqput(
1137 * in the right order; but try to get it out-of-order first 1087 * in the right order; but try to get it out-of-order first
1138 */ 1088 */
1139 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) { 1089 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
1140 xfs_dqtrace_entry(dqp, "DQPUT: FLLOCK-WAIT"); 1090 trace_xfs_dqput_wait(dqp);
1141 xfs_dqunlock(dqp); 1091 xfs_dqunlock(dqp);
1142 xfs_qm_freelist_lock(xfs_Gqm); 1092 xfs_qm_freelist_lock(xfs_Gqm);
1143 xfs_dqlock(dqp); 1093 xfs_dqlock(dqp);
@@ -1148,7 +1098,8 @@ xfs_qm_dqput(
1148 1098
1149 /* We can't depend on nrefs being == 1 here */ 1099 /* We can't depend on nrefs being == 1 here */
1150 if (--dqp->q_nrefs == 0) { 1100 if (--dqp->q_nrefs == 0) {
1151 xfs_dqtrace_entry(dqp, "DQPUT: ON FREELIST"); 1101 trace_xfs_dqput_free(dqp);
1102
1152 /* 1103 /*
1153 * insert at end of the freelist. 1104 * insert at end of the freelist.
1154 */ 1105 */
@@ -1196,7 +1147,7 @@ xfs_qm_dqrele(
1196 if (!dqp) 1147 if (!dqp)
1197 return; 1148 return;
1198 1149
1199 xfs_dqtrace_entry(dqp, "DQRELE"); 1150 trace_xfs_dqrele(dqp);
1200 1151
1201 xfs_dqlock(dqp); 1152 xfs_dqlock(dqp);
1202 /* 1153 /*
@@ -1229,14 +1180,14 @@ xfs_qm_dqflush(
1229 1180
1230 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1181 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1231 ASSERT(!completion_done(&dqp->q_flush)); 1182 ASSERT(!completion_done(&dqp->q_flush));
1232 xfs_dqtrace_entry(dqp, "DQFLUSH"); 1183 trace_xfs_dqflush(dqp);
1233 1184
1234 /* 1185 /*
1235 * If not dirty, or it's pinned and we are not supposed to 1186 * If not dirty, or it's pinned and we are not supposed to
1236 * block, nada. 1187 * block, nada.
1237 */ 1188 */
1238 if (!XFS_DQ_IS_DIRTY(dqp) || 1189 if (!XFS_DQ_IS_DIRTY(dqp) ||
1239 (!(flags & XFS_QMOPT_SYNC) && atomic_read(&dqp->q_pincount) > 0)) { 1190 (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
1240 xfs_dqfunlock(dqp); 1191 xfs_dqfunlock(dqp);
1241 return 0; 1192 return 0;
1242 } 1193 }
@@ -1259,7 +1210,6 @@ xfs_qm_dqflush(
1259 * the ondisk-dquot has already been allocated for. 1210 * the ondisk-dquot has already been allocated for.
1260 */ 1211 */
1261 if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) { 1212 if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) {
1262 xfs_dqtrace_entry(dqp, "DQTOBP FAIL");
1263 ASSERT(error != ENOENT); 1213 ASSERT(error != ENOENT);
1264 /* 1214 /*
1265 * Quotas could have gotten turned off (ESRCH) 1215 * Quotas could have gotten turned off (ESRCH)
@@ -1297,22 +1247,21 @@ xfs_qm_dqflush(
1297 * get stuck waiting in the write for too long. 1247 * get stuck waiting in the write for too long.
1298 */ 1248 */
1299 if (XFS_BUF_ISPINNED(bp)) { 1249 if (XFS_BUF_ISPINNED(bp)) {
1300 xfs_dqtrace_entry(dqp, "DQFLUSH LOG FORCE"); 1250 trace_xfs_dqflush_force(dqp);
1301 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 1251 xfs_log_force(mp, 0);
1302 } 1252 }
1303 1253
1304 if (flags & XFS_QMOPT_DELWRI) { 1254 if (flags & SYNC_WAIT)
1305 xfs_bdwrite(mp, bp);
1306 } else if (flags & XFS_QMOPT_ASYNC) {
1307 error = xfs_bawrite(mp, bp);
1308 } else {
1309 error = xfs_bwrite(mp, bp); 1255 error = xfs_bwrite(mp, bp);
1310 } 1256 else
1311 xfs_dqtrace_entry(dqp, "DQFLUSH END"); 1257 xfs_bdwrite(mp, bp);
1258
1259 trace_xfs_dqflush_done(dqp);
1260
1312 /* 1261 /*
1313 * dqp is still locked, but caller is free to unlock it now. 1262 * dqp is still locked, but caller is free to unlock it now.
1314 */ 1263 */
1315 return (error); 1264 return error;
1316 1265
1317} 1266}
1318 1267
@@ -1483,7 +1432,7 @@ xfs_qm_dqpurge(
1483 */ 1432 */
1484 if (XFS_DQ_IS_DIRTY(dqp)) { 1433 if (XFS_DQ_IS_DIRTY(dqp)) {
1485 int error; 1434 int error;
1486 xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY"); 1435
1487 /* dqflush unlocks dqflock */ 1436 /* dqflush unlocks dqflock */
1488 /* 1437 /*
1489 * Given that dqpurge is a very rare occurrence, it is OK 1438 * Given that dqpurge is a very rare occurrence, it is OK
@@ -1493,7 +1442,7 @@ xfs_qm_dqpurge(
1493 * We don't care about getting disk errors here. We need 1442 * We don't care about getting disk errors here. We need
1494 * to purge this dquot anyway, so we go ahead regardless. 1443 * to purge this dquot anyway, so we go ahead regardless.
1495 */ 1444 */
1496 error = xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC); 1445 error = xfs_qm_dqflush(dqp, SYNC_WAIT);
1497 if (error) 1446 if (error)
1498 xfs_fs_cmn_err(CE_WARN, mp, 1447 xfs_fs_cmn_err(CE_WARN, mp,
1499 "xfs_qm_dqpurge: dquot %p flush failed", dqp); 1448 "xfs_qm_dqpurge: dquot %p flush failed", dqp);
@@ -1577,25 +1526,17 @@ xfs_qm_dqflock_pushbuf_wait(
1577 * the flush lock when the I/O completes. 1526 * the flush lock when the I/O completes.
1578 */ 1527 */
1579 bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno, 1528 bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno,
1580 XFS_QI_DQCHUNKLEN(dqp->q_mount), 1529 XFS_QI_DQCHUNKLEN(dqp->q_mount), XBF_TRYLOCK);
1581 XFS_INCORE_TRYLOCK); 1530 if (!bp)
1582 if (bp != NULL) { 1531 goto out_lock;
1583 if (XFS_BUF_ISDELAYWRITE(bp)) { 1532
1584 int error; 1533 if (XFS_BUF_ISDELAYWRITE(bp)) {
1585 if (XFS_BUF_ISPINNED(bp)) { 1534 if (XFS_BUF_ISPINNED(bp))
1586 xfs_log_force(dqp->q_mount, 1535 xfs_log_force(dqp->q_mount, 0);
1587 (xfs_lsn_t)0, 1536 xfs_buf_delwri_promote(bp);
1588 XFS_LOG_FORCE); 1537 wake_up_process(bp->b_target->bt_task);
1589 }
1590 error = xfs_bawrite(dqp->q_mount, bp);
1591 if (error)
1592 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
1593 "xfs_qm_dqflock_pushbuf_wait: "
1594 "pushbuf error %d on dqp %p, bp %p",
1595 error, dqp, bp);
1596 } else {
1597 xfs_buf_relse(bp);
1598 }
1599 } 1538 }
1539 xfs_buf_relse(bp);
1540out_lock:
1600 xfs_dqflock(dqp); 1541 xfs_dqflock(dqp);
1601} 1542}
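
The flush tail above collapses the old three-way XFS_QMOPT_DELWRI/XFS_QMOPT_ASYNC/default split into a single SYNC_WAIT test. A condensed view of the resulting dispatch, extracted from the patched xfs_qm_dqflush() with locking and tracing elided:

	/* synchronous callers block on the write; everyone else queues a
	 * delayed write for xfsbufd to push later */
	if (flags & SYNC_WAIT)
		error = xfs_bwrite(mp, bp);	/* submit and wait for I/O */
	else
		xfs_bdwrite(mp, bp);		/* mark delwri and return */

The same convention runs through every caller below: SYNC_WAIT on purge paths that must see the dquot clean on disk, 0 for opportunistic background flushes. xfs_qm_dqflock_pushbuf_wait() follows suit, promoting the delwri buffer with xfs_buf_delwri_promote() and waking the flusher thread instead of issuing its own xfs_bawrite().
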
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 6533ead9b889..a0f7da586d1b 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -85,9 +85,6 @@ typedef struct xfs_dquot {
85 struct completion q_flush; /* flush completion queue */ 85 struct completion q_flush; /* flush completion queue */
86 atomic_t q_pincount; /* dquot pin count */ 86 atomic_t q_pincount; /* dquot pin count */
87 wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ 87 wait_queue_head_t q_pinwait; /* dquot pinning wait queue */
88#ifdef XFS_DQUOT_TRACE
89 struct ktrace *q_trace; /* trace header structure */
90#endif
91} xfs_dquot_t; 88} xfs_dquot_t;
92 89
93 90
@@ -98,7 +95,7 @@ typedef struct xfs_dquot {
98#define dq_flags q_lists.dqm_flags 95#define dq_flags q_lists.dqm_flags
99 96
100/* 97/*
101 * Lock hierachy for q_qlock: 98 * Lock hierarchy for q_qlock:
102 * XFS_QLOCK_NORMAL is the implicit default, 99 * XFS_QLOCK_NORMAL is the implicit default,
103 * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2 100 * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
104 */ 101 */
@@ -144,24 +141,6 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
144 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ 141 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
145 (XFS_IS_OQUOTA_ON((d)->q_mount)))) 142 (XFS_IS_OQUOTA_ON((d)->q_mount))))
146 143
147#ifdef XFS_DQUOT_TRACE
148/*
149 * Dquot Tracing stuff.
150 */
151#define DQUOT_TRACE_SIZE 64
152#define DQUOT_KTRACE_ENTRY 1
153
154extern void __xfs_dqtrace_entry(xfs_dquot_t *dqp, char *func,
155 void *, xfs_inode_t *);
156#define xfs_dqtrace_entry_ino(a,b,ip) \
157 __xfs_dqtrace_entry((a), (b), (void*)__return_address, (ip))
158#define xfs_dqtrace_entry(a,b) \
159 __xfs_dqtrace_entry((a), (b), (void*)__return_address, NULL)
160#else
161#define xfs_dqtrace_entry(a,b)
162#define xfs_dqtrace_entry_ino(a,b,ip)
163#endif
164
165#ifdef QUOTADEBUG 144#ifdef QUOTADEBUG
166extern void xfs_qm_dqprint(xfs_dquot_t *); 145extern void xfs_qm_dqprint(xfs_dquot_t *);
167#else 146#else
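
With the XFS_DQUOT_TRACE ktrace hooks gone from this header, dquot events come from static tracepoints declared in xfs_trace.h. A reduced sketch of such a declaration using the kernel's TRACE_EVENT machinery; the real event class records more state (flags, reference counts, limits), so the two fields here are illustrative only:

	DECLARE_EVENT_CLASS(xfs_dquot_class,
		TP_PROTO(struct xfs_dquot *dqp),
		TP_ARGS(dqp),
		TP_STRUCT__entry(
			__field(dev_t, dev)
			__field(u32, id)
		),
		TP_fast_assign(
			__entry->dev = dqp->q_mount->m_super->s_dev;
			__entry->id = be32_to_cpu(dqp->q_core.d_id);
		),
		TP_printk("dev %d:%d id 0x%x",
			  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->id)
	);
	DEFINE_EVENT(xfs_dquot_class, xfs_dqput_free,
		TP_PROTO(struct xfs_dquot *dqp), TP_ARGS(dqp));

Each trace_xfs_*(dqp) call in the diff corresponds to one DEFINE_EVENT line, so adding a trace point costs one line instead of a bespoke ktrace entry plus a format string.
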
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index d0d4a9a0bbd7..4e4ee9a57194 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -74,11 +74,11 @@ xfs_qm_dquot_logitem_format(
74 74
75 logvec->i_addr = (xfs_caddr_t)&logitem->qli_format; 75 logvec->i_addr = (xfs_caddr_t)&logitem->qli_format;
76 logvec->i_len = sizeof(xfs_dq_logformat_t); 76 logvec->i_len = sizeof(xfs_dq_logformat_t);
77 XLOG_VEC_SET_TYPE(logvec, XLOG_REG_TYPE_QFORMAT); 77 logvec->i_type = XLOG_REG_TYPE_QFORMAT;
78 logvec++; 78 logvec++;
79 logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core; 79 logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core;
80 logvec->i_len = sizeof(xfs_disk_dquot_t); 80 logvec->i_len = sizeof(xfs_disk_dquot_t);
81 XLOG_VEC_SET_TYPE(logvec, XLOG_REG_TYPE_DQUOT); 81 logvec->i_type = XLOG_REG_TYPE_DQUOT;
82 82
83 ASSERT(2 == logitem->qli_item.li_desc->lid_size); 83 ASSERT(2 == logitem->qli_item.li_desc->lid_size);
84 logitem->qli_format.qlf_size = 2; 84 logitem->qli_format.qlf_size = 2;
@@ -153,7 +153,7 @@ xfs_qm_dquot_logitem_push(
153 * lock without sleeping, then there must not have been 153 * lock without sleeping, then there must not have been
154 * anyone in the process of flushing the dquot. 154 * anyone in the process of flushing the dquot.
155 */ 155 */
156 error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); 156 error = xfs_qm_dqflush(dqp, 0);
157 if (error) 157 if (error)
158 xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 158 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
159 "xfs_qm_dquot_logitem_push: push error %d on dqp %p", 159 "xfs_qm_dquot_logitem_push: push error %d on dqp %p",
@@ -190,7 +190,7 @@ xfs_qm_dqunpin_wait(
190 /* 190 /*
191 * Give the log a push so we don't wait here too long. 191 * Give the log a push so we don't wait here too long.
192 */ 192 */
193 xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE); 193 xfs_log_force(dqp->q_mount, 0);
194 wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); 194 wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
195} 195}
196 196
@@ -212,68 +212,31 @@ xfs_qm_dquot_logitem_pushbuf(
212 xfs_dquot_t *dqp; 212 xfs_dquot_t *dqp;
213 xfs_mount_t *mp; 213 xfs_mount_t *mp;
214 xfs_buf_t *bp; 214 xfs_buf_t *bp;
215 uint dopush;
216 215
217 dqp = qip->qli_dquot; 216 dqp = qip->qli_dquot;
218 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 217 ASSERT(XFS_DQ_IS_LOCKED(dqp));
219 218
220 /* 219 /*
221 * The qli_pushbuf_flag keeps others from
222 * trying to duplicate our effort.
223 */
224 ASSERT(qip->qli_pushbuf_flag != 0);
225 ASSERT(qip->qli_push_owner == current_pid());
226
227 /*
228 * If flushlock isn't locked anymore, chances are that the 220 * If flushlock isn't locked anymore, chances are that the
229 * dquot flush completed and the dquot was taken off the AIL. 221 * dquot flush completed and the dquot was taken off the AIL.
230 * So, just get out. 222 * So, just get out.
231 */ 223 */
232 if (completion_done(&dqp->q_flush) || 224 if (completion_done(&dqp->q_flush) ||
233 ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { 225 ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) {
234 qip->qli_pushbuf_flag = 0;
235 xfs_dqunlock(dqp); 226 xfs_dqunlock(dqp);
236 return; 227 return;
237 } 228 }
238 mp = dqp->q_mount; 229 mp = dqp->q_mount;
239 bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, 230 bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno,
240 XFS_QI_DQCHUNKLEN(mp), 231 XFS_QI_DQCHUNKLEN(mp), XBF_TRYLOCK);
241 XFS_INCORE_TRYLOCK); 232 xfs_dqunlock(dqp);
242 if (bp != NULL) { 233 if (!bp)
243 if (XFS_BUF_ISDELAYWRITE(bp)) {
244 dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
245 !completion_done(&dqp->q_flush));
246 qip->qli_pushbuf_flag = 0;
247 xfs_dqunlock(dqp);
248
249 if (XFS_BUF_ISPINNED(bp)) {
250 xfs_log_force(mp, (xfs_lsn_t)0,
251 XFS_LOG_FORCE);
252 }
253 if (dopush) {
254 int error;
255#ifdef XFSRACEDEBUG
256 delay_for_intr();
257 delay(300);
258#endif
259 error = xfs_bawrite(mp, bp);
260 if (error)
261 xfs_fs_cmn_err(CE_WARN, mp,
262 "xfs_qm_dquot_logitem_pushbuf: pushbuf error %d on qip %p, bp %p",
263 error, qip, bp);
264 } else {
265 xfs_buf_relse(bp);
266 }
267 } else {
268 qip->qli_pushbuf_flag = 0;
269 xfs_dqunlock(dqp);
270 xfs_buf_relse(bp);
271 }
272 return; 234 return;
273 } 235 if (XFS_BUF_ISDELAYWRITE(bp))
236 xfs_buf_delwri_promote(bp);
237 xfs_buf_relse(bp);
238 return;
274 239
275 qip->qli_pushbuf_flag = 0;
276 xfs_dqunlock(dqp);
277} 240}
278 241
279/* 242/*
@@ -291,50 +254,24 @@ xfs_qm_dquot_logitem_trylock(
291 xfs_dq_logitem_t *qip) 254 xfs_dq_logitem_t *qip)
292{ 255{
293 xfs_dquot_t *dqp; 256 xfs_dquot_t *dqp;
294 uint retval;
295 257
296 dqp = qip->qli_dquot; 258 dqp = qip->qli_dquot;
297 if (atomic_read(&dqp->q_pincount) > 0) 259 if (atomic_read(&dqp->q_pincount) > 0)
298 return (XFS_ITEM_PINNED); 260 return XFS_ITEM_PINNED;
299 261
300 if (! xfs_qm_dqlock_nowait(dqp)) 262 if (! xfs_qm_dqlock_nowait(dqp))
301 return (XFS_ITEM_LOCKED); 263 return XFS_ITEM_LOCKED;
302 264
303 retval = XFS_ITEM_SUCCESS;
304 if (!xfs_dqflock_nowait(dqp)) { 265 if (!xfs_dqflock_nowait(dqp)) {
305 /* 266 /*
306 * The dquot is already being flushed. It may have been 267 * dquot has already been flushed to the backing buffer,
307 * flushed delayed write, however, and we don't want to 268 * leave it locked, pushbuf routine will unlock it.
308 * get stuck waiting for that to complete. So, we want to check
309 * to see if we can lock the dquot's buffer without sleeping.
310 * If we can and it is marked for delayed write, then we
311 * hold it and send it out from the push routine. We don't
312 * want to do that now since we might sleep in the device
313 * strategy routine. We also don't want to grab the buffer lock
314 * here because we'd like not to call into the buffer cache
315 * while holding the AIL lock.
316 * Make sure to only return PUSHBUF if we set pushbuf_flag
317 * ourselves. If someone else is doing it then we don't
318 * want to go to the push routine and duplicate their efforts.
319 */ 269 */
320 if (qip->qli_pushbuf_flag == 0) { 270 return XFS_ITEM_PUSHBUF;
321 qip->qli_pushbuf_flag = 1;
322 ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno);
323#ifdef DEBUG
324 qip->qli_push_owner = current_pid();
325#endif
326 /*
327 * The dquot is left locked.
328 */
329 retval = XFS_ITEM_PUSHBUF;
330 } else {
331 retval = XFS_ITEM_FLUSHING;
332 xfs_dqunlock_nonotify(dqp);
333 }
334 } 271 }
335 272
336 ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL); 273 ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL);
337 return (retval); 274 return XFS_ITEM_SUCCESS;
338} 275}
339 276
340 277
@@ -467,7 +404,7 @@ xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf,
467 404
468 log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); 405 log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format);
469 log_vector->i_len = sizeof(xfs_qoff_logitem_t); 406 log_vector->i_len = sizeof(xfs_qoff_logitem_t);
470 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_QUOTAOFF); 407 log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
471 qf->qql_format.qf_size = 1; 408 qf->qql_format.qf_size = 1;
472} 409}
473 410
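
The trylock path above now reduces to a four-state ladder that the AIL uses to decide how to handle the log item; condensed from the patched xfs_qm_dquot_logitem_trylock():

	if (atomic_read(&dqp->q_pincount) > 0)
		return XFS_ITEM_PINNED;		/* needs a log force to unpin */
	if (!xfs_qm_dqlock_nowait(dqp))
		return XFS_ITEM_LOCKED;		/* q_qlock contended, retry later */
	if (!xfs_dqflock_nowait(dqp))
		return XFS_ITEM_PUSHBUF;	/* already flushed to its buffer;
						 * dquot stays locked for pushbuf */
	return XFS_ITEM_SUCCESS;		/* caller may flush the dquot */

Dropping the qli_pushbuf_flag/qli_push_owner handshake is safe because the pushbuf routine no longer issues its own xfs_bawrite(); it merely promotes the delwri buffer, so a duplicated push is harmless.
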
diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h
index 5a632531f843..5acae2ada70b 100644
--- a/fs/xfs/quota/xfs_dquot_item.h
+++ b/fs/xfs/quota/xfs_dquot_item.h
@@ -27,10 +27,6 @@ typedef struct xfs_dq_logitem {
27 xfs_log_item_t qli_item; /* common portion */ 27 xfs_log_item_t qli_item; /* common portion */
28 struct xfs_dquot *qli_dquot; /* dquot ptr */ 28 struct xfs_dquot *qli_dquot; /* dquot ptr */
29 xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ 29 xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
30 unsigned short qli_pushbuf_flag; /* 1 bit used in push_ail */
31#ifdef DEBUG
32 uint64_t qli_push_owner;
33#endif
34 xfs_dq_logformat_t qli_format; /* logged structure */ 30 xfs_dq_logformat_t qli_format; /* logged structure */
35} xfs_dq_logitem_t; 31} xfs_dq_logitem_t;
36 32
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 45b1bfef7388..417e61e3d9dd 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -47,6 +47,7 @@
47#include "xfs_trans_space.h" 47#include "xfs_trans_space.h"
48#include "xfs_utils.h" 48#include "xfs_utils.h"
49#include "xfs_qm.h" 49#include "xfs_qm.h"
50#include "xfs_trace.h"
50 51
51/* 52/*
52 * The global quota manager. There is only one of these for the entire 53 * The global quota manager. There is only one of these for the entire
@@ -117,9 +118,14 @@ xfs_Gqm_init(void)
117 */ 118 */
118 udqhash = kmem_zalloc_greedy(&hsize, 119 udqhash = kmem_zalloc_greedy(&hsize,
119 XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t), 120 XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
120 XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t), 121 XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
121 KM_SLEEP | KM_MAYFAIL | KM_LARGE); 122 if (!udqhash)
122 gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE); 123 goto out;
124
125 gdqhash = kmem_zalloc_large(hsize);
126 if (!gdqhash)
127 goto out_free_udqhash;
128
123 hsize /= sizeof(xfs_dqhash_t); 129 hsize /= sizeof(xfs_dqhash_t);
124 ndquot = hsize << 8; 130 ndquot = hsize << 8;
125 131
@@ -169,6 +175,11 @@ xfs_Gqm_init(void)
169 mutex_init(&qcheck_lock); 175 mutex_init(&qcheck_lock);
170#endif 176#endif
171 return xqm; 177 return xqm;
178
179 out_free_udqhash:
180 kmem_free_large(udqhash);
181 out:
182 return NULL;
172} 183}
173 184
174/* 185/*
@@ -188,8 +199,8 @@ xfs_qm_destroy(
188 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); 199 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
189 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); 200 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
190 } 201 }
191 kmem_free(xqm->qm_usr_dqhtable); 202 kmem_free_large(xqm->qm_usr_dqhtable);
192 kmem_free(xqm->qm_grp_dqhtable); 203 kmem_free_large(xqm->qm_grp_dqhtable);
193 xqm->qm_usr_dqhtable = NULL; 204 xqm->qm_usr_dqhtable = NULL;
194 xqm->qm_grp_dqhtable = NULL; 205 xqm->qm_grp_dqhtable = NULL;
195 xqm->qm_dqhashmask = 0; 206 xqm->qm_dqhashmask = 0;
@@ -218,8 +229,12 @@ xfs_qm_hold_quotafs_ref(
218 */ 229 */
219 mutex_lock(&xfs_Gqm_lock); 230 mutex_lock(&xfs_Gqm_lock);
220 231
221 if (xfs_Gqm == NULL) 232 if (!xfs_Gqm) {
222 xfs_Gqm = xfs_Gqm_init(); 233 xfs_Gqm = xfs_Gqm_init();
234 if (!xfs_Gqm)
235 return ENOMEM;
236 }
237
223 /* 238 /*
224 * We can keep a list of all filesystems with quotas mounted for 239 * We can keep a list of all filesystems with quotas mounted for
225 * debugging and statistical purposes, but ... 240 * debugging and statistical purposes, but ...
@@ -435,7 +450,7 @@ xfs_qm_unmount_quotas(
435STATIC int 450STATIC int
436xfs_qm_dqflush_all( 451xfs_qm_dqflush_all(
437 xfs_mount_t *mp, 452 xfs_mount_t *mp,
438 int flags) 453 int sync_mode)
439{ 454{
440 int recl; 455 int recl;
441 xfs_dquot_t *dqp; 456 xfs_dquot_t *dqp;
@@ -453,7 +468,7 @@ again:
453 xfs_dqunlock(dqp); 468 xfs_dqunlock(dqp);
454 continue; 469 continue;
455 } 470 }
456 xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY"); 471
457 /* XXX a sentinel would be better */ 472 /* XXX a sentinel would be better */
458 recl = XFS_QI_MPLRECLAIMS(mp); 473 recl = XFS_QI_MPLRECLAIMS(mp);
459 if (!xfs_dqflock_nowait(dqp)) { 474 if (!xfs_dqflock_nowait(dqp)) {
@@ -471,7 +486,7 @@ again:
471 * across a disk write. 486 * across a disk write.
472 */ 487 */
473 xfs_qm_mplist_unlock(mp); 488 xfs_qm_mplist_unlock(mp);
474 error = xfs_qm_dqflush(dqp, flags); 489 error = xfs_qm_dqflush(dqp, sync_mode);
475 xfs_dqunlock(dqp); 490 xfs_dqunlock(dqp);
476 if (error) 491 if (error)
477 return error; 492 return error;
@@ -651,7 +666,7 @@ xfs_qm_dqattach_one(
651 */ 666 */
652 dqp = *IO_idqpp; 667 dqp = *IO_idqpp;
653 if (dqp) { 668 if (dqp) {
654 xfs_dqtrace_entry(dqp, "DQATTACH: found in ip"); 669 trace_xfs_dqattach_found(dqp);
655 return 0; 670 return 0;
656 } 671 }
657 672
@@ -704,7 +719,7 @@ xfs_qm_dqattach_one(
704 if (error) 719 if (error)
705 return error; 720 return error;
706 721
707 xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget"); 722 trace_xfs_dqattach_get(dqp);
708 723
709 /* 724 /*
710 * dqget may have dropped and re-acquired the ilock, but it guarantees 725 * dqget may have dropped and re-acquired the ilock, but it guarantees
@@ -890,15 +905,15 @@ xfs_qm_dqdetach(
890 if (!(ip->i_udquot || ip->i_gdquot)) 905 if (!(ip->i_udquot || ip->i_gdquot))
891 return; 906 return;
892 907
908 trace_xfs_dquot_dqdetach(ip);
909
893 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); 910 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
894 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); 911 ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
895 if (ip->i_udquot) { 912 if (ip->i_udquot) {
896 xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
897 xfs_qm_dqrele(ip->i_udquot); 913 xfs_qm_dqrele(ip->i_udquot);
898 ip->i_udquot = NULL; 914 ip->i_udquot = NULL;
899 } 915 }
900 if (ip->i_gdquot) { 916 if (ip->i_gdquot) {
901 xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip);
902 xfs_qm_dqrele(ip->i_gdquot); 917 xfs_qm_dqrele(ip->i_gdquot);
903 ip->i_gdquot = NULL; 918 ip->i_gdquot = NULL;
904 } 919 }
@@ -911,13 +926,11 @@ xfs_qm_sync(
911{ 926{
912 int recl, restarts; 927 int recl, restarts;
913 xfs_dquot_t *dqp; 928 xfs_dquot_t *dqp;
914 uint flush_flags;
915 int error; 929 int error;
916 930
917 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) 931 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
918 return 0; 932 return 0;
919 933
920 flush_flags = (flags & SYNC_WAIT) ? XFS_QMOPT_SYNC : XFS_QMOPT_DELWRI;
921 restarts = 0; 934 restarts = 0;
922 935
923 again: 936 again:
@@ -977,8 +990,7 @@ xfs_qm_sync(
977 * across a disk write 990 * across a disk write
978 */ 991 */
979 xfs_qm_mplist_unlock(mp); 992 xfs_qm_mplist_unlock(mp);
980 xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH"); 993 error = xfs_qm_dqflush(dqp, flags);
981 error = xfs_qm_dqflush(dqp, flush_flags);
982 xfs_dqunlock(dqp); 994 xfs_dqunlock(dqp);
983 if (error && XFS_FORCED_SHUTDOWN(mp)) 995 if (error && XFS_FORCED_SHUTDOWN(mp))
984 return 0; /* Need to prevent umount failure */ 996 return 0; /* Need to prevent umount failure */
@@ -1350,7 +1362,8 @@ xfs_qm_reset_dqcounts(
1350 xfs_disk_dquot_t *ddq; 1362 xfs_disk_dquot_t *ddq;
1351 int j; 1363 int j;
1352 1364
1353 xfs_buftrace("RESET DQUOTS", bp); 1365 trace_xfs_reset_dqcounts(bp, _RET_IP_);
1366
1354 /* 1367 /*
1355 * Reset all counters and timers. They'll be 1368 * Reset all counters and timers. They'll be
1356 * started afresh by xfs_qm_quotacheck. 1369 * started afresh by xfs_qm_quotacheck.
@@ -1543,7 +1556,9 @@ xfs_qm_quotacheck_dqadjust(
1543 xfs_qcnt_t rtblks) 1556 xfs_qcnt_t rtblks)
1544{ 1557{
1545 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1558 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1546 xfs_dqtrace_entry(dqp, "QCHECK DQADJUST"); 1559
1560 trace_xfs_dqadjust(dqp);
1561
1547 /* 1562 /*
1548 * Adjust the inode count and the block count to reflect this inode's 1563 * Adjust the inode count and the block count to reflect this inode's
1549 * resource usage. 1564 * resource usage.
@@ -1779,7 +1794,7 @@ xfs_qm_quotacheck(
1779 * successfully. 1794 * successfully.
1780 */ 1795 */
1781 if (!error) 1796 if (!error)
1782 error = xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI); 1797 error = xfs_qm_dqflush_all(mp, 0);
1783 1798
1784 /* 1799 /*
1785 * We can get this error if we couldn't do a dquot allocation inside 1800 * We can get this error if we couldn't do a dquot allocation inside
@@ -1994,12 +2009,14 @@ xfs_qm_shake_freelist(
1994 */ 2009 */
1995 if (XFS_DQ_IS_DIRTY(dqp)) { 2010 if (XFS_DQ_IS_DIRTY(dqp)) {
1996 int error; 2011 int error;
1997 xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY"); 2012
2013 trace_xfs_dqshake_dirty(dqp);
2014
1998 /* 2015 /*
1999 * We flush it delayed write, so don't bother 2016 * We flush it delayed write, so don't bother
2000 * releasing the mplock. 2017 * releasing the mplock.
2001 */ 2018 */
2002 error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); 2019 error = xfs_qm_dqflush(dqp, 0);
2003 if (error) { 2020 if (error) {
2004 xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 2021 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2005 "xfs_qm_dqflush_all: dquot %p flush failed", dqp); 2022 "xfs_qm_dqflush_all: dquot %p flush failed", dqp);
@@ -2038,7 +2055,9 @@ xfs_qm_shake_freelist(
2038 return nreclaimed; 2055 return nreclaimed;
2039 goto tryagain; 2056 goto tryagain;
2040 } 2057 }
2041 xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING"); 2058
2059 trace_xfs_dqshake_unlink(dqp);
2060
2042#ifdef QUOTADEBUG 2061#ifdef QUOTADEBUG
2043 cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n", 2062 cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
2044 dqp, be32_to_cpu(dqp->q_core.d_id)); 2063 dqp, be32_to_cpu(dqp->q_core.d_id));
@@ -2125,7 +2144,9 @@ xfs_qm_dqreclaim_one(void)
2125 */ 2144 */
2126 if (dqp->dq_flags & XFS_DQ_WANT) { 2145 if (dqp->dq_flags & XFS_DQ_WANT) {
2127 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); 2146 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
2128 xfs_dqtrace_entry(dqp, "DQRECLAIM: DQWANT"); 2147
2148 trace_xfs_dqreclaim_want(dqp);
2149
2129 xfs_dqunlock(dqp); 2150 xfs_dqunlock(dqp);
2130 xfs_qm_freelist_unlock(xfs_Gqm); 2151 xfs_qm_freelist_unlock(xfs_Gqm);
2131 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 2152 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
@@ -2171,12 +2192,14 @@ xfs_qm_dqreclaim_one(void)
2171 */ 2192 */
2172 if (XFS_DQ_IS_DIRTY(dqp)) { 2193 if (XFS_DQ_IS_DIRTY(dqp)) {
2173 int error; 2194 int error;
2174 xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY"); 2195
2196 trace_xfs_dqreclaim_dirty(dqp);
2197
2175 /* 2198 /*
2176 * We flush it delayed write, so don't bother 2199 * We flush it delayed write, so don't bother
2177 * releasing the freelist lock. 2200 * releasing the freelist lock.
2178 */ 2201 */
2179 error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); 2202 error = xfs_qm_dqflush(dqp, 0);
2180 if (error) { 2203 if (error) {
2181 xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 2204 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2182 "xfs_qm_dqreclaim: dquot %p flush failed", dqp); 2205 "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
@@ -2194,8 +2217,9 @@ xfs_qm_dqreclaim_one(void)
2194 if (!mutex_trylock(&dqp->q_hash->qh_lock)) 2217 if (!mutex_trylock(&dqp->q_hash->qh_lock))
2195 goto mplistunlock; 2218 goto mplistunlock;
2196 2219
2220 trace_xfs_dqreclaim_unlink(dqp);
2221
2197 ASSERT(dqp->q_nrefs == 0); 2222 ASSERT(dqp->q_nrefs == 0);
2198 xfs_dqtrace_entry(dqp, "DQRECLAIM: UNLINKING");
2199 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp); 2223 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
2200 XQM_HASHLIST_REMOVE(dqp->q_hash, dqp); 2224 XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
2201 XQM_FREELIST_REMOVE(dqp); 2225 XQM_FREELIST_REMOVE(dqp);
@@ -2430,7 +2454,7 @@ xfs_qm_vop_dqalloc(
2430 } 2454 }
2431 } 2455 }
2432 if (uq) 2456 if (uq)
2433 xfs_dqtrace_entry_ino(uq, "DQALLOC", ip); 2457 trace_xfs_dquot_dqalloc(ip);
2434 2458
2435 xfs_iunlock(ip, lockflags); 2459 xfs_iunlock(ip, lockflags);
2436 if (O_udqpp) 2460 if (O_udqpp)
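
xfs_Gqm_init() above gains a conventional goto-unwind shape for its two hash-table allocations. The skeleton of the pattern, with the setup between the allocations elided (low_size/high_size stand in for the XFS_QM_HASHSIZE_* expressions):

	udqhash = kmem_zalloc_greedy(&hsize, low_size, high_size);
	if (!udqhash)
		goto out;			/* nothing to undo yet */

	gdqhash = kmem_zalloc_large(hsize);
	if (!gdqhash)
		goto out_free_udqhash;		/* undo the first allocation */

	/* ... remaining initialisation ... */
	return xqm;

 out_free_udqhash:
	kmem_free_large(udqhash);
 out:
	return NULL;

Note the matching change in xfs_qm_destroy(): tables obtained from kmem_zalloc_large()/kmem_zalloc_greedy() must be released with kmem_free_large(), not kmem_free().
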
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index a5346630dfae..97b410c12794 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -59,7 +59,7 @@ xfs_fill_statvfs_from_dquot(
59 be64_to_cpu(dp->d_blk_hardlimit); 59 be64_to_cpu(dp->d_blk_hardlimit);
60 if (limit && statp->f_blocks > limit) { 60 if (limit && statp->f_blocks > limit) {
61 statp->f_blocks = limit; 61 statp->f_blocks = limit;
62 statp->f_bfree = 62 statp->f_bfree = statp->f_bavail =
63 (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ? 63 (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ?
64 (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0; 64 (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0;
65 } 65 }
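
The statvfs hunk makes f_bavail track the quota-clamped f_bfree. As a worked example, with a block hard limit of 100 and 70 blocks already charged to the quota, the reported figures become:

	statp->f_blocks = 100;			/* clamped from the fs total */
	statp->f_bfree = statp->f_bavail = 30;	/* 100 - 70; 0 if bcount >= limit */

Previously only f_bfree was adjusted, so anything reading f_bavail (df and most userspace allocators) still saw the unclamped filesystem-wide value.
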
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 5d1a3b98a6e6..50bee07d6b0e 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -49,6 +49,7 @@
49#include "xfs_buf_item.h" 49#include "xfs_buf_item.h"
50#include "xfs_utils.h" 50#include "xfs_utils.h"
51#include "xfs_qm.h" 51#include "xfs_qm.h"
52#include "xfs_trace.h"
52 53
53#ifdef DEBUG 54#ifdef DEBUG
54# define qdprintk(s, args...) cmn_err(CE_DEBUG, s, ## args) 55# define qdprintk(s, args...) cmn_err(CE_DEBUG, s, ## args)
@@ -496,7 +497,6 @@ xfs_qm_scall_setqlim(
496 ASSERT(error != ENOENT); 497 ASSERT(error != ENOENT);
497 return (error); 498 return (error);
498 } 499 }
499 xfs_dqtrace_entry(dqp, "Q_SETQLIM: AFT DQGET");
500 xfs_trans_dqjoin(tp, dqp); 500 xfs_trans_dqjoin(tp, dqp);
501 ddq = &dqp->q_core; 501 ddq = &dqp->q_core;
502 502
@@ -602,7 +602,6 @@ xfs_qm_scall_setqlim(
602 dqp->dq_flags |= XFS_DQ_DIRTY; 602 dqp->dq_flags |= XFS_DQ_DIRTY;
603 xfs_trans_log_dquot(tp, dqp); 603 xfs_trans_log_dquot(tp, dqp);
604 604
605 xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT");
606 error = xfs_trans_commit(tp, 0); 605 error = xfs_trans_commit(tp, 0);
607 xfs_qm_dqprint(dqp); 606 xfs_qm_dqprint(dqp);
608 xfs_qm_dqrele(dqp); 607 xfs_qm_dqrele(dqp);
@@ -630,7 +629,6 @@ xfs_qm_scall_getquota(
630 return (error); 629 return (error);
631 } 630 }
632 631
633 xfs_dqtrace_entry(dqp, "Q_GETQUOTA SUCCESS");
634 /* 632 /*
635 * If everything's NULL, this dquot doesn't quite exist as far as 633 * If everything's NULL, this dquot doesn't quite exist as far as
636 * our utility programs are concerned. 634 * our utility programs are concerned.
@@ -893,7 +891,8 @@ xfs_qm_dqrele_all_inodes(
893 uint flags) 891 uint flags)
894{ 892{
895 ASSERT(mp->m_quotainfo); 893 ASSERT(mp->m_quotainfo);
896 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG); 894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
895 XFS_ICI_NO_TAG, 0, NULL);
897} 896}
898 897
899/*------------------------------------------------------------------------*/ 898/*------------------------------------------------------------------------*/
@@ -1194,9 +1193,9 @@ xfs_qm_internalqcheck(
1194 if (! XFS_IS_QUOTA_ON(mp)) 1193 if (! XFS_IS_QUOTA_ON(mp))
1195 return XFS_ERROR(ESRCH); 1194 return XFS_ERROR(ESRCH);
1196 1195
1197 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1196 xfs_log_force(mp, XFS_LOG_SYNC);
1198 XFS_bflush(mp->m_ddev_targp); 1197 XFS_bflush(mp->m_ddev_targp);
1199 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1198 xfs_log_force(mp, XFS_LOG_SYNC);
1200 XFS_bflush(mp->m_ddev_targp); 1199 XFS_bflush(mp->m_ddev_targp);
1201 1200
1202 mutex_lock(&qcheck_lock); 1201 mutex_lock(&qcheck_lock);
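
The xfs_log_force() calls above show the new calling convention used throughout this series: the LSN argument is gone and the flags collapse to one choice (any return value is ignored here):

	xfs_log_force(mp, 0);			/* start pushing the log, don't wait */
	xfs_log_force(mp, XFS_LOG_SYNC);	/* block until the force completes */

The internal quota check wants everything stable on disk before it samples counters, hence the synchronous form bracketing each XFS_bflush() of the data device.
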
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 97ac9640be98..c3ab75cb1d9a 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -589,12 +589,18 @@ xfs_trans_unreserve_and_mod_dquots(
589 } 589 }
590} 590}
591 591
592STATIC int 592STATIC void
593xfs_quota_error(uint flags) 593xfs_quota_warn(
594 struct xfs_mount *mp,
595 struct xfs_dquot *dqp,
596 int type)
594{ 597{
595 if (flags & XFS_QMOPT_ENOSPC) 598 /* no warnings for project quotas - we just return ENOSPC later */
596 return ENOSPC; 599 if (dqp->dq_flags & XFS_DQ_PROJ)
597 return EDQUOT; 600 return;
601 quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA,
602 be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev,
603 type);
598} 604}
599 605
600/* 606/*
@@ -612,7 +618,6 @@ xfs_trans_dqresv(
612 long ninos, 618 long ninos,
613 uint flags) 619 uint flags)
614{ 620{
615 int error;
616 xfs_qcnt_t hardlimit; 621 xfs_qcnt_t hardlimit;
617 xfs_qcnt_t softlimit; 622 xfs_qcnt_t softlimit;
618 time_t timer; 623 time_t timer;
@@ -649,7 +654,6 @@ xfs_trans_dqresv(
649 warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount); 654 warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount);
650 resbcountp = &dqp->q_res_rtbcount; 655 resbcountp = &dqp->q_res_rtbcount;
651 } 656 }
652 error = 0;
653 657
654 if ((flags & XFS_QMOPT_FORCE_RES) == 0 && 658 if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
655 dqp->q_core.d_id && 659 dqp->q_core.d_id &&
@@ -667,18 +671,20 @@ xfs_trans_dqresv(
667 * nblks. 671 * nblks.
668 */ 672 */
669 if (hardlimit > 0ULL && 673 if (hardlimit > 0ULL &&
670 (hardlimit <= nblks + *resbcountp)) { 674 hardlimit <= nblks + *resbcountp) {
671 error = xfs_quota_error(flags); 675 xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
672 goto error_return; 676 goto error_return;
673 } 677 }
674
675 if (softlimit > 0ULL && 678 if (softlimit > 0ULL &&
676 (softlimit <= nblks + *resbcountp)) { 679 softlimit <= nblks + *resbcountp) {
677 if ((timer != 0 && get_seconds() > timer) || 680 if ((timer != 0 && get_seconds() > timer) ||
678 (warns != 0 && warns >= warnlimit)) { 681 (warns != 0 && warns >= warnlimit)) {
679 error = xfs_quota_error(flags); 682 xfs_quota_warn(mp, dqp,
683 QUOTA_NL_BSOFTLONGWARN);
680 goto error_return; 684 goto error_return;
681 } 685 }
686
687 xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN);
682 } 688 }
683 } 689 }
684 if (ninos > 0) { 690 if (ninos > 0) {
@@ -692,15 +698,19 @@ xfs_trans_dqresv(
692 softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); 698 softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
693 if (!softlimit) 699 if (!softlimit)
694 softlimit = q->qi_isoftlimit; 700 softlimit = q->qi_isoftlimit;
701
695 if (hardlimit > 0ULL && count >= hardlimit) { 702 if (hardlimit > 0ULL && count >= hardlimit) {
696 error = xfs_quota_error(flags); 703 xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
697 goto error_return; 704 goto error_return;
698 } else if (softlimit > 0ULL && count >= softlimit) { 705 }
699 if ((timer != 0 && get_seconds() > timer) || 706 if (softlimit > 0ULL && count >= softlimit) {
707 if ((timer != 0 && get_seconds() > timer) ||
700 (warns != 0 && warns >= warnlimit)) { 708 (warns != 0 && warns >= warnlimit)) {
701 error = xfs_quota_error(flags); 709 xfs_quota_warn(mp, dqp,
710 QUOTA_NL_ISOFTLONGWARN);
702 goto error_return; 711 goto error_return;
703 } 712 }
713 xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN);
704 } 714 }
705 } 715 }
706 } 716 }
@@ -736,9 +746,14 @@ xfs_trans_dqresv(
736 ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount)); 746 ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount));
737 ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount)); 747 ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
738 748
749 xfs_dqunlock(dqp);
750 return 0;
751
739error_return: 752error_return:
740 xfs_dqunlock(dqp); 753 xfs_dqunlock(dqp);
741 return error; 754 if (flags & XFS_QMOPT_ENOSPC)
755 return ENOSPC;
756 return EDQUOT;
742} 757}
743 758
744 759
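
The reservation path now emits netlink quota warnings as it walks the limits and derives the error code once, at error_return. The block-limit ladder, condensed from the patched xfs_trans_dqresv():

	if (hardlimit > 0ULL && hardlimit <= nblks + *resbcountp) {
		xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
		goto error_return;		/* hard limit: always fail */
	}
	if (softlimit > 0ULL && softlimit <= nblks + *resbcountp) {
		if ((timer != 0 && get_seconds() > timer) ||
		    (warns != 0 && warns >= warnlimit)) {
			xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTLONGWARN);
			goto error_return;	/* grace expired or warns exhausted */
		}
		xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN);
		/* soft limit freshly crossed: warn but allow */
	}

error_return maps the failure to ENOSPC for XFS_QMOPT_ENOSPC callers and EDQUOT otherwise, which is also why xfs_quota_warn() stays silent for XFS_DQ_PROJ dquots: project quotas report plain ENOSPC, not a per-user quota event.
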
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 6f4fd37c67af..d2d20462fd4f 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -41,10 +41,6 @@ extern void assfail(char *expr, char *f, int l);
41# define STATIC static noinline 41# define STATIC static noinline
42#endif 42#endif
43 43
44#ifndef STATIC_INLINE
45# define STATIC_INLINE static inline
46#endif
47
48#else /* DEBUG */ 44#else /* DEBUG */
49 45
50#define ASSERT(expr) \ 46#define ASSERT(expr) \
@@ -54,19 +50,5 @@ extern void assfail(char *expr, char *f, int l);
54# define STATIC noinline 50# define STATIC noinline
55#endif 51#endif
56 52
57/*
58 * We stop inlining of inline functions in debug mode.
59 * Unfortunately, this means static inline in header files
60 * get multiple definitions, so they need to remain static.
61 * This then gives tonnes of warnings about unused but defined
62 * functions, so we need to add the unused attribute to prevent
63 * these spurious warnings.
64 */
65#ifndef STATIC_INLINE
66# define STATIC_INLINE static __attribute__ ((unused)) noinline
67#endif
68
69#endif /* DEBUG */ 53#endif /* DEBUG */
70
71
72#endif /* __XFS_SUPPORT_DEBUG_H__ */ 54#endif /* __XFS_SUPPORT_DEBUG_H__ */
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
deleted file mode 100644
index 2d494c26717f..000000000000
--- a/fs/xfs/support/ktrace.c
+++ /dev/null
@@ -1,323 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include <xfs.h>
19
20static kmem_zone_t *ktrace_hdr_zone;
21static kmem_zone_t *ktrace_ent_zone;
22static int ktrace_zentries;
23
24void __init
25ktrace_init(int zentries)
26{
27 ktrace_zentries = roundup_pow_of_two(zentries);
28
29 ktrace_hdr_zone = kmem_zone_init(sizeof(ktrace_t),
30 "ktrace_hdr");
31 ASSERT(ktrace_hdr_zone);
32
33 ktrace_ent_zone = kmem_zone_init(ktrace_zentries
34 * sizeof(ktrace_entry_t),
35 "ktrace_ent");
36 ASSERT(ktrace_ent_zone);
37}
38
39void __exit
40ktrace_uninit(void)
41{
42 kmem_zone_destroy(ktrace_hdr_zone);
43 kmem_zone_destroy(ktrace_ent_zone);
44}
45
46/*
47 * ktrace_alloc()
48 *
49 * Allocate a ktrace header and enough buffering for the given
50 * number of entries. Round the number of entries up to a
51 * power of 2 so we can do fast masking to get the index from
52 * the atomic index counter.
53 */
54ktrace_t *
55ktrace_alloc(int nentries, unsigned int __nocast sleep)
56{
57 ktrace_t *ktp;
58 ktrace_entry_t *ktep;
59 int entries;
60
61 ktp = (ktrace_t*)kmem_zone_alloc(ktrace_hdr_zone, sleep);
62
63 if (ktp == (ktrace_t*)NULL) {
64 /*
65 * KM_SLEEP callers don't expect failure.
66 */
67 if (sleep & KM_SLEEP)
68 panic("ktrace_alloc: NULL memory on KM_SLEEP request!");
69
70 return NULL;
71 }
72
73 /*
74 * Special treatment for buffers with the ktrace_zentries entries
75 */
76 entries = roundup_pow_of_two(nentries);
77 if (entries == ktrace_zentries) {
78 ktep = (ktrace_entry_t*)kmem_zone_zalloc(ktrace_ent_zone,
79 sleep);
80 } else {
81 ktep = (ktrace_entry_t*)kmem_zalloc((entries * sizeof(*ktep)),
82 sleep | KM_LARGE);
83 }
84
85 if (ktep == NULL) {
86 /*
87 * KM_SLEEP callers don't expect failure.
88 */
89 if (sleep & KM_SLEEP)
90 panic("ktrace_alloc: NULL memory on KM_SLEEP request!");
91
92 kmem_free(ktp);
93
94 return NULL;
95 }
96
97 ktp->kt_entries = ktep;
98 ktp->kt_nentries = entries;
99 ASSERT(is_power_of_2(entries));
100 ktp->kt_index_mask = entries - 1;
101 atomic_set(&ktp->kt_index, 0);
102 ktp->kt_rollover = 0;
103 return ktp;
104}
105
106
107/*
108 * ktrace_free()
109 *
110 * Free up the ktrace header and buffer. It is up to the caller
111 * to ensure that no-one is referencing it.
112 */
113void
114ktrace_free(ktrace_t *ktp)
115{
116 if (ktp == (ktrace_t *)NULL)
117 return;
118
119 /*
120 * Special treatment for the Vnode trace buffer.
121 */
122 if (ktp->kt_nentries == ktrace_zentries)
123 kmem_zone_free(ktrace_ent_zone, ktp->kt_entries);
124 else
125 kmem_free(ktp->kt_entries);
126
127 kmem_zone_free(ktrace_hdr_zone, ktp);
128}
129
130
131/*
132 * Enter the given values into the "next" entry in the trace buffer.
133 * kt_index is always the index of the next entry to be filled.
134 */
135void
136ktrace_enter(
137 ktrace_t *ktp,
138 void *val0,
139 void *val1,
140 void *val2,
141 void *val3,
142 void *val4,
143 void *val5,
144 void *val6,
145 void *val7,
146 void *val8,
147 void *val9,
148 void *val10,
149 void *val11,
150 void *val12,
151 void *val13,
152 void *val14,
153 void *val15)
154{
155 int index;
156 ktrace_entry_t *ktep;
157
158 ASSERT(ktp != NULL);
159
160 /*
161 * Grab an entry by pushing the index up to the next one.
162 */
163 index = atomic_add_return(1, &ktp->kt_index);
164 index = (index - 1) & ktp->kt_index_mask;
165 if (!ktp->kt_rollover && index == ktp->kt_nentries - 1)
166 ktp->kt_rollover = 1;
167
168 ASSERT((index >= 0) && (index < ktp->kt_nentries));
169
170 ktep = &(ktp->kt_entries[index]);
171
172 ktep->val[0] = val0;
173 ktep->val[1] = val1;
174 ktep->val[2] = val2;
175 ktep->val[3] = val3;
176 ktep->val[4] = val4;
177 ktep->val[5] = val5;
178 ktep->val[6] = val6;
179 ktep->val[7] = val7;
180 ktep->val[8] = val8;
181 ktep->val[9] = val9;
182 ktep->val[10] = val10;
183 ktep->val[11] = val11;
184 ktep->val[12] = val12;
185 ktep->val[13] = val13;
186 ktep->val[14] = val14;
187 ktep->val[15] = val15;
188}
189
190/*
191 * Return the number of entries in the trace buffer.
192 */
193int
194ktrace_nentries(
195 ktrace_t *ktp)
196{
197 int index;
198 if (ktp == NULL)
199 return 0;
200
201 index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask;
202 return (ktp->kt_rollover ? ktp->kt_nentries : index);
203}
204
205/*
206 * ktrace_first()
207 *
208 * This is used to find the start of the trace buffer.
209 * In conjunction with ktrace_next() it can be used to
210 * iterate through the entire trace buffer. This code does
211 * not do any locking because it is assumed that it is called
212 * from the debugger.
213 *
214 * The caller must pass in a pointer to a ktrace_snap
215 * structure in which we will keep some state used to
216 * iterate through the buffer. This state must not be touched
217 * by any code outside of this module.
218 */
219ktrace_entry_t *
220ktrace_first(ktrace_t *ktp, ktrace_snap_t *ktsp)
221{
222 ktrace_entry_t *ktep;
223 int index;
224 int nentries;
225
226 if (ktp->kt_rollover)
227 index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask;
228 else
229 index = 0;
230
231 ktsp->ks_start = index;
232 ktep = &(ktp->kt_entries[index]);
233
234 nentries = ktrace_nentries(ktp);
235 index++;
236 if (index < nentries) {
237 ktsp->ks_index = index;
238 } else {
239 ktsp->ks_index = 0;
240 if (index > nentries)
241 ktep = NULL;
242 }
243 return ktep;
244}
245
246/*
247 * ktrace_next()
248 *
249 * This is used to iterate through the entries of the given
250 * trace buffer. The caller must pass in the ktrace_snap_t
251 * structure initialized by ktrace_first(). The return value
252 * will be either a pointer to the next ktrace_entry or NULL
253 * if all of the entries have been traversed.
254 */
255ktrace_entry_t *
256ktrace_next(
257 ktrace_t *ktp,
258 ktrace_snap_t *ktsp)
259{
260 int index;
261 ktrace_entry_t *ktep;
262
263 index = ktsp->ks_index;
264 if (index == ktsp->ks_start) {
265 ktep = NULL;
266 } else {
267 ktep = &ktp->kt_entries[index];
268 }
269
270 index++;
271 if (index == ktrace_nentries(ktp)) {
272 ktsp->ks_index = 0;
273 } else {
274 ktsp->ks_index = index;
275 }
276
277 return ktep;
278}
279
280/*
281 * ktrace_skip()
282 *
283 * Skip the next "count" entries and return the entry after that.
284 * Return NULL if this causes us to iterate past the beginning again.
285 */
286ktrace_entry_t *
287ktrace_skip(
288 ktrace_t *ktp,
289 int count,
290 ktrace_snap_t *ktsp)
291{
292 int index;
293 int new_index;
294 ktrace_entry_t *ktep;
295 int nentries = ktrace_nentries(ktp);
296
297 index = ktsp->ks_index;
298 new_index = index + count;
299 while (new_index >= nentries) {
300 new_index -= nentries;
301 }
302 if (index == ktsp->ks_start) {
303 /*
304 * We've iterated around to the start, so we're done.
305 */
306 ktep = NULL;
307 } else if ((new_index < index) && (index < ktsp->ks_index)) {
308 /*
309 * We've skipped past the start again, so we're done.
310 */
311 ktep = NULL;
312 ktsp->ks_index = ktsp->ks_start;
313 } else {
314 ktep = &(ktp->kt_entries[new_index]);
315 new_index++;
316 if (new_index == nentries) {
317 ktsp->ks_index = 0;
318 } else {
319 ktsp->ks_index = new_index;
320 }
321 }
322 return ktep;
323}
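
The removed ktrace buffer claimed slots with an atomic counter and a power-of-two mask, which is why ktrace_alloc() rounded entry counts up: for n a power of two, x & (n - 1) equals x % n without a division. The core of the scheme, extracted from the deleted ktrace_enter():

	/* claim the next slot; the mask makes the index wrap naturally */
	index = atomic_add_return(1, &ktp->kt_index);
	index = (index - 1) & ktp->kt_index_mask;	/* mask == kt_nentries - 1 */
	ktep = &ktp->kt_entries[index];
	/* fill ktep->val[0..15]; concurrent writers may interleave slots */

The generic tracepoint ring buffer replacing it gives the same lock-free claim semantics plus per-CPU buffers and runtime visibility through debugfs, so none of this needs to live in XFS.
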
diff --git a/fs/xfs/support/ktrace.h b/fs/xfs/support/ktrace.h
deleted file mode 100644
index 741d6947ca60..000000000000
--- a/fs/xfs/support/ktrace.h
+++ /dev/null
@@ -1,85 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_SUPPORT_KTRACE_H__
19#define __XFS_SUPPORT_KTRACE_H__
20
21/*
22 * Trace buffer entry structure.
23 */
24typedef struct ktrace_entry {
25 void *val[16];
26} ktrace_entry_t;
27
28/*
29 * Trace buffer header structure.
30 */
31typedef struct ktrace {
32 int kt_nentries; /* number of entries in trace buf */
33 atomic_t kt_index; /* current index in entries */
34 unsigned int kt_index_mask;
35 int kt_rollover;
36 ktrace_entry_t *kt_entries; /* buffer of entries */
37} ktrace_t;
38
39/*
40 * Trace buffer snapshot structure.
41 */
42typedef struct ktrace_snap {
43 int ks_start; /* kt_index at time of snap */
44 int ks_index; /* current index */
45} ktrace_snap_t;
46
47
48#ifdef CONFIG_XFS_TRACE
49
50extern void ktrace_init(int zentries);
51extern void ktrace_uninit(void);
52
53extern ktrace_t *ktrace_alloc(int, unsigned int __nocast);
54extern void ktrace_free(ktrace_t *);
55
56extern void ktrace_enter(
57 ktrace_t *,
58 void *,
59 void *,
60 void *,
61 void *,
62 void *,
63 void *,
64 void *,
65 void *,
66 void *,
67 void *,
68 void *,
69 void *,
70 void *,
71 void *,
72 void *,
73 void *);
74
75extern ktrace_entry_t *ktrace_first(ktrace_t *, ktrace_snap_t *);
76extern int ktrace_nentries(ktrace_t *);
77extern ktrace_entry_t *ktrace_next(ktrace_t *, ktrace_snap_t *);
78extern ktrace_entry_t *ktrace_skip(ktrace_t *, int, ktrace_snap_t *);
79
80#else
81#define ktrace_init(x) do { } while (0)
82#define ktrace_uninit() do { } while (0)
83#endif /* CONFIG_XFS_TRACE */
84
85#endif /* __XFS_SUPPORT_KTRACE_H__ */
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 17254b529c54..5ad8ad3a1dcd 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -25,21 +25,5 @@
25/* #define QUOTADEBUG 1 */ 25/* #define QUOTADEBUG 1 */
26#endif 26#endif
27 27
28#ifdef CONFIG_XFS_TRACE
29#define XFS_ALLOC_TRACE 1
30#define XFS_ATTR_TRACE 1
31#define XFS_BLI_TRACE 1
32#define XFS_BMAP_TRACE 1
33#define XFS_BTREE_TRACE 1
34#define XFS_DIR2_TRACE 1
35#define XFS_DQUOT_TRACE 1
36#define XFS_ILOCK_TRACE 1
37#define XFS_LOG_TRACE 1
38#define XFS_RW_TRACE 1
39#define XFS_BUF_TRACE 1
40#define XFS_INODE_TRACE 1
41#define XFS_FILESTREAMS_TRACE 1
42#endif
43
44#include <linux-2.6/xfs_linux.h> 28#include <linux-2.6/xfs_linux.h>
45#endif /* __XFS_H__ */ 29#endif /* __XFS_H__ */
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 947b150df8ed..d13eeba2c8f8 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -36,8 +36,8 @@ struct xfs_acl {
36}; 36};
37 37
38/* On-disk XFS extended attribute names */ 38/* On-disk XFS extended attribute names */
39#define SGI_ACL_FILE "SGI_ACL_FILE" 39#define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE"
40#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT" 40#define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT"
41#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) 41#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) 42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
43 43
@@ -49,7 +49,8 @@ extern int xfs_acl_chmod(struct inode *inode);
49extern int posix_acl_access_exists(struct inode *inode); 49extern int posix_acl_access_exists(struct inode *inode);
50extern int posix_acl_default_exists(struct inode *inode); 50extern int posix_acl_default_exists(struct inode *inode);
51 51
52extern struct xattr_handler xfs_xattr_system_handler; 52extern struct xattr_handler xfs_xattr_acl_access_handler;
53extern struct xattr_handler xfs_xattr_acl_default_handler;
53#else 54#else
54# define xfs_check_acl NULL 55# define xfs_check_acl NULL
55# define xfs_get_acl(inode, type) NULL 56# define xfs_get_acl(inode, type) NULL
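
One subtlety in the hunk above, worth flagging: once SGI_ACL_FILE expands to a cast expression, sizeof() measures the pointer rather than the string literal. A minimal illustration of the C semantics (not a statement about how the kernel later resolved this):

	#define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE"
	sizeof(SGI_ACL_FILE)	/* sizeof(unsigned char *), e.g. 8 */
	sizeof("SGI_ACL_FILE")	/* 13: the literal including its NUL */

With the casts in place, SGI_ACL_FILE_SIZE no longer evaluates to the attribute-name length, so any caller depending on it would need strlen() or an uncast literal.
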
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index a5d54bf4931b..abb8222b88c9 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -86,6 +86,20 @@ typedef struct xfs_agf {
86#define XFS_AGF_NUM_BITS 12 86#define XFS_AGF_NUM_BITS 12
87#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) 87#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1)
88 88
89#define XFS_AGF_FLAGS \
90 { XFS_AGF_MAGICNUM, "MAGICNUM" }, \
91 { XFS_AGF_VERSIONNUM, "VERSIONNUM" }, \
92 { XFS_AGF_SEQNO, "SEQNO" }, \
93 { XFS_AGF_LENGTH, "LENGTH" }, \
94 { XFS_AGF_ROOTS, "ROOTS" }, \
95 { XFS_AGF_LEVELS, "LEVELS" }, \
96 { XFS_AGF_FLFIRST, "FLFIRST" }, \
97 { XFS_AGF_FLLAST, "FLLAST" }, \
98 { XFS_AGF_FLCOUNT, "FLCOUNT" }, \
99 { XFS_AGF_FREEBLKS, "FREEBLKS" }, \
100 { XFS_AGF_LONGEST, "LONGEST" }, \
101 { XFS_AGF_BTREEBLKS, "BTREEBLKS" }
102
89/* disk block (xfs_daddr_t) in the AG */ 103/* disk block (xfs_daddr_t) in the AG */
90#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) 104#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
91#define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) 105#define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp))
@@ -173,17 +187,13 @@ typedef struct xfs_perag_busy {
173/* 187/*
174 * Per-ag incore structure, copies of information in agf and agi, 188 * Per-ag incore structure, copies of information in agf and agi,
175 * to improve the performance of allocation group selection. 189 * to improve the performance of allocation group selection.
176 *
177 * pick sizes which fit in allocation buckets well
178 */ 190 */
179#if (BITS_PER_LONG == 32)
180#define XFS_PAGB_NUM_SLOTS 84
181#elif (BITS_PER_LONG == 64)
182#define XFS_PAGB_NUM_SLOTS 128 191#define XFS_PAGB_NUM_SLOTS 128
183#endif
184 192
185typedef struct xfs_perag 193typedef struct xfs_perag {
186{ 194 struct xfs_mount *pag_mount; /* owner filesystem */
195 xfs_agnumber_t pag_agno; /* AG this structure belongs to */
196 atomic_t pag_ref; /* perag reference count */
187 char pagf_init; /* this agf's entry is initialized */ 197 char pagf_init; /* this agf's entry is initialized */
188 char pagi_init; /* this agi's entry is initialized */ 198 char pagi_init; /* this agi's entry is initialized */
189 char pagf_metadata; /* the agf is preferred to be metadata */ 199 char pagf_metadata; /* the agf is preferred to be metadata */
@@ -196,8 +206,6 @@ typedef struct xfs_perag
196 __uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */ 206 __uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */
197 xfs_agino_t pagi_freecount; /* number of free inodes */ 207 xfs_agino_t pagi_freecount; /* number of free inodes */
198 xfs_agino_t pagi_count; /* number of allocated inodes */ 208 xfs_agino_t pagi_count; /* number of allocated inodes */
199 int pagb_count; /* pagb slots in use */
200 xfs_perag_busy_t *pagb_list; /* unstable blocks */
201 209
202 /* 210 /*
203 * Inode allocation search lookup optimisation. 211 * Inode allocation search lookup optimisation.
@@ -215,7 +223,10 @@ typedef struct xfs_perag
215 int pag_ici_init; /* incore inode cache initialised */ 223 int pag_ici_init; /* incore inode cache initialised */
216 rwlock_t pag_ici_lock; /* incore inode lock */ 224 rwlock_t pag_ici_lock; /* incore inode lock */
217 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 225 struct radix_tree_root pag_ici_root; /* incore inode cache root */
226 int pag_ici_reclaimable; /* reclaimable inodes */
218#endif 227#endif
228 int pagb_count; /* pagb slots in use */
229 xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */
219} xfs_perag_t; 230} xfs_perag_t;
220 231
221/* 232/*
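
The new XFS_AGF_FLAGS table pairs each AGF logging bit with a printable name so tracepoints can render the mask symbolically. A reduced sketch of its intended use in a TP_printk(), assuming the event recorded an __field(int, flags) entry:

	TP_printk("dev %d:%d agno %u flags %s",
		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
		  __print_flags(__entry->flags, "|", XFS_AGF_FLAGS))

__print_flags() walks the { bit, "NAME" } pairs and joins the matching names with the delimiter, so a trace line reads flags FREEBLKS|LONGEST instead of a bare hex mask.
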
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 2cf944eb796d..94cddbfb2560 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -38,6 +38,7 @@
38#include "xfs_ialloc.h" 38#include "xfs_ialloc.h"
39#include "xfs_alloc.h" 39#include "xfs_alloc.h"
40#include "xfs_error.h" 40#include "xfs_error.h"
41#include "xfs_trace.h"
41 42
42 43
43#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) 44#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
@@ -51,30 +52,6 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
51 xfs_agblock_t bno, 52 xfs_agblock_t bno,
52 xfs_extlen_t len); 53 xfs_extlen_t len);
53 54
54#if defined(XFS_ALLOC_TRACE)
55ktrace_t *xfs_alloc_trace_buf;
56
57#define TRACE_ALLOC(s,a) \
58 xfs_alloc_trace_alloc(__func__, s, a, __LINE__)
59#define TRACE_FREE(s,a,b,x,f) \
60 xfs_alloc_trace_free(__func__, s, mp, a, b, x, f, __LINE__)
61#define TRACE_MODAGF(s,a,f) \
62 xfs_alloc_trace_modagf(__func__, s, mp, a, f, __LINE__)
63#define TRACE_BUSY(__func__,s,ag,agb,l,sl,tp) \
64 xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
65#define TRACE_UNBUSY(__func__,s,ag,sl,tp) \
66 xfs_alloc_trace_busy(__func__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
67#define TRACE_BUSYSEARCH(__func__,s,ag,agb,l,tp) \
68 xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, 0, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
69#else
70#define TRACE_ALLOC(s,a)
71#define TRACE_FREE(s,a,b,x,f)
72#define TRACE_MODAGF(s,a,f)
73#define TRACE_BUSY(s,a,ag,agb,l,sl,tp)
74#define TRACE_UNBUSY(fname,s,ag,sl,tp)
75#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,tp)
76#endif /* XFS_ALLOC_TRACE */
77
78/* 55/*
79 * Prototypes for per-ag allocation routines 56 * Prototypes for per-ag allocation routines
80 */ 57 */
@@ -498,124 +475,6 @@ xfs_alloc_read_agfl(
498 return 0; 475 return 0;
499} 476}
500 477
501#if defined(XFS_ALLOC_TRACE)
502/*
503 * Add an allocation trace entry for an alloc call.
504 */
505STATIC void
506xfs_alloc_trace_alloc(
507 const char *name, /* function tag string */
508 char *str, /* additional string */
509 xfs_alloc_arg_t *args, /* allocation argument structure */
510 int line) /* source line number */
511{
512 ktrace_enter(xfs_alloc_trace_buf,
513 (void *)(__psint_t)(XFS_ALLOC_KTRACE_ALLOC | (line << 16)),
514 (void *)name,
515 (void *)str,
516 (void *)args->mp,
517 (void *)(__psunsigned_t)args->agno,
518 (void *)(__psunsigned_t)args->agbno,
519 (void *)(__psunsigned_t)args->minlen,
520 (void *)(__psunsigned_t)args->maxlen,
521 (void *)(__psunsigned_t)args->mod,
522 (void *)(__psunsigned_t)args->prod,
523 (void *)(__psunsigned_t)args->minleft,
524 (void *)(__psunsigned_t)args->total,
525 (void *)(__psunsigned_t)args->alignment,
526 (void *)(__psunsigned_t)args->len,
527 (void *)((((__psint_t)args->type) << 16) |
528 (__psint_t)args->otype),
529 (void *)(__psint_t)((args->wasdel << 3) |
530 (args->wasfromfl << 2) |
531 (args->isfl << 1) |
532 (args->userdata << 0)));
533}
534
535/*
536 * Add an allocation trace entry for a free call.
537 */
538STATIC void
539xfs_alloc_trace_free(
540 const char *name, /* function tag string */
541 char *str, /* additional string */
542 xfs_mount_t *mp, /* file system mount point */
543 xfs_agnumber_t agno, /* allocation group number */
544 xfs_agblock_t agbno, /* a.g. relative block number */
545 xfs_extlen_t len, /* length of extent */
546 int isfl, /* set if is freelist allocation/free */
547 int line) /* source line number */
548{
549 ktrace_enter(xfs_alloc_trace_buf,
550 (void *)(__psint_t)(XFS_ALLOC_KTRACE_FREE | (line << 16)),
551 (void *)name,
552 (void *)str,
553 (void *)mp,
554 (void *)(__psunsigned_t)agno,
555 (void *)(__psunsigned_t)agbno,
556 (void *)(__psunsigned_t)len,
557 (void *)(__psint_t)isfl,
558 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
559}
560
561/*
562 * Add an allocation trace entry for modifying an agf.
563 */
564STATIC void
565xfs_alloc_trace_modagf(
566 const char *name, /* function tag string */
567 char *str, /* additional string */
568 xfs_mount_t *mp, /* file system mount point */
569 xfs_agf_t *agf, /* new agf value */
570 int flags, /* logging flags for agf */
571 int line) /* source line number */
572{
573 ktrace_enter(xfs_alloc_trace_buf,
574 (void *)(__psint_t)(XFS_ALLOC_KTRACE_MODAGF | (line << 16)),
575 (void *)name,
576 (void *)str,
577 (void *)mp,
578 (void *)(__psint_t)flags,
579 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_seqno),
580 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_length),
581 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
582 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
583 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
584 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
585 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flfirst),
586 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_fllast),
587 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flcount),
588 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_freeblks),
589 (void *)(__psunsigned_t)be32_to_cpu(agf->agf_longest));
590}
591
592STATIC void
593xfs_alloc_trace_busy(
594 const char *name, /* function tag string */
-	char		*str,	/* additional string */
-	xfs_mount_t	*mp,	/* file system mount point */
-	xfs_agnumber_t	agno,	/* allocation group number */
-	xfs_agblock_t	agbno,	/* a.g. relative block number */
-	xfs_extlen_t	len,	/* length of extent */
-	int		slot,	/* perag Busy slot */
-	xfs_trans_t	*tp,
-	int		trtype,	/* type: add, delete, search */
-	int		line)	/* source line number */
-{
-	ktrace_enter(xfs_alloc_trace_buf,
-		(void *)(__psint_t)(trtype | (line << 16)),
-		(void *)name,
-		(void *)str,
-		(void *)mp,
-		(void *)(__psunsigned_t)agno,
-		(void *)(__psunsigned_t)agbno,
-		(void *)(__psunsigned_t)len,
-		(void *)(__psint_t)slot,
-		(void *)tp,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL);
-}
-#endif	/* XFS_ALLOC_TRACE */
-
 /*
  * Allocation group level functions.
  */
@@ -665,9 +524,6 @@ xfs_alloc_ag_vextent(
 	 */
 	if (args->agbno != NULLAGBLOCK) {
 		xfs_agf_t	*agf;	/* allocation group freelist header */
-#ifdef XFS_ALLOC_TRACE
-		xfs_mount_t	*mp = args->mp;
-#endif
 		long		slen = (long)args->len;
 
 		ASSERT(args->len >= args->minlen && args->len <= args->maxlen);
@@ -682,7 +538,6 @@ xfs_alloc_ag_vextent(
 		args->pag->pagf_freeblks -= args->len;
 		ASSERT(be32_to_cpu(agf->agf_freeblks) <=
 		       be32_to_cpu(agf->agf_length));
-		TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
 		xfs_alloc_log_agf(args->tp, args->agbp,
 				  XFS_AGF_FREEBLKS);
 		/* search the busylist for these blocks */
@@ -792,13 +647,14 @@ xfs_alloc_ag_vextent_exact(
 	}
 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-	TRACE_ALLOC("normal", args);
+
+	trace_xfs_alloc_exact_done(args);
 	args->wasfromfl = 0;
 	return 0;
 
 error0:
 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
-	TRACE_ALLOC("error", args);
+	trace_xfs_alloc_exact_error(args);
 	return error;
 }
 
@@ -958,7 +814,7 @@ xfs_alloc_ag_vextent_near(
 			args->len = blen;
 			if (!xfs_alloc_fix_minleft(args)) {
 				xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-				TRACE_ALLOC("nominleft", args);
+				trace_xfs_alloc_near_nominleft(args);
 				return 0;
 			}
 			blen = args->len;
@@ -981,7 +837,8 @@ xfs_alloc_ag_vextent_near(
 				goto error0;
 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 			xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
-			TRACE_ALLOC("first", args);
+
+			trace_xfs_alloc_near_first(args);
 			return 0;
 		}
 		/*
@@ -1272,7 +1129,7 @@ xfs_alloc_ag_vextent_near(
 	 * If we couldn't get anything, give up.
 	 */
 	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
-		TRACE_ALLOC("neither", args);
+		trace_xfs_alloc_size_neither(args);
 		args->agbno = NULLAGBLOCK;
 		return 0;
 	}
@@ -1299,7 +1156,7 @@ xfs_alloc_ag_vextent_near(
 		args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
 		xfs_alloc_fix_len(args);
 		if (!xfs_alloc_fix_minleft(args)) {
-			TRACE_ALLOC("nominleft", args);
+			trace_xfs_alloc_near_nominleft(args);
 			xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 			return 0;
@@ -1314,13 +1171,18 @@ xfs_alloc_ag_vextent_near(
 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
 			ltnew, rlen, XFSA_FIXUP_BNO_OK)))
 		goto error0;
-	TRACE_ALLOC(j ? "gt" : "lt", args);
+
+	if (j)
+		trace_xfs_alloc_near_greater(args);
+	else
+		trace_xfs_alloc_near_lesser(args);
+
 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 	xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
 	return 0;
 
  error0:
-	TRACE_ALLOC("error", args);
+	trace_xfs_alloc_near_error(args);
 	if (cnt_cur != NULL)
 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
 	if (bno_cur_lt != NULL)
@@ -1371,7 +1233,7 @@ xfs_alloc_ag_vextent_size(
 		goto error0;
 	if (i == 0 || flen == 0) {
 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-		TRACE_ALLOC("noentry", args);
+		trace_xfs_alloc_size_noentry(args);
 		return 0;
 	}
 	ASSERT(i == 1);
@@ -1448,7 +1310,7 @@ xfs_alloc_ag_vextent_size(
 	xfs_alloc_fix_len(args);
 	if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-		TRACE_ALLOC("nominleft", args);
+		trace_xfs_alloc_size_nominleft(args);
 		args->agbno = NULLAGBLOCK;
 		return 0;
 	}
@@ -1471,11 +1333,11 @@ xfs_alloc_ag_vextent_size(
 		args->agbno + args->len <=
 			be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
 		error0);
-	TRACE_ALLOC("normal", args);
+	trace_xfs_alloc_size_done(args);
 	return 0;
 
 error0:
-	TRACE_ALLOC("error", args);
+	trace_xfs_alloc_size_error(args);
 	if (cnt_cur)
 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
 	if (bno_cur)
@@ -1534,7 +1396,7 @@ xfs_alloc_ag_vextent_small(
 			be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
 			error0);
 		args->wasfromfl = 1;
-		TRACE_ALLOC("freelist", args);
+		trace_xfs_alloc_small_freelist(args);
 		*stat = 0;
 		return 0;
 	}
@@ -1556,17 +1418,17 @@ xfs_alloc_ag_vextent_small(
 	 */
 	if (flen < args->minlen) {
 		args->agbno = NULLAGBLOCK;
-		TRACE_ALLOC("notenough", args);
+		trace_xfs_alloc_small_notenough(args);
 		flen = 0;
 	}
 	*fbnop = fbno;
 	*flenp = flen;
 	*stat = 1;
-	TRACE_ALLOC("normal", args);
+	trace_xfs_alloc_small_done(args);
 	return 0;
 
 error0:
-	TRACE_ALLOC("error", args);
+	trace_xfs_alloc_small_error(args);
 	return error;
 }
 
@@ -1800,26 +1662,25 @@ xfs_free_ag_extent(
 		xfs_agf_t	*agf;
 		xfs_perag_t	*pag;		/* per allocation group data */
 
+		pag = xfs_perag_get(mp, agno);
+		pag->pagf_freeblks += len;
+		xfs_perag_put(pag);
+
 		agf = XFS_BUF_TO_AGF(agbp);
-		pag = &mp->m_perag[agno];
 		be32_add_cpu(&agf->agf_freeblks, len);
 		xfs_trans_agblocks_delta(tp, len);
-		pag->pagf_freeblks += len;
 		XFS_WANT_CORRUPTED_GOTO(
 			be32_to_cpu(agf->agf_freeblks) <=
 			be32_to_cpu(agf->agf_length),
 			error0);
-		TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
 		xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
 		if (!isfl)
 			xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
 		XFS_STATS_INC(xs_freex);
 		XFS_STATS_ADD(xs_freeb, len);
 	}
-	TRACE_FREE(haveleft ?
-		(haveright ? "both" : "left") :
-		(haveright ? "right" : "none"),
-		agno, bno, len, isfl);
+
+	trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
 
 	/*
 	 * Since blocks move to the free list without the coordination
@@ -1836,7 +1697,7 @@ xfs_free_ag_extent(
 	return 0;
 
  error0:
-	TRACE_FREE("error", agno, bno, len, isfl);
+	trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
 	if (bno_cur)
 		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
 	if (cnt_cur)
@@ -2110,10 +1971,12 @@ xfs_alloc_get_freelist(
 	xfs_trans_brelse(tp, agflbp);
 	if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
 		agf->agf_flfirst = 0;
-	pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
+
+	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
 	be32_add_cpu(&agf->agf_flcount, -1);
 	xfs_trans_agflist_delta(tp, -1);
 	pag->pagf_flcount--;
+	xfs_perag_put(pag);
 
 	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
 	if (btreeblk) {
@@ -2122,7 +1985,6 @@ xfs_alloc_get_freelist(
 		logflags |= XFS_AGF_BTREEBLKS;
 	}
 
-	TRACE_MODAGF(NULL, agf, logflags);
 	xfs_alloc_log_agf(tp, agbp, logflags);
 	*bnop = bno;
 
@@ -2165,6 +2027,8 @@ xfs_alloc_log_agf(
 		sizeof(xfs_agf_t)
 	};
 
+	trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
+
 	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
 	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
 }
@@ -2218,7 +2082,8 @@ xfs_alloc_put_freelist(
 	be32_add_cpu(&agf->agf_fllast, 1);
 	if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
 		agf->agf_fllast = 0;
-	pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
+
+	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
 	be32_add_cpu(&agf->agf_flcount, 1);
 	xfs_trans_agflist_delta(tp, 1);
 	pag->pagf_flcount++;
@@ -2229,14 +2094,13 @@ xfs_alloc_put_freelist(
 		pag->pagf_btreeblks--;
 		logflags |= XFS_AGF_BTREEBLKS;
 	}
+	xfs_perag_put(pag);
 
-	TRACE_MODAGF(NULL, agf, logflags);
 	xfs_alloc_log_agf(tp, agbp, logflags);
 
 	ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
 	blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
 	*blockp = cpu_to_be32(bno);
-	TRACE_MODAGF(NULL, agf, logflags);
 	xfs_alloc_log_agf(tp, agbp, logflags);
 	xfs_trans_log_buf(tp, agflbp,
 		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
@@ -2294,7 +2158,6 @@ xfs_read_agf(
 		xfs_trans_brelse(tp, *bpp);
 		return XFS_ERROR(EFSCORRUPTED);
 	}
-
 	XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
 	return 0;
 }
@@ -2317,7 +2180,7 @@ xfs_alloc_read_agf(
 	ASSERT(agno != NULLAGNUMBER);
 
 	error = xfs_read_agf(mp, tp, agno,
-			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0,
+			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
			bpp);
 	if (error)
 		return error;
@@ -2326,7 +2189,7 @@ xfs_alloc_read_agf(
 	ASSERT(!XFS_BUF_GETERROR(*bpp));
 
 	agf = XFS_BUF_TO_AGF(*bpp);
-	pag = &mp->m_perag[agno];
+	pag = xfs_perag_get(mp, agno);
 	if (!pag->pagf_init) {
 		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
 		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
@@ -2337,8 +2200,8 @@ xfs_alloc_read_agf(
 		pag->pagf_levels[XFS_BTNUM_CNTi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
 		spin_lock_init(&pag->pagb_lock);
-		pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS *
-					sizeof(xfs_perag_busy_t), KM_SLEEP);
+		pag->pagb_count = 0;
+		memset(pag->pagb_list, 0, sizeof(pag->pagb_list));
 		pag->pagf_init = 1;
 	}
 #ifdef DEBUG
@@ -2353,6 +2216,7 @@ xfs_alloc_read_agf(
 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
 	}
 #endif
+	xfs_perag_put(pag);
 	return 0;
 }
 
@@ -2399,7 +2263,7 @@ xfs_alloc_vextent(
 	    args->minlen > args->maxlen || args->minlen > agsize ||
 	    args->mod >= args->prod) {
 		args->fsbno = NULLFSBLOCK;
-		TRACE_ALLOC("badargs", args);
+		trace_xfs_alloc_vextent_badargs(args);
 		return 0;
 	}
 	minleft = args->minleft;
@@ -2412,24 +2276,21 @@ xfs_alloc_vextent(
 		 * These three force us into a single a.g.
 		 */
 		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
-		down_read(&mp->m_peraglock);
-		args->pag = &mp->m_perag[args->agno];
+		args->pag = xfs_perag_get(mp, args->agno);
 		args->minleft = 0;
 		error = xfs_alloc_fix_freelist(args, 0);
 		args->minleft = minleft;
 		if (error) {
-			TRACE_ALLOC("nofix", args);
+			trace_xfs_alloc_vextent_nofix(args);
 			goto error0;
 		}
 		if (!args->agbp) {
-			up_read(&mp->m_peraglock);
-			TRACE_ALLOC("noagbp", args);
+			trace_xfs_alloc_vextent_noagbp(args);
 			break;
 		}
 		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
 		if ((error = xfs_alloc_ag_vextent(args)))
 			goto error0;
-		up_read(&mp->m_peraglock);
 		break;
 	case XFS_ALLOCTYPE_START_BNO:
 		/*
@@ -2481,14 +2342,13 @@ xfs_alloc_vextent(
 		 * Loop over allocation groups twice; first time with
 		 * trylock set, second time without.
 		 */
-		down_read(&mp->m_peraglock);
 		for (;;) {
-			args->pag = &mp->m_perag[args->agno];
+			args->pag = xfs_perag_get(mp, args->agno);
 			if (no_min) args->minleft = 0;
 			error = xfs_alloc_fix_freelist(args, flags);
 			args->minleft = minleft;
 			if (error) {
-				TRACE_ALLOC("nofix", args);
+				trace_xfs_alloc_vextent_nofix(args);
 				goto error0;
 			}
 			/*
@@ -2499,7 +2359,9 @@ xfs_alloc_vextent(
 					goto error0;
 				break;
 			}
-			TRACE_ALLOC("loopfailed", args);
+
+			trace_xfs_alloc_vextent_loopfailed(args);
+
 			/*
 			 * Didn't work, figure out the next iteration.
 			 */
@@ -2526,7 +2388,7 @@ xfs_alloc_vextent(
 			if (args->agno == sagno) {
 				if (no_min == 1) {
 					args->agbno = NULLAGBLOCK;
-					TRACE_ALLOC("allfailed", args);
+					trace_xfs_alloc_vextent_allfailed(args);
 					break;
 				}
 				if (flags == 0) {
@@ -2540,8 +2402,8 @@ xfs_alloc_vextent(
 				}
 			}
 		}
+			xfs_perag_put(args->pag);
 		}
-		up_read(&mp->m_peraglock);
 		if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
 			if (args->agno == sagno)
 				mp->m_agfrotor = (mp->m_agfrotor + 1) %
@@ -2567,9 +2429,10 @@ xfs_alloc_vextent(
 				args->len);
 #endif
 	}
+	xfs_perag_put(args->pag);
 	return 0;
 error0:
-	up_read(&mp->m_peraglock);
+	xfs_perag_put(args->pag);
 	return error;
 }
 
@@ -2594,8 +2457,7 @@ xfs_free_extent(
 	args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
 	ASSERT(args.agno < args.mp->m_sb.sb_agcount);
 	args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
-	down_read(&args.mp->m_peraglock);
-	args.pag = &args.mp->m_perag[args.agno];
+	args.pag = xfs_perag_get(args.mp, args.agno);
 	if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
 		goto error0;
 #ifdef DEBUG
@@ -2605,7 +2467,7 @@ xfs_free_extent(
 #endif
 	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
 error0:
-	up_read(&args.mp->m_peraglock);
+	xfs_perag_put(args.pag);
 	return error;
 }
 
@@ -2626,15 +2488,15 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
 		xfs_agblock_t	bno,
 		xfs_extlen_t	len)
 {
-	xfs_mount_t	*mp;
 	xfs_perag_busy_t	*bsy;
+	struct xfs_perag	*pag;
 	int		n;
 
-	mp = tp->t_mountp;
-	spin_lock(&mp->m_perag[agno].pagb_lock);
+	pag = xfs_perag_get(tp->t_mountp, agno);
+	spin_lock(&pag->pagb_lock);
 
 	/* search pagb_list for an open slot */
-	for (bsy = mp->m_perag[agno].pagb_list, n = 0;
+	for (bsy = pag->pagb_list, n = 0;
 	     n < XFS_PAGB_NUM_SLOTS;
 	     bsy++, n++) {
 		if (bsy->busy_tp == NULL) {
@@ -2642,16 +2504,16 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
 		}
 	}
 
+	trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len, n);
+
 	if (n < XFS_PAGB_NUM_SLOTS) {
-		bsy = &mp->m_perag[agno].pagb_list[n];
-		mp->m_perag[agno].pagb_count++;
-		TRACE_BUSY("xfs_alloc_mark_busy", "got", agno, bno, len, n, tp);
+		bsy = &pag->pagb_list[n];
+		pag->pagb_count++;
 		bsy->busy_start = bno;
 		bsy->busy_length = len;
 		bsy->busy_tp = tp;
 		xfs_trans_add_busy(tp, agno, n);
 	} else {
-		TRACE_BUSY("xfs_alloc_mark_busy", "FULL", agno, bno, len, -1, tp);
 		/*
 		 * The busy list is full!  Since it is now not possible to
 		 * track the free block, make this a synchronous transaction
@@ -2661,7 +2523,8 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
 		xfs_trans_set_sync(tp);
 	}
 
-	spin_unlock(&mp->m_perag[agno].pagb_lock);
+	spin_unlock(&pag->pagb_lock);
+	xfs_perag_put(pag);
 }
 
 void
@@ -2669,24 +2532,23 @@ xfs_alloc_clear_busy(xfs_trans_t *tp,
 		     xfs_agnumber_t	agno,
 		     int		idx)
 {
-	xfs_mount_t	*mp;
+	struct xfs_perag	*pag;
 	xfs_perag_busy_t	*list;
 
-	mp = tp->t_mountp;
+	ASSERT(idx < XFS_PAGB_NUM_SLOTS);
+	pag = xfs_perag_get(tp->t_mountp, agno);
+	spin_lock(&pag->pagb_lock);
+	list = pag->pagb_list;
 
-	spin_lock(&mp->m_perag[agno].pagb_lock);
-	list = mp->m_perag[agno].pagb_list;
+	trace_xfs_alloc_unbusy(tp->t_mountp, agno, idx, list[idx].busy_tp == tp);
 
-	ASSERT(idx < XFS_PAGB_NUM_SLOTS);
 	if (list[idx].busy_tp == tp) {
-		TRACE_UNBUSY("xfs_alloc_clear_busy", "found", agno, idx, tp);
 		list[idx].busy_tp = NULL;
-		mp->m_perag[agno].pagb_count--;
-	} else {
-		TRACE_UNBUSY("xfs_alloc_clear_busy", "missing", agno, idx, tp);
+		pag->pagb_count--;
 	}
 
-	spin_unlock(&mp->m_perag[agno].pagb_lock);
+	spin_unlock(&pag->pagb_lock);
+	xfs_perag_put(pag);
 }
 
 
@@ -2700,48 +2562,44 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 		    xfs_agblock_t	bno,
 		    xfs_extlen_t	len)
 {
-	xfs_mount_t	*mp;
+	struct xfs_perag	*pag;
 	xfs_perag_busy_t	*bsy;
 	xfs_agblock_t	uend, bend;
-	xfs_lsn_t	lsn;
+	xfs_lsn_t	lsn = 0;
 	int		cnt;
 
-	mp = tp->t_mountp;
-
-	spin_lock(&mp->m_perag[agno].pagb_lock);
-	cnt = mp->m_perag[agno].pagb_count;
+	pag = xfs_perag_get(tp->t_mountp, agno);
+	spin_lock(&pag->pagb_lock);
+	cnt = pag->pagb_count;
 
+	/*
+	 * search pagb_list for this slot, skipping open slots. We have to
+	 * search the entire array as there may be multiple overlaps and
+	 * we have to get the most recent LSN for the log force to push out
+	 * all the transactions that span the range.
+	 */
 	uend = bno + len - 1;
-
-	/* search pagb_list for this slot, skipping open slots */
-	for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) {
-
-		/*
-		 * (start1,length1) within (start2, length2)
-		 */
-		if (bsy->busy_tp != NULL) {
-			bend = bsy->busy_start + bsy->busy_length - 1;
-			if ((bno > bend) || (uend < bsy->busy_start)) {
-				cnt--;
-			} else {
-				TRACE_BUSYSEARCH("xfs_alloc_search_busy",
-						 "found1", agno, bno, len, tp);
-				break;
-			}
-		}
+	for (cnt = 0; cnt < pag->pagb_count; cnt++) {
+		bsy = &pag->pagb_list[cnt];
+		if (!bsy->busy_tp)
+			continue;
+
+		bend = bsy->busy_start + bsy->busy_length - 1;
+		if (bno > bend || uend < bsy->busy_start)
+			continue;
+
+		/* (start1,length1) within (start2, length2) */
+		if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0)
+			lsn = bsy->busy_tp->t_commit_lsn;
 	}
+	spin_unlock(&pag->pagb_lock);
+	xfs_perag_put(pag);
+	trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn);
 
 	/*
 	 * If a block was found, force the log through the LSN of the
 	 * transaction that freed the block
 	 */
-	if (cnt) {
-		TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, tp);
-		lsn = bsy->busy_tp->t_commit_lsn;
-		spin_unlock(&mp->m_perag[agno].pagb_lock);
-		xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
-	} else {
-		TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, tp);
-		spin_unlock(&mp->m_perag[agno].pagb_lock);
-	}
+	if (lsn)
+		xfs_log_force_lsn(tp->t_mountp, lsn, XFS_LOG_SYNC);
 }
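The rewritten xfs_alloc_search_busy() above no longer stops at the first overlapping busy extent: it scans every slot, keeps the newest commit LSN among the overlaps, and issues a single log force outside the lock. A minimal standalone sketch of that overlap-and-max-LSN loop, using simplified stand-in types rather than the real xfs_perag_busy_t and XFS_LSN_CMP():

/*
 * Sketch only: stand-in types for the busy-extent slots; the real code
 * locks pagb_lock around this scan and forces the log afterwards.
 */
struct busy_slot {
	unsigned int	start;		/* first block of busy extent */
	unsigned int	length;		/* number of blocks */
	long long	commit_lsn;	/* 0 means the slot is free */
};

static long long
max_overlapping_lsn(struct busy_slot *list, int count,
		    unsigned int bno, unsigned int len)
{
	unsigned int	uend = bno + len - 1;
	long long	lsn = 0;
	int		i;

	for (i = 0; i < count; i++) {
		unsigned int bend;

		if (!list[i].commit_lsn)	/* open slot, skip */
			continue;
		bend = list[i].start + list[i].length - 1;
		if (bno > bend || uend < list[i].start)
			continue;		/* no overlap */
		if (list[i].commit_lsn > lsn)	/* remember the newest LSN */
			lsn = list[i].commit_lsn;
	}
	return lsn;	/* caller forces the log to this LSN if nonzero */
}

Forcing once to the maximum LSN pushes out every transaction that spans the range, which is why the whole array must be scanned instead of breaking at the first hit.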
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index e704caee10df..599bffa39784 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -37,6 +37,15 @@ typedef enum xfs_alloctype
 	XFS_ALLOCTYPE_THIS_BNO	/* at exactly this block */
 } xfs_alloctype_t;
 
+#define XFS_ALLOC_TYPES \
+	{ XFS_ALLOCTYPE_ANY_AG,		"ANY_AG" }, \
+	{ XFS_ALLOCTYPE_FIRST_AG,	"FIRST_AG" }, \
+	{ XFS_ALLOCTYPE_START_AG,	"START_AG" }, \
+	{ XFS_ALLOCTYPE_THIS_AG,	"THIS_AG" }, \
+	{ XFS_ALLOCTYPE_START_BNO,	"START_BNO" }, \
+	{ XFS_ALLOCTYPE_NEAR_BNO,	"NEAR_BNO" }, \
+	{ XFS_ALLOCTYPE_THIS_BNO,	"THIS_BNO" }
+
 /*
  * Flags for xfs_alloc_fix_freelist.
  */
@@ -109,24 +118,6 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp,
 
 #ifdef __KERNEL__
 
-#if defined(XFS_ALLOC_TRACE)
-/*
- * Allocation tracing buffer size.
- */
-#define	XFS_ALLOC_TRACE_SIZE	4096
-extern ktrace_t *xfs_alloc_trace_buf;
-
-/*
- * Types for alloc tracing.
- */
-#define	XFS_ALLOC_KTRACE_ALLOC	1
-#define	XFS_ALLOC_KTRACE_FREE	2
-#define	XFS_ALLOC_KTRACE_MODAGF	3
-#define	XFS_ALLOC_KTRACE_BUSY	4
-#define	XFS_ALLOC_KTRACE_UNBUSY	5
-#define	XFS_ALLOC_KTRACE_BUSYSEARCH	6
-#endif
-
 void
 xfs_alloc_mark_busy(xfs_trans_t *tp,
 		xfs_agnumber_t	agno,
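The XFS_ALLOC_TYPES table added above lists { value, "name" } pairs, which is the layout consumed by the kernel tracing helper __print_symbolic(), so trace output can render the allocation type as text instead of a raw number. A hypothetical event definition (illustrative only, not the actual xfs_trace.h entry) showing how such a table is used inside a trace header:

/* Illustrative TRACE_EVENT; the name xfs_alloc_type_demo is made up. */
TRACE_EVENT(xfs_alloc_type_demo,
	TP_PROTO(int type),
	TP_ARGS(type),
	TP_STRUCT__entry(
		__field(int, type)		/* raw enum value stored in the ring */
	),
	TP_fast_assign(
		__entry->type = type;
	),
	TP_printk("alloctype %s",
		  __print_symbolic(__entry->type, XFS_ALLOC_TYPES))
);

Keeping the table next to the enum in xfs_alloc.h means a new allocation type only has to be added in one header for both the code and its trace decoding.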
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index c10c3a292d30..b726e10d2c1c 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -39,6 +39,7 @@
 #include "xfs_ialloc.h"
 #include "xfs_alloc.h"
 #include "xfs_error.h"
+#include "xfs_trace.h"
 
 
 STATIC struct xfs_btree_cur *
@@ -60,12 +61,14 @@ xfs_allocbt_set_root(
 	struct xfs_agf	*agf = XFS_BUF_TO_AGF(agbp);
 	xfs_agnumber_t	seqno = be32_to_cpu(agf->agf_seqno);
 	int		btnum = cur->bc_btnum;
+	struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno);
 
 	ASSERT(ptr->s != 0);
 
 	agf->agf_roots[btnum] = ptr->s;
 	be32_add_cpu(&agf->agf_levels[btnum], inc);
-	cur->bc_mp->m_perag[seqno].pagf_levels[btnum] += inc;
+	pag->pagf_levels[btnum] += inc;
+	xfs_perag_put(pag);
 
 	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
 }
@@ -149,6 +152,7 @@ xfs_allocbt_update_lastrec(
 {
 	struct xfs_agf	*agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
 	xfs_agnumber_t	seqno = be32_to_cpu(agf->agf_seqno);
+	struct xfs_perag *pag;
 	__be32		len;
 	int		numrecs;
 
@@ -192,7 +196,9 @@ xfs_allocbt_update_lastrec(
 	}
 
 	agf->agf_longest = len;
-	cur->bc_mp->m_perag[seqno].pagf_longest = be32_to_cpu(len);
+	pag = xfs_perag_get(cur->bc_mp, seqno);
+	pag->pagf_longest = be32_to_cpu(len);
+	xfs_perag_put(pag);
 	xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST);
 }
 
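Both hunks above follow the same pattern as the xfs_alloc.c changes: direct mp->m_perag[seqno] indexing, previously serialised by the m_peraglock rwsem, becomes an xfs_perag_get()/xfs_perag_put() pair that pins the per-AG structure only while it is used. A simplified userspace sketch of that get/put discipline, assuming a plain atomic reference count (the in-kernel lookup actually goes through a radix tree):

#include <stdatomic.h>

/* Userspace analogue of struct xfs_perag; fields elided for brevity. */
struct perag {
	atomic_int	ref;	/* references pinning this structure */
};

static struct perag *perag_get(struct perag *pag)
{
	/* Pin: the structure may not be freed while a reference is held. */
	atomic_fetch_add(&pag->ref, 1);
	return pag;
}

static void perag_put(struct perag *pag)
{
	/* Unpin: once the count drops, a reaper may reclaim the structure. */
	atomic_fetch_sub(&pag->ref, 1);
}

The pairing is about lifetime, not mutual exclusion: locks such as pagb_lock still guard the fields, while the reference merely keeps the structure from disappearing between get and put.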
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 4ece1906bd41..b9c196a53c42 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -47,6 +47,7 @@
 #include "xfs_trans_space.h"
 #include "xfs_rw.h"
 #include "xfs_vnodeops.h"
+#include "xfs_trace.h"
 
 /*
  * xfs_attr.c
@@ -89,19 +90,15 @@ STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
 
 #define ATTR_RMTVALUE_MAPSIZE	1	/* # of map entries at once */
 
-#if defined(XFS_ATTR_TRACE)
-ktrace_t *xfs_attr_trace_buf;
-#endif
-
 STATIC int
 xfs_attr_name_to_xname(
 	struct xfs_name	*xname,
-	const char	*aname)
+	const unsigned char *aname)
 {
 	if (!aname)
 		return EINVAL;
 	xname->name = aname;
-	xname->len = strlen(aname);
+	xname->len = strlen((char *)aname);
 	if (xname->len >= MAXNAMELEN)
 		return EFAULT;		/* match IRIX behaviour */
 
@@ -123,9 +120,13 @@ xfs_inode_hasattr(
  * Overall external interface routines.
  *========================================================================*/
 
-int
-xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
-		char *value, int *valuelenp, int flags)
+STATIC int
+xfs_attr_get_int(
+	struct xfs_inode	*ip,
+	struct xfs_name		*name,
+	unsigned char		*value,
+	int			*valuelenp,
+	int			flags)
 {
 	xfs_da_args_t   args;
 	int             error;
@@ -170,8 +171,8 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
 int
 xfs_attr_get(
 	xfs_inode_t	*ip,
-	const char	*name,
-	char		*value,
+	const unsigned char *name,
+	unsigned char	*value,
 	int		*valuelenp,
 	int		flags)
 {
@@ -188,7 +189,7 @@ xfs_attr_get(
 		return error;
 
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	error = xfs_attr_fetch(ip, &xname, value, valuelenp, flags);
+	error = xfs_attr_get_int(ip, &xname, value, valuelenp, flags);
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 	return(error);
 }
@@ -196,7 +197,7 @@ xfs_attr_get(
 /*
  * Calculate how many blocks we need for the new attribute,
 */
-int
+STATIC int
 xfs_attr_calc_size(
 	struct xfs_inode	*ip,
 	int			namelen,
@@ -234,8 +235,12 @@ xfs_attr_calc_size(
 }
 
 STATIC int
-xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
-		char *value, int valuelen, int flags)
+xfs_attr_set_int(
+	struct xfs_inode	*dp,
+	struct xfs_name		*name,
+	unsigned char		*value,
+	int			valuelen,
+	int			flags)
 {
 	xfs_da_args_t	args;
 	xfs_fsblock_t	firstblock;
@@ -451,8 +456,8 @@ out:
 int
 xfs_attr_set(
 	xfs_inode_t	*dp,
-	const char	*name,
-	char		*value,
+	const unsigned char *name,
+	unsigned char	*value,
 	int		valuelen,
 	int		flags)
 {
@@ -599,7 +604,7 @@ out:
 int
 xfs_attr_remove(
 	xfs_inode_t	*dp,
-	const char	*name,
+	const unsigned char *name,
 	int		flags)
 {
 	int		error;
@@ -636,7 +641,6 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
 		return EIO;
 
 	xfs_ilock(dp, XFS_ILOCK_SHARED);
-	xfs_attr_trace_l_c("syscall start", context);
 
 	/*
 	 * Decide on what work routines to call based on the inode size.
@@ -652,7 +656,6 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
 	}
 
 	xfs_iunlock(dp, XFS_ILOCK_SHARED);
-	xfs_attr_trace_l_c("syscall end", context);
 
 	return error;
 }
@@ -670,9 +673,13 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
 */
 /*ARGSUSED*/
 STATIC int
-xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags,
-		     char *name, int namelen,
-		     int valuelen, char *value)
+xfs_attr_put_listent(
+	xfs_attr_list_context_t	*context,
+	int		flags,
+	unsigned char	*name,
+	int		namelen,
+	int		valuelen,
+	unsigned char	*value)
 {
 	struct attrlist *alist = (struct attrlist *)context->alist;
 	attrlist_ent_t *aep;
@@ -698,7 +705,7 @@ xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags,
 		context->count * sizeof(alist->al_offset[0]);
 	context->firstu -= ATTR_ENTSIZE(namelen);
 	if (context->firstu < arraytop) {
-		xfs_attr_trace_l_c("buffer full", context);
+		trace_xfs_attr_list_full(context);
 		alist->al_more = 1;
 		context->seen_enough = 1;
 		return 1;
@@ -710,7 +717,7 @@ xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags,
 	aep->a_name[namelen] = 0;
 	alist->al_offset[context->count++] = context->firstu;
 	alist->al_count = context->count;
-	xfs_attr_trace_l_c("add", context);
+	trace_xfs_attr_list_add(context);
 	return 0;
 }
 
@@ -1849,7 +1856,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 			node = bp->data;
 			switch (be16_to_cpu(node->hdr.info.magic)) {
 			case XFS_DA_NODE_MAGIC:
-				xfs_attr_trace_l_cn("wrong blk", context, node);
+				trace_xfs_attr_list_wrong_blk(context);
 				xfs_da_brelse(NULL, bp);
 				bp = NULL;
 				break;
@@ -1857,20 +1864,18 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 				leaf = bp->data;
 				if (cursor->hashval > be32_to_cpu(leaf->entries[
 				    be16_to_cpu(leaf->hdr.count)-1].hashval)) {
-					xfs_attr_trace_l_cl("wrong blk",
-							   context, leaf);
+					trace_xfs_attr_list_wrong_blk(context);
 					xfs_da_brelse(NULL, bp);
 					bp = NULL;
 				} else if (cursor->hashval <=
 					     be32_to_cpu(leaf->entries[0].hashval)) {
-					xfs_attr_trace_l_cl("maybe wrong blk",
-							   context, leaf);
+					trace_xfs_attr_list_wrong_blk(context);
 					xfs_da_brelse(NULL, bp);
 					bp = NULL;
 				}
 				break;
 			default:
-				xfs_attr_trace_l_c("wrong blk - ??", context);
+				trace_xfs_attr_list_wrong_blk(context);
 				xfs_da_brelse(NULL, bp);
 				bp = NULL;
 			}
@@ -1915,8 +1920,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 				if (cursor->hashval
 						<= be32_to_cpu(btree->hashval)) {
 					cursor->blkno = be32_to_cpu(btree->before);
-					xfs_attr_trace_l_cb("descending",
-							    context, btree);
+					trace_xfs_attr_list_node_descend(context,
+									 btree);
 					break;
 				}
 			}
@@ -1983,7 +1988,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
 	xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE];
 	xfs_mount_t *mp;
 	xfs_daddr_t dblkno;
-	xfs_caddr_t dst;
+	void *dst;
 	xfs_buf_t *bp;
 	int nmap, error, tmp, valuelen, blkcnt, i;
 	xfs_dablk_t lblkno;
@@ -2010,15 +2015,14 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
 		dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
 		blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
 		error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
-				     blkcnt,
-				     XFS_BUF_LOCK | XBF_DONT_BLOCK,
+				     blkcnt, XBF_LOCK | XBF_DONT_BLOCK,
 				     &bp);
 		if (error)
 			return(error);
 
 		tmp = (valuelen < XFS_BUF_SIZE(bp))
 			? valuelen : XFS_BUF_SIZE(bp);
-		xfs_biomove(bp, 0, tmp, dst, XFS_B_READ);
+		xfs_biomove(bp, 0, tmp, dst, XBF_READ);
 		xfs_buf_relse(bp);
 		dst += tmp;
 		valuelen -= tmp;
@@ -2042,7 +2046,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
 	xfs_inode_t *dp;
 	xfs_bmbt_irec_t map;
 	xfs_daddr_t dblkno;
-	xfs_caddr_t src;
+	void *src;
 	xfs_buf_t *bp;
 	xfs_dablk_t lblkno;
 	int blkcnt, valuelen, nmap, error, tmp, committed;
@@ -2143,14 +2147,14 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
 		dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
 		blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
 
-		bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt,
-				       XFS_BUF_LOCK | XBF_DONT_BLOCK);
+		bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt,
+				 XBF_LOCK | XBF_DONT_BLOCK);
 		ASSERT(bp);
 		ASSERT(!XFS_BUF_GETERROR(bp));
 
 		tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
							XFS_BUF_SIZE(bp);
-		xfs_biomove(bp, 0, tmp, src, XFS_B_WRITE);
+		xfs_biomove(bp, 0, tmp, src, XBF_WRITE);
 		if (tmp < XFS_BUF_SIZE(bp))
 			xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
 		if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */
@@ -2211,8 +2215,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
 		/*
 		 * If the "remote" value is in the cache, remove it.
 		 */
-		bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt,
-				XFS_INCORE_TRYLOCK);
+		bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK);
 		if (bp) {
 			XFS_BUF_STALE(bp);
 			XFS_BUF_UNDELAYWRITE(bp);
@@ -2266,85 +2269,3 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
 	}
 	return(0);
 }
-
-#if defined(XFS_ATTR_TRACE)
-/*
- * Add a trace buffer entry for an attr_list context structure.
- */
-void
-xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context)
-{
-	xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where, context,
-		(__psunsigned_t)NULL,
-		(__psunsigned_t)NULL,
-		(__psunsigned_t)NULL);
-}
-
-/*
- * Add a trace buffer entry for a context structure and a Btree node.
- */
-void
-xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
-			 struct xfs_da_intnode *node)
-{
-	xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where, context,
-		(__psunsigned_t)be16_to_cpu(node->hdr.count),
-		(__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
-		(__psunsigned_t)be32_to_cpu(node->btree[
-				be16_to_cpu(node->hdr.count)-1].hashval));
-}
-
-/*
- * Add a trace buffer entry for a context structure and a Btree element.
- */
-void
-xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
-			  struct xfs_da_node_entry *btree)
-{
-	xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where, context,
-		(__psunsigned_t)be32_to_cpu(btree->hashval),
-		(__psunsigned_t)be32_to_cpu(btree->before),
-		(__psunsigned_t)NULL);
-}
-
-/*
- * Add a trace buffer entry for a context structure and a leaf block.
- */
-void
-xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
-			      struct xfs_attr_leafblock *leaf)
-{
-	xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where, context,
-		(__psunsigned_t)be16_to_cpu(leaf->hdr.count),
-		(__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval),
-		(__psunsigned_t)be32_to_cpu(leaf->entries[
-				be16_to_cpu(leaf->hdr.count)-1].hashval));
-}
-
-/*
- * Add a trace buffer entry for the arguments given to the routine,
- * generic form.
- */
-void
-xfs_attr_trace_enter(int type, char *where,
-		struct xfs_attr_list_context *context,
-		__psunsigned_t a13, __psunsigned_t a14,
-		__psunsigned_t a15)
-{
-	ASSERT(xfs_attr_trace_buf);
-	ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type),
-		(void *)((__psunsigned_t)where),
-		(void *)((__psunsigned_t)context->dp),
-		(void *)((__psunsigned_t)context->cursor->hashval),
-		(void *)((__psunsigned_t)context->cursor->blkno),
-		(void *)((__psunsigned_t)context->cursor->offset),
-		(void *)((__psunsigned_t)context->alist),
-		(void *)((__psunsigned_t)context->bufsize),
-		(void *)((__psunsigned_t)context->count),
-		(void *)((__psunsigned_t)context->firstu),
-		NULL,
-		(void *)((__psunsigned_t)context->dupcnt),
-		(void *)((__psunsigned_t)context->flags),
-		(void *)a13, (void *)a14, (void *)a15);
-}
-#endif	/* XFS_ATTR_TRACE */
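The block deleted above is the legacy ktrace path: every call packed up to sixteen pointer-sized values into the next slot of a global ring buffer, with the record type and call site encoded into the leading slots. A rough sketch of that ring-buffer scheme, with an assumed, simplified layout (the real ktrace.c is removed by this same series):

#define KTRACE_SLOTS	16

struct ktrace_entry {
	void	*val[KTRACE_SLOTS];	/* one fixed-size, untyped record */
};

struct ktrace {
	int			nentries;	/* ring capacity */
	int			index;		/* next slot to overwrite */
	struct ktrace_entry	*entries;
};

/* Append one record, overwriting the oldest once the ring wraps. */
static void ktrace_enter_demo(struct ktrace *kt, void *v0, void *v1)
{
	struct ktrace_entry *ktep = &kt->entries[kt->index];

	kt->index = (kt->index + 1) % kt->nentries;
	ktep->val[0] = v0;
	ktep->val[1] = v1;
}

Because the records are untyped void pointers, every consumer had to know the ad hoc slot layout, which is the readability and safety gap the typed trace_xfs_* events close.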
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index fb3b2a68b9b9..e920d68ef509 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -48,6 +48,16 @@ struct xfs_attr_list_context;
 #define ATTR_KERNOTIME	0x1000	/* [kernel] don't update inode timestamps */
 #define ATTR_KERNOVAL	0x2000	/* [kernel] get attr size only, not value */
 
+#define XFS_ATTR_FLAGS \
+	{ ATTR_DONTFOLLOW,	"DONTFOLLOW" }, \
+	{ ATTR_ROOT,		"ROOT" }, \
+	{ ATTR_TRUST,		"TRUST" }, \
+	{ ATTR_SECURE,		"SECURE" }, \
+	{ ATTR_CREATE,		"CREATE" }, \
+	{ ATTR_REPLACE,		"REPLACE" }, \
+	{ ATTR_KERNOTIME,	"KERNOTIME" }, \
+	{ ATTR_KERNOVAL,	"KERNOVAL" }
+
 /*
  * The maximum size (into the kernel or returned from the kernel) of an
  * attribute value or the buffer used for an attr_list() call.  Larger
@@ -103,7 +113,7 @@ typedef struct attrlist_cursor_kern {
 
 
 typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
-			      char *, int, int, char *);
+			      unsigned char *, int, int, unsigned char *);
 
 typedef struct xfs_attr_list_context {
 	struct xfs_inode		*dp;		/* inode */
@@ -129,9 +139,7 @@ typedef struct xfs_attr_list_context {
 /*
  * Overall external interface routines.
 */
-int xfs_attr_calc_size(struct xfs_inode *, int, int, int *);
 int xfs_attr_inactive(struct xfs_inode *dp);
-int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
 int xfs_attr_rmtval_get(struct xfs_da_args *args);
 int xfs_attr_list_int(struct xfs_attr_list_context *);
 
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index afdc8911637d..a90ce74fc256 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -42,6 +42,7 @@
 #include "xfs_attr.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_error.h"
+#include "xfs_trace.h"
 
 /*
  * xfs_attr_leaf.c
@@ -98,7 +99,7 @@ STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
  * If namespace bits don't match return 0.
  * If all match then return 1.
 */
-STATIC_INLINE int
+STATIC int
 xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
 {
 	return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
@@ -520,11 +521,11 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 
 	sfe = &sf->list[0];
 	for (i = 0; i < sf->hdr.count; i++) {
-		nargs.name = (char *)sfe->nameval;
+		nargs.name = sfe->nameval;
 		nargs.namelen = sfe->namelen;
-		nargs.value = (char *)&sfe->nameval[nargs.namelen];
+		nargs.value = &sfe->nameval[nargs.namelen];
 		nargs.valuelen = sfe->valuelen;
-		nargs.hashval = xfs_da_hashname((char *)sfe->nameval,
+		nargs.hashval = xfs_da_hashname(sfe->nameval,
 						sfe->namelen);
 		nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags);
 		error = xfs_attr_leaf_lookup_int(bp, &nargs); /* set a->index */
@@ -594,7 +595,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 	cursor = context->cursor;
 	ASSERT(cursor != NULL);
 
-	xfs_attr_trace_l_c("sf start", context);
+	trace_xfs_attr_list_sf(context);
 
 	/*
 	 * If the buffer is large enough and the cursor is at the start,
@@ -611,10 +612,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 		for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
 			error = context->put_listent(context,
 					   sfe->flags,
-					   (char *)sfe->nameval,
+					   sfe->nameval,
 					   (int)sfe->namelen,
 					   (int)sfe->valuelen,
-					   (char*)&sfe->nameval[sfe->namelen]);
+					   &sfe->nameval[sfe->namelen]);
 
 			/*
 			 * Either search callback finished early or
@@ -627,7 +628,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 				return error;
 			sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
 		}
-		xfs_attr_trace_l_c("sf big-gulp", context);
+		trace_xfs_attr_list_sf_all(context);
 		return(0);
 	}
 
@@ -653,14 +654,13 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 			XFS_CORRUPTION_ERROR("xfs_attr_shortform_list",
 					     XFS_ERRLEVEL_LOW,
 					     context->dp->i_mount, sfe);
-			xfs_attr_trace_l_c("sf corrupted", context);
 			kmem_free(sbuf);
 			return XFS_ERROR(EFSCORRUPTED);
 		}
 
 		sbp->entno = i;
-		sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen);
-		sbp->name = (char *)sfe->nameval;
+		sbp->hash = xfs_da_hashname(sfe->nameval, sfe->namelen);
+		sbp->name = sfe->nameval;
 		sbp->namelen = sfe->namelen;
 		/* These are bytes, and both on-disk, don't endian-flip */
 		sbp->valuelen = sfe->valuelen;
@@ -693,7 +693,6 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 	}
 	if (i == nsbuf) {
 		kmem_free(sbuf);
-		xfs_attr_trace_l_c("blk end", context);
 		return(0);
 	}
 
@@ -719,7 +718,6 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
 	}
 
 	kmem_free(sbuf);
-	xfs_attr_trace_l_c("sf E-O-F", context);
 	return(0);
 }
 
@@ -820,9 +818,9 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
 			continue;
 		ASSERT(entry->flags & XFS_ATTR_LOCAL);
 		name_loc = xfs_attr_leaf_name_local(leaf, i);
-		nargs.name = (char *)name_loc->nameval;
+		nargs.name = name_loc->nameval;
 		nargs.namelen = name_loc->namelen;
-		nargs.value = (char *)&name_loc->nameval[nargs.namelen];
+		nargs.value = &name_loc->nameval[nargs.namelen];
 		nargs.valuelen = be16_to_cpu(name_loc->valuelen);
 		nargs.hashval = be32_to_cpu(entry->hashval);
 		nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags);
@@ -2323,7 +2321,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 	cursor = context->cursor;
 	cursor->initted = 1;
 
-	xfs_attr_trace_l_cl("blk start", context, leaf);
+	trace_xfs_attr_list_leaf(context);
 
 	/*
 	 * Re-find our place in the leaf block if this is a new syscall.
@@ -2344,7 +2342,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 			}
 		}
 		if (i == be16_to_cpu(leaf->hdr.count)) {
-			xfs_attr_trace_l_c("not found", context);
+			trace_xfs_attr_list_notfound(context);
 			return(0);
 		}
 	} else {
@@ -2372,10 +2370,10 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 
 			retval = context->put_listent(context,
 						entry->flags,
-						(char *)name_loc->nameval,
+						name_loc->nameval,
 						(int)name_loc->namelen,
 						be16_to_cpu(name_loc->valuelen),
-						(char *)&name_loc->nameval[name_loc->namelen]);
+						&name_loc->nameval[name_loc->namelen]);
 			if (retval)
 				return retval;
 		} else {
@@ -2399,15 +2397,15 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 					return retval;
 				retval = context->put_listent(context,
 						entry->flags,
-						(char *)name_rmt->name,
+						name_rmt->name,
 						(int)name_rmt->namelen,
 						valuelen,
-						(char*)args.value);
+						args.value);
 				kmem_free(args.value);
 			} else {
 				retval = context->put_listent(context,
 						entry->flags,
-						(char *)name_rmt->name,
+						name_rmt->name,
 						(int)name_rmt->namelen,
 						valuelen,
 						NULL);
@@ -2419,7 +2417,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
 			break;
 		cursor->offset++;
 	}
-	xfs_attr_trace_l_cl("blk end", context, leaf);
+	trace_xfs_attr_list_leaf_end(context);
 	return(retval);
 }
 
@@ -2952,7 +2950,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
 					 map.br_blockcount);
 			bp = xfs_trans_get_buf(*trans,
 					dp->i_mount->m_ddev_targp,
-					dblkno, dblkcnt, XFS_BUF_LOCK);
+					dblkno, dblkcnt, XBF_LOCK);
 			xfs_trans_binval(*trans, bp);
 			/*
 			 * Roll to next transaction.
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h
index ea22839caed2..919756e3ba53 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/xfs_attr_sf.h
@@ -25,8 +25,6 @@
 * to fit into the literal area of the inode.
 */
 
-struct xfs_inode;
-
 /*
 * Entries are packed toward the top as tight as possible.
 */
@@ -54,7 +52,7 @@ typedef struct xfs_attr_sf_sort {
 	__uint8_t	valuelen;	/* length of value */
 	__uint8_t	flags;		/* flags bits (see xfs_attr_leaf.h) */
 	xfs_dahash_t	hash;		/* this entry's hash value */
-	char		*name;		/* name value, pointer into buffer */
+	unsigned char	*name;		/* name value, pointer into buffer */
 } xfs_attr_sf_sort_t;
 
 #define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen)	/* space name/value uses */ \
@@ -69,42 +67,4 @@ typedef struct xfs_attr_sf_sort {
 	(be16_to_cpu(((xfs_attr_shortform_t *)	\
 		((dp)->i_afp->if_u1.if_data))->hdr.totsize))
 
-#if defined(XFS_ATTR_TRACE)
-/*
- * Kernel tracing support for attribute lists
- */
-struct xfs_attr_list_context;
-struct xfs_da_intnode;
-struct xfs_da_node_entry;
-struct xfs_attr_leafblock;
-
-#define	XFS_ATTR_TRACE_SIZE	4096	/* size of global trace buffer */
-extern ktrace_t	*xfs_attr_trace_buf;
-
-/*
- * Trace record types.
- */
-#define	XFS_ATTR_KTRACE_L_C	1	/* context */
-#define	XFS_ATTR_KTRACE_L_CN	2	/* context, node */
-#define	XFS_ATTR_KTRACE_L_CB	3	/* context, btree */
-#define	XFS_ATTR_KTRACE_L_CL	4	/* context, leaf */
-
-void xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context);
-void xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
-			      struct xfs_da_intnode *node);
-void xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
-			      struct xfs_da_node_entry *btree);
-void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
-			      struct xfs_attr_leafblock *leaf);
-void xfs_attr_trace_enter(int type, char *where,
-		struct xfs_attr_list_context *context,
-		__psunsigned_t a13, __psunsigned_t a14,
-		__psunsigned_t a15);
-#else
-#define	xfs_attr_trace_l_c(w,c)
-#define	xfs_attr_trace_l_cn(w,c,n)
-#define	xfs_attr_trace_l_cb(w,c,b)
-#define	xfs_attr_trace_l_cl(w,c,l)
-#endif /* XFS_ATTR_TRACE */
-
 #endif	/* __XFS_ATTR_SF_H__ */
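A recurring change across these attr files is switching name and value pointers from char to unsigned char. One practical reason to prefer unsigned bytes in hashing code: on platforms where plain char is signed, promoting a high-bit byte sign-extends it and silently changes the hash. A small standalone illustration, with a toy rotating hash standing in for the real xfs_da_hashname():

#include <stdio.h>

/* Toy rotating hash, signed variant; illustration only. */
static unsigned int toy_hash_signed(const char *p, int len)
{
	unsigned int hash = 0;

	for (; len > 0; len--, p++)
		hash = *p ^ ((hash << 7) | (hash >> 25));	/* *p may sign-extend */
	return hash;
}

/* Same hash over unsigned bytes: each *p is always 0..255. */
static unsigned int toy_hash_unsigned(const unsigned char *p, int len)
{
	unsigned int hash = 0;

	for (; len > 0; len--, p++)
		hash = *p ^ ((hash << 7) | (hash >> 25));
	return hash;
}

int main(void)
{
	unsigned char name[] = { 0xe4, 'a', 'b' };	/* non-ASCII first byte */

	printf("signed:   %#x\n", toy_hash_signed((const char *)name, 3));
	printf("unsigned: %#x\n", toy_hash_unsigned(name, 3));
	return 0;
}

On a signed-char platform the two results differ, which is exactly the class of inconsistency treating attr names uniformly as unsigned byte arrays avoids.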
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 8971fb09d387..5c11e4d17010 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -54,6 +54,7 @@
54#include "xfs_buf_item.h" 54#include "xfs_buf_item.h"
55#include "xfs_filestream.h" 55#include "xfs_filestream.h"
56#include "xfs_vnodeops.h" 56#include "xfs_vnodeops.h"
57#include "xfs_trace.h"
57 58
58 59
59#ifdef DEBUG 60#ifdef DEBUG
@@ -272,71 +273,6 @@ xfs_bmap_isaeof(
 	int		whichfork,	/* data or attribute fork */
 	char		*aeof);		/* return value */
 
-#ifdef XFS_BMAP_TRACE
-/*
- * Add bmap trace entry prior to a call to xfs_iext_remove.
- */
-STATIC void
-xfs_bmap_trace_delete(
-	const char	*fname,		/* function name */
-	char		*desc,		/* operation description */
-	xfs_inode_t	*ip,		/* incore inode pointer */
-	xfs_extnum_t	idx,		/* index of entry(entries) deleted */
-	xfs_extnum_t	cnt,		/* count of entries deleted, 1 or 2 */
-	int		whichfork);	/* data or attr fork */
-
-/*
- * Add bmap trace entry prior to a call to xfs_iext_insert, or
- * reading in the extents list from the disk (in the btree).
- */
-STATIC void
-xfs_bmap_trace_insert(
-	const char	*fname,		/* function name */
-	char		*desc,		/* operation description */
-	xfs_inode_t	*ip,		/* incore inode pointer */
-	xfs_extnum_t	idx,		/* index of entry(entries) inserted */
-	xfs_extnum_t	cnt,		/* count of entries inserted, 1 or 2 */
-	xfs_bmbt_irec_t	*r1,		/* inserted record 1 */
-	xfs_bmbt_irec_t	*r2,		/* inserted record 2 or null */
-	int		whichfork);	/* data or attr fork */
-
-/*
- * Add bmap trace entry after updating an extent record in place.
- */
-STATIC void
-xfs_bmap_trace_post_update(
-	const char	*fname,		/* function name */
-	char		*desc,		/* operation description */
-	xfs_inode_t	*ip,		/* incore inode pointer */
-	xfs_extnum_t	idx,		/* index of entry updated */
-	int		whichfork);	/* data or attr fork */
-
-/*
- * Add bmap trace entry prior to updating an extent record in place.
- */
-STATIC void
-xfs_bmap_trace_pre_update(
-	const char	*fname,		/* function name */
-	char		*desc,		/* operation description */
-	xfs_inode_t	*ip,		/* incore inode pointer */
-	xfs_extnum_t	idx,		/* index of entry to be updated */
-	int		whichfork);	/* data or attr fork */
-
-#define	XFS_BMAP_TRACE_DELETE(d,ip,i,c,w)	\
-	xfs_bmap_trace_delete(__func__,d,ip,i,c,w)
-#define	XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w)	\
-	xfs_bmap_trace_insert(__func__,d,ip,i,c,r1,r2,w)
-#define	XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w)	\
-	xfs_bmap_trace_post_update(__func__,d,ip,i,w)
-#define	XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w)	\
-	xfs_bmap_trace_pre_update(__func__,d,ip,i,w)
-#else
-#define	XFS_BMAP_TRACE_DELETE(d,ip,i,c,w)
-#define	XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w)
-#define	XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w)
-#define	XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w)
-#endif	/* XFS_BMAP_TRACE */
-
 /*
  * Compute the worst-case number of indirect blocks that will be used
  * for ip's delayed extent of length "len".
@@ -363,18 +299,6 @@ xfs_bmap_validate_ret(
 #define	xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)
 #endif /* DEBUG */
 
-#if defined(XFS_RW_TRACE)
-STATIC void
-xfs_bunmap_trace(
-	xfs_inode_t		*ip,
-	xfs_fileoff_t		bno,
-	xfs_filblks_t		len,
-	int			flags,
-	inst_t			*ra);
-#else
-#define	xfs_bunmap_trace(ip, bno, len, flags, ra)
-#endif	/* XFS_RW_TRACE */
-
 STATIC int
 xfs_bmap_count_tree(
 	xfs_mount_t		*mp,
@@ -590,9 +514,9 @@ xfs_bmap_add_extent(
 	 * already extents in the list.
 	 */
 	if (nextents == 0) {
-		XFS_BMAP_TRACE_INSERT("insert empty", ip, 0, 1, new, NULL,
-			whichfork);
-		xfs_iext_insert(ifp, 0, 1, new);
+		xfs_iext_insert(ip, 0, 1, new,
+				whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
+
 		ASSERT(cur == NULL);
 		ifp->if_lastex = 0;
 		if (!isnullstartblock(new->br_startblock)) {
@@ -759,26 +683,10 @@ xfs_bmap_add_extent_delay_real(
 	xfs_filblks_t		temp=0;	/* value for dnew calculations */
 	xfs_filblks_t		temp2=0;/* value for dnew calculations */
 	int			tmp_rval;	/* partial logging flags */
-	enum {				/* bit number definitions for state */
-		LEFT_CONTIG,	RIGHT_CONTIG,
-		LEFT_FILLING,	RIGHT_FILLING,
-		LEFT_DELAY,	RIGHT_DELAY,
-		LEFT_VALID,	RIGHT_VALID
-	};
 
 #define	LEFT		r[0]
 #define	RIGHT		r[1]
 #define	PREV		r[2]
-#define	MASK(b)		(1 << (b))
-#define	MASK2(a,b)	(MASK(a) | MASK(b))
-#define	MASK3(a,b,c)	(MASK2(a,b) | MASK(c))
-#define	MASK4(a,b,c,d)	(MASK3(a,b,c) | MASK(d))
-#define	STATE_SET(b,v)	((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
-#define	STATE_TEST(b)	(state & MASK(b))
-#define	STATE_SET_TEST(b,v)	((v) ? ((state |= MASK(b)), 1) : \
-				       ((state &= ~MASK(b)), 0))
-#define	SWITCH_STATE		\
-	(state & MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG))
 
 	/*
 	 * Set up a bunch of variables to make the tests simpler.
@@ -790,69 +698,80 @@ xfs_bmap_add_extent_delay_real(
 	new_endoff = new->br_startoff + new->br_blockcount;
 	ASSERT(PREV.br_startoff <= new->br_startoff);
 	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
+
 	/*
 	 * Set flags determining what part of the previous delayed allocation
 	 * extent is being replaced by a real allocation.
 	 */
-	STATE_SET(LEFT_FILLING, PREV.br_startoff == new->br_startoff);
-	STATE_SET(RIGHT_FILLING,
-		PREV.br_startoff + PREV.br_blockcount == new_endoff);
+	if (PREV.br_startoff == new->br_startoff)
+		state |= BMAP_LEFT_FILLING;
+	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
+		state |= BMAP_RIGHT_FILLING;
+
 	/*
 	 * Check and set flags if this segment has a left neighbor.
 	 * Don't set contiguous if the combined extent would be too large.
 	 */
-	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
+	if (idx > 0) {
+		state |= BMAP_LEFT_VALID;
 		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT);
-		STATE_SET(LEFT_DELAY, isnullstartblock(LEFT.br_startblock));
+
+		if (isnullstartblock(LEFT.br_startblock))
+			state |= BMAP_LEFT_DELAY;
 	}
-	STATE_SET(LEFT_CONTIG,
-		STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) &&
+
+	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
 	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
 	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
 	    LEFT.br_state == new->br_state &&
-	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN);
+	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
+		state |= BMAP_LEFT_CONTIG;
+
 	/*
 	 * Check and set flags if this segment has a right neighbor.
 	 * Don't set contiguous if the combined extent would be too large.
 	 * Also check for all-three-contiguous being too large.
 	 */
-	if (STATE_SET_TEST(RIGHT_VALID,
-			idx <
-			ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) {
+	if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+		state |= BMAP_RIGHT_VALID;
 		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT);
-		STATE_SET(RIGHT_DELAY, isnullstartblock(RIGHT.br_startblock));
+
+		if (isnullstartblock(RIGHT.br_startblock))
+			state |= BMAP_RIGHT_DELAY;
 	}
-	STATE_SET(RIGHT_CONTIG,
-		STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) &&
+
+	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
 	    new_endoff == RIGHT.br_startoff &&
-	    new->br_startblock + new->br_blockcount ==
-			RIGHT.br_startblock &&
-	    new->br_state == RIGHT.br_state &&
-	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
-	    ((state & MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING)) !=
-		MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING) ||
-	    LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
-		<= MAXEXTLEN));
+	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
+	    new->br_state == RIGHT.br_state &&
+	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
+	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
+		       BMAP_RIGHT_FILLING)) !=
+			(BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
+			 BMAP_RIGHT_FILLING) ||
+	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
			<= MAXEXTLEN))
+		state |= BMAP_RIGHT_CONTIG;
+
 	error = 0;
 	/*
 	 * Switch out based on the FILLING and CONTIG state bits.
 	 */
-	switch (SWITCH_STATE) {
-
-	case MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
+			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
+	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
+	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
 		/*
 		 * Filling in all of a previously delayed allocation extent.
 		 * The left and right neighbors are both contiguous with new.
 		 */
-		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + PREV.br_blockcount +
 			RIGHT.br_blockcount);
-		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1,
-			XFS_DATA_FORK);
-		XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK);
-		xfs_iext_remove(ifp, idx, 2);
+		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+
+		xfs_iext_remove(ip, idx, 2, state);
 		ip->i_df.if_lastex = idx - 1;
 		ip->i_d.di_nextents--;
 		if (cur == NULL)
@@ -885,20 +804,18 @@ xfs_bmap_add_extent_delay_real(
 			RIGHT.br_blockcount;
 		break;
 
-	case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG):
+	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
 		/*
 		 * Filling in all of a previously delayed allocation extent.
 		 * The left neighbor is contiguous, the right is not.
 		 */
-		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + PREV.br_blockcount);
-		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+
 		ip->i_df.if_lastex = idx - 1;
-		XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK);
-		xfs_iext_remove(ifp, idx, 1);
+		xfs_iext_remove(ip, idx, 1, state);
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
@@ -921,19 +838,19 @@ xfs_bmap_add_extent_delay_real(
 			PREV.br_blockcount;
 		break;
 
-	case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG):
+	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
 		/*
 		 * Filling in all of a previously delayed allocation extent.
 		 * The right neighbor is contiguous, the left is not.
 		 */
-		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
 		xfs_bmbt_set_startblock(ep, new->br_startblock);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount + RIGHT.br_blockcount);
-		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+
 		ip->i_df.if_lastex = idx;
-		XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK);
-		xfs_iext_remove(ifp, idx + 1, 1);
+		xfs_iext_remove(ip, idx + 1, 1, state);
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
 		else {
@@ -956,15 +873,16 @@ xfs_bmap_add_extent_delay_real(
 			RIGHT.br_blockcount;
 		break;
 
-	case MASK2(LEFT_FILLING, RIGHT_FILLING):
+	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
 		/*
 		 * Filling in all of a previously delayed allocation extent.
 		 * Neither the left nor right neighbors are contiguous with
 		 * the new one.
 		 */
-		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
 		xfs_bmbt_set_startblock(ep, new->br_startblock);
-		XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+
 		ip->i_df.if_lastex = idx;
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
@@ -987,19 +905,20 @@ xfs_bmap_add_extent_delay_real(
 		temp2 = new->br_blockcount;
 		break;
 
-	case MASK2(LEFT_FILLING, LEFT_CONTIG):
+	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
 		/*
 		 * Filling in the first part of a previous delayed allocation.
 		 * The left neighbor is contiguous.
 		 */
-		XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + new->br_blockcount);
 		xfs_bmbt_set_startoff(ep,
 			PREV.br_startoff + new->br_blockcount);
-		XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1, XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+
 		temp = PREV.br_blockcount - new->br_blockcount;
-		XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(ep, temp);
 		ip->i_df.if_lastex = idx - 1;
 		if (cur == NULL)
@@ -1021,7 +940,7 @@ xfs_bmap_add_extent_delay_real(
 		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 			startblockval(PREV.br_startblock));
 		xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-		XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
 		*dnew = temp;
 		/* DELTA: The boundary between two in-core extents moved. */
 		temp = LEFT.br_startoff;
@@ -1029,18 +948,16 @@ xfs_bmap_add_extent_delay_real(
 			PREV.br_blockcount;
 		break;
 
-	case MASK(LEFT_FILLING):
+	case BMAP_LEFT_FILLING:
 		/*
 		 * Filling in the first part of a previous delayed allocation.
 		 * The left neighbor is not contiguous.
 		 */
-		XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
 		xfs_bmbt_set_startoff(ep, new_endoff);
 		temp = PREV.br_blockcount - new->br_blockcount;
 		xfs_bmbt_set_blockcount(ep, temp);
-		XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL,
-			XFS_DATA_FORK);
-		xfs_iext_insert(ifp, idx, 1, new);
+		xfs_iext_insert(ip, idx, 1, new, state);
 		ip->i_df.if_lastex = idx;
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
@@ -1071,27 +988,27 @@ xfs_bmap_add_extent_delay_real(
 			(cur ? cur->bc_private.b.allocated : 0));
 		ep = xfs_iext_get_ext(ifp, idx + 1);
 		xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-		XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx + 1, XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
 		*dnew = temp;
 		/* DELTA: One in-core extent is split in two. */
 		temp = PREV.br_startoff;
 		temp2 = PREV.br_blockcount;
 		break;
 
-	case MASK2(RIGHT_FILLING, RIGHT_CONTIG):
+	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
 		/*
 		 * Filling in the last part of a previous delayed allocation.
 		 * The right neighbor is contiguous with the new allocation.
 		 */
 		temp = PREV.br_blockcount - new->br_blockcount;
-		XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK);
-		XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(ep, temp);
 		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
 			new->br_startoff, new->br_startblock,
 			new->br_blockcount + RIGHT.br_blockcount,
 			RIGHT.br_state);
-		XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1, XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
 		ip->i_df.if_lastex = idx + 1;
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
@@ -1112,7 +1029,7 @@ xfs_bmap_add_extent_delay_real(
 		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
 			startblockval(PREV.br_startblock));
 		xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-		XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
 		*dnew = temp;
 		/* DELTA: The boundary between two in-core extents moved. */
 		temp = PREV.br_startoff;
@@ -1120,17 +1037,15 @@ xfs_bmap_add_extent_delay_real(
 			RIGHT.br_blockcount;
 		break;
 
-	case MASK(RIGHT_FILLING):
+	case BMAP_RIGHT_FILLING:
 		/*
 		 * Filling in the last part of a previous delayed allocation.
 		 * The right neighbor is not contiguous.
 		 */
 		temp = PREV.br_blockcount - new->br_blockcount;
-		XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(ep, temp);
-		XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL,
-			XFS_DATA_FORK);
-		xfs_iext_insert(ifp, idx + 1, 1, new);
+		xfs_iext_insert(ip, idx + 1, 1, new, state);
 		ip->i_df.if_lastex = idx + 1;
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
@@ -1161,7 +1076,7 @@ xfs_bmap_add_extent_delay_real(
 			(cur ? cur->bc_private.b.allocated : 0));
 		ep = xfs_iext_get_ext(ifp, idx);
 		xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-		XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
 		*dnew = temp;
 		/* DELTA: One in-core extent is split in two. */
 		temp = PREV.br_startoff;
@@ -1175,7 +1090,7 @@ xfs_bmap_add_extent_delay_real(
 		 * This case is avoided almost all the time.
 		 */
 		temp = new->br_startoff - PREV.br_startoff;
-		XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_);
 		xfs_bmbt_set_blockcount(ep, temp);
 		r[0] = *new;
 		r[1].br_state = PREV.br_state;
@@ -1183,9 +1098,7 @@ xfs_bmap_add_extent_delay_real(
 		r[1].br_startoff = new_endoff;
 		temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
 		r[1].br_blockcount = temp2;
-		XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1],
-			XFS_DATA_FORK);
-		xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
+		xfs_iext_insert(ip, idx + 1, 2, &r[0], state);
 		ip->i_df.if_lastex = idx + 1;
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
@@ -1242,24 +1155,24 @@ xfs_bmap_add_extent_delay_real(
 		}
 		ep = xfs_iext_get_ext(ifp, idx);
 		xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-		XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK);
-		XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx + 2, XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, idx + 2, state, _THIS_IP_);
 		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2),
 			nullstartblock((int)temp2));
-		XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx + 2, XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_);
 		*dnew = temp + temp2;
 		/* DELTA: One in-core extent is split in three. */
 		temp = PREV.br_startoff;
 		temp2 = PREV.br_blockcount;
 		break;
 
-	case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
-	case MASK3(RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
-	case MASK2(LEFT_FILLING, RIGHT_CONTIG):
-	case MASK2(RIGHT_FILLING, LEFT_CONTIG):
-	case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
-	case MASK(LEFT_CONTIG):
-	case MASK(RIGHT_CONTIG):
+	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
+	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
+	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+	case BMAP_LEFT_CONTIG:
+	case BMAP_RIGHT_CONTIG:
 		/*
 		 * These cases are all impossible.
 		 */
@@ -1279,14 +1192,6 @@ done:
 #undef	LEFT
 #undef	RIGHT
 #undef	PREV
-#undef	MASK
-#undef	MASK2
-#undef	MASK3
-#undef	MASK4
-#undef	STATE_SET
-#undef	STATE_TEST
-#undef	STATE_SET_TEST
-#undef	SWITCH_STATE
 }
 
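The conversion running through xfs_bmap_add_extent_delay_real() above repeats in the functions that follow: the function-local enum plus the MASK()/STATE_SET()/STATE_SET_TEST()/SWITCH_STATE macro family is replaced by shared BMAP_* flag bits, so neighbor state is built with plain if/|=, tested with &, and the case labels become OR-ed flag constants that read the same way the comments do. A small compilable sketch of the new idiom; the flag values here are illustrative stand-ins, not the kernel's definitions:

#include <stdio.h>

#define BMAP_LEFT_CONTIG	(1 << 0)
#define BMAP_RIGHT_CONTIG	(1 << 1)
#define BMAP_LEFT_FILLING	(1 << 2)
#define BMAP_RIGHT_FILLING	(1 << 3)

int main(void)
{
	int state = 0;

	/* formerly STATE_SET(LEFT_FILLING, cond): the macro hid a branch */
	if (1 /* cond */)
		state |= BMAP_LEFT_FILLING;
	state |= BMAP_RIGHT_CONTIG;

	/* formerly switch (SWITCH_STATE) { case MASK2(...): ... } */
	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
		printf("left filling, right contiguous\n");
		break;
	default:
		printf("state = %#x\n", state);
	}
	return 0;
}

Because every function now shares one flag namespace, helpers such as xfs_iext_insert() and xfs_iext_remove() can take the state word directly, as the hunks above show, instead of a bare index plus ad-hoc trace macros at each call site.
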
 /*
@@ -1316,27 +1221,10 @@ xfs_bmap_add_extent_unwritten_real(
 	int			state = 0;/* state bits, accessed thru macros */
 	xfs_filblks_t		temp=0;
 	xfs_filblks_t		temp2=0;
-	enum {				/* bit number definitions for state */
-		LEFT_CONTIG,	RIGHT_CONTIG,
-		LEFT_FILLING,	RIGHT_FILLING,
-		LEFT_DELAY,	RIGHT_DELAY,
-		LEFT_VALID,	RIGHT_VALID
-	};
 
 #define	LEFT		r[0]
 #define	RIGHT		r[1]
 #define	PREV		r[2]
-#define	MASK(b)		(1 << (b))
-#define	MASK2(a,b)	(MASK(a) | MASK(b))
-#define	MASK3(a,b,c)	(MASK2(a,b) | MASK(c))
-#define	MASK4(a,b,c,d)	(MASK3(a,b,c) | MASK(d))
-#define	STATE_SET(b,v)	((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
-#define	STATE_TEST(b)	(state & MASK(b))
-#define	STATE_SET_TEST(b,v)	((v) ? ((state |= MASK(b)), 1) : \
-				       ((state &= ~MASK(b)), 0))
-#define	SWITCH_STATE		\
-	(state & MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG))
-
 	/*
 	 * Set up a bunch of variables to make the tests simpler.
 	 */
@@ -1352,68 +1240,78 @@ xfs_bmap_add_extent_unwritten_real(
 	new_endoff = new->br_startoff + new->br_blockcount;
 	ASSERT(PREV.br_startoff <= new->br_startoff);
 	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
+
 	/*
 	 * Set flags determining what part of the previous oldext allocation
 	 * extent is being replaced by a newext allocation.
 	 */
-	STATE_SET(LEFT_FILLING, PREV.br_startoff == new->br_startoff);
-	STATE_SET(RIGHT_FILLING,
-		PREV.br_startoff + PREV.br_blockcount == new_endoff);
+	if (PREV.br_startoff == new->br_startoff)
+		state |= BMAP_LEFT_FILLING;
+	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
+		state |= BMAP_RIGHT_FILLING;
+
 	/*
 	 * Check and set flags if this segment has a left neighbor.
 	 * Don't set contiguous if the combined extent would be too large.
 	 */
-	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
+	if (idx > 0) {
+		state |= BMAP_LEFT_VALID;
 		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT);
-		STATE_SET(LEFT_DELAY, isnullstartblock(LEFT.br_startblock));
+
+		if (isnullstartblock(LEFT.br_startblock))
+			state |= BMAP_LEFT_DELAY;
 	}
-	STATE_SET(LEFT_CONTIG,
-		STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) &&
+
+	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
 	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
 	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
 	    LEFT.br_state == newext &&
-	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN);
+	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
+		state |= BMAP_LEFT_CONTIG;
+
 	/*
 	 * Check and set flags if this segment has a right neighbor.
 	 * Don't set contiguous if the combined extent would be too large.
 	 * Also check for all-three-contiguous being too large.
 	 */
-	if (STATE_SET_TEST(RIGHT_VALID,
-			idx <
-			ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) {
+	if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+		state |= BMAP_RIGHT_VALID;
 		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT);
-		STATE_SET(RIGHT_DELAY, isnullstartblock(RIGHT.br_startblock));
+		if (isnullstartblock(RIGHT.br_startblock))
+			state |= BMAP_RIGHT_DELAY;
 	}
-	STATE_SET(RIGHT_CONTIG,
-		STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) &&
+
+	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
 	    new_endoff == RIGHT.br_startoff &&
-	    new->br_startblock + new->br_blockcount ==
-			RIGHT.br_startblock &&
-	    newext == RIGHT.br_state &&
-	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
-	    ((state & MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING)) !=
-		MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING) ||
-	    LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
-		<= MAXEXTLEN));
+	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
+	    newext == RIGHT.br_state &&
+	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
+	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
+		       BMAP_RIGHT_FILLING)) !=
+			(BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
+			 BMAP_RIGHT_FILLING) ||
+	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
			<= MAXEXTLEN))
+		state |= BMAP_RIGHT_CONTIG;
+
 	/*
 	 * Switch out based on the FILLING and CONTIG state bits.
 	 */
-	switch (SWITCH_STATE) {
-
-	case MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
+			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
+	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
+	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
 		/*
 		 * Setting all of a previous oldext extent to newext.
 		 * The left and right neighbors are both contiguous with new.
 		 */
-		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC|RC", ip, idx - 1,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + PREV.br_blockcount +
 			RIGHT.br_blockcount);
-		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC|RC", ip, idx - 1,
-			XFS_DATA_FORK);
-		XFS_BMAP_TRACE_DELETE("LF|RF|LC|RC", ip, idx, 2, XFS_DATA_FORK);
-		xfs_iext_remove(ifp, idx, 2);
+		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+
+		xfs_iext_remove(ip, idx, 2, state);
 		ip->i_df.if_lastex = idx - 1;
 		ip->i_d.di_nextents -= 2;
 		if (cur == NULL)
@@ -1450,20 +1348,18 @@ xfs_bmap_add_extent_unwritten_real(
 			RIGHT.br_blockcount;
 		break;
 
-	case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG):
+	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
 		/*
 		 * Setting all of a previous oldext extent to newext.
 		 * The left neighbor is contiguous, the right is not.
 		 */
-		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|LC", ip, idx - 1,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + PREV.br_blockcount);
-		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|LC", ip, idx - 1,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+
 		ip->i_df.if_lastex = idx - 1;
-		XFS_BMAP_TRACE_DELETE("LF|RF|LC", ip, idx, 1, XFS_DATA_FORK);
-		xfs_iext_remove(ifp, idx, 1);
+		xfs_iext_remove(ip, idx, 1, state);
 		ip->i_d.di_nextents--;
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1492,21 +1388,18 @@ xfs_bmap_add_extent_unwritten_real(
 			PREV.br_blockcount;
 		break;
 
-	case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG):
+	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
 		/*
 		 * Setting all of a previous oldext extent to newext.
 		 * The right neighbor is contiguous, the left is not.
 		 */
-		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF|RC", ip, idx,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount + RIGHT.br_blockcount);
 		xfs_bmbt_set_state(ep, newext);
-		XFS_BMAP_TRACE_POST_UPDATE("LF|RF|RC", ip, idx,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
 		ip->i_df.if_lastex = idx;
-		XFS_BMAP_TRACE_DELETE("LF|RF|RC", ip, idx + 1, 1, XFS_DATA_FORK);
-		xfs_iext_remove(ifp, idx + 1, 1);
+		xfs_iext_remove(ip, idx + 1, 1, state);
 		ip->i_d.di_nextents--;
 		if (cur == NULL)
 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
@@ -1535,17 +1428,16 @@ xfs_bmap_add_extent_unwritten_real(
 			RIGHT.br_blockcount;
 		break;
 
-	case MASK2(LEFT_FILLING, RIGHT_FILLING):
+	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
 		/*
 		 * Setting all of a previous oldext extent to newext.
 		 * Neither the left nor right neighbors are contiguous with
 		 * the new one.
 		 */
-		XFS_BMAP_TRACE_PRE_UPDATE("LF|RF", ip, idx,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
 		xfs_bmbt_set_state(ep, newext);
-		XFS_BMAP_TRACE_POST_UPDATE("LF|RF", ip, idx,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+
 		ip->i_df.if_lastex = idx;
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
@@ -1566,27 +1458,25 @@ xfs_bmap_add_extent_unwritten_real(
 		temp2 = new->br_blockcount;
 		break;
 
-	case MASK2(LEFT_FILLING, LEFT_CONTIG):
+	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
 		/*
 		 * Setting the first part of a previous oldext extent to newext.
 		 * The left neighbor is contiguous.
 		 */
-		XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx - 1,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
 			LEFT.br_blockcount + new->br_blockcount);
 		xfs_bmbt_set_startoff(ep,
 			PREV.br_startoff + new->br_blockcount);
-		XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx - 1,
-			XFS_DATA_FORK);
-		XFS_BMAP_TRACE_PRE_UPDATE("LF|LC", ip, idx,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+
+		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
 		xfs_bmbt_set_startblock(ep,
 			new->br_startblock + new->br_blockcount);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount - new->br_blockcount);
-		XFS_BMAP_TRACE_POST_UPDATE("LF|LC", ip, idx,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+
 		ip->i_df.if_lastex = idx - 1;
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
@@ -1617,22 +1507,21 @@ xfs_bmap_add_extent_unwritten_real(
 			PREV.br_blockcount;
 		break;
 
-	case MASK(LEFT_FILLING):
+	case BMAP_LEFT_FILLING:
 		/*
 		 * Setting the first part of a previous oldext extent to newext.
 		 * The left neighbor is not contiguous.
 		 */
-		XFS_BMAP_TRACE_PRE_UPDATE("LF", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
 		ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
 		xfs_bmbt_set_startoff(ep, new_endoff);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount - new->br_blockcount);
 		xfs_bmbt_set_startblock(ep,
 			new->br_startblock + new->br_blockcount);
-		XFS_BMAP_TRACE_POST_UPDATE("LF", ip, idx, XFS_DATA_FORK);
-		XFS_BMAP_TRACE_INSERT("LF", ip, idx, 1, new, NULL,
-			XFS_DATA_FORK);
-		xfs_iext_insert(ifp, idx, 1, new);
+		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+
+		xfs_iext_insert(ip, idx, 1, new, state);
 		ip->i_df.if_lastex = idx;
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
@@ -1660,24 +1549,21 @@ xfs_bmap_add_extent_unwritten_real(
 		temp2 = PREV.br_blockcount;
 		break;
 
-	case MASK2(RIGHT_FILLING, RIGHT_CONTIG):
+	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
 		/*
 		 * Setting the last part of a previous oldext extent to newext.
 		 * The right neighbor is contiguous with the new allocation.
 		 */
-		XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx,
-			XFS_DATA_FORK);
-		XFS_BMAP_TRACE_PRE_UPDATE("RF|RC", ip, idx + 1,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
+		trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount - new->br_blockcount);
-		XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
 		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1),
 			new->br_startoff, new->br_startblock,
 			new->br_blockcount + RIGHT.br_blockcount, newext);
-		XFS_BMAP_TRACE_POST_UPDATE("RF|RC", ip, idx + 1,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
+
 		ip->i_df.if_lastex = idx + 1;
 		if (cur == NULL)
 			rval = XFS_ILOG_DEXT;
@@ -1707,18 +1593,17 @@ xfs_bmap_add_extent_unwritten_real(
 			RIGHT.br_blockcount;
 		break;
 
-	case MASK(RIGHT_FILLING):
+	case BMAP_RIGHT_FILLING:
 		/*
 		 * Setting the last part of a previous oldext extent to newext.
 		 * The right neighbor is not contiguous.
 		 */
-		XFS_BMAP_TRACE_PRE_UPDATE("RF", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(ep,
 			PREV.br_blockcount - new->br_blockcount);
-		XFS_BMAP_TRACE_POST_UPDATE("RF", ip, idx, XFS_DATA_FORK);
-		XFS_BMAP_TRACE_INSERT("RF", ip, idx + 1, 1, new, NULL,
-			XFS_DATA_FORK);
-		xfs_iext_insert(ifp, idx + 1, 1, new);
+		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+
+		xfs_iext_insert(ip, idx + 1, 1, new, state);
 		ip->i_df.if_lastex = idx + 1;
 		ip->i_d.di_nextents++;
 		if (cur == NULL)
@@ -1756,19 +1641,18 @@ xfs_bmap_add_extent_unwritten_real(
 		 * newext. Contiguity is impossible here.
 		 * One extent becomes three extents.
 		 */
-		XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(ep,
 			new->br_startoff - PREV.br_startoff);
-		XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+
 		r[0] = *new;
 		r[1].br_startoff = new_endoff;
 		r[1].br_blockcount =
 			PREV.br_startoff + PREV.br_blockcount - new_endoff;
 		r[1].br_startblock = new->br_startblock + new->br_blockcount;
 		r[1].br_state = oldext;
-		XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 2, &r[0], &r[1],
-			XFS_DATA_FORK);
-		xfs_iext_insert(ifp, idx + 1, 2, &r[0]);
+		xfs_iext_insert(ip, idx + 1, 2, &r[0], state);
 		ip->i_df.if_lastex = idx + 1;
 		ip->i_d.di_nextents += 2;
 		if (cur == NULL)
@@ -1813,13 +1697,13 @@ xfs_bmap_add_extent_unwritten_real(
 		temp2 = PREV.br_blockcount;
 		break;
 
-	case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
-	case MASK3(RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
-	case MASK2(LEFT_FILLING, RIGHT_CONTIG):
-	case MASK2(RIGHT_FILLING, LEFT_CONTIG):
-	case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
-	case MASK(LEFT_CONTIG):
-	case MASK(RIGHT_CONTIG):
+	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
+	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
+	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+	case BMAP_LEFT_CONTIG:
+	case BMAP_RIGHT_CONTIG:
 		/*
 		 * These cases are all impossible.
 		 */
@@ -1839,14 +1723,6 @@ done:
 #undef	LEFT
 #undef	RIGHT
 #undef	PREV
-#undef	MASK
-#undef	MASK2
-#undef	MASK3
-#undef	MASK4
-#undef	STATE_SET
-#undef	STATE_TEST
-#undef	STATE_SET_TEST
-#undef	SWITCH_STATE
 }
 
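Both functions above also swap the string-tagged trace macros ("LF|RF|LC" and friends) for trace_xfs_bmap_pre_update()/trace_xfs_bmap_post_update() calls that pass the state word and _THIS_IP_, the address of the call site, so the tag no longer has to be spelled by hand at every site. A sketch of the call-site-address trick; _THIS_IP_ here follows the kernel's label-as-value definition (a GCC/clang extension), and trace_bmap_pre_update() is a stand-in for the generated tracepoint, not the real one:

#include <stdio.h>

#define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })

static void trace_bmap_pre_update(int idx, int state, unsigned long caller_ip)
{
	/* a real tracepoint would also decode the BMAP_* bits in state */
	printf("pre-update idx=%d state=%#x from %#lx\n",
	       idx, state, caller_ip);
}

int main(void)
{
	int idx = 3, state = 0x5;

	trace_bmap_pre_update(idx, state, _THIS_IP_); /* records this call site */
	return 0;
}

One trace event carrying the caller address and the state word replaces a family of hand-maintained tag strings, and the switch case that fired can be reconstructed from the flags after the fact.
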
 /*
@@ -1872,62 +1748,57 @@ xfs_bmap_add_extent_hole_delay(
 	int			state;  /* state bits, accessed thru macros */
 	xfs_filblks_t		temp=0;	/* temp for indirect calculations */
 	xfs_filblks_t		temp2=0;
-	enum {				/* bit number definitions for state */
-		LEFT_CONTIG,	RIGHT_CONTIG,
-		LEFT_DELAY,	RIGHT_DELAY,
-		LEFT_VALID,	RIGHT_VALID
-	};
-
-#define	MASK(b)			(1 << (b))
-#define	MASK2(a,b)		(MASK(a) | MASK(b))
-#define	STATE_SET(b,v)		((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
-#define	STATE_TEST(b)		(state & MASK(b))
-#define	STATE_SET_TEST(b,v)	((v) ? ((state |= MASK(b)), 1) : \
-				       ((state &= ~MASK(b)), 0))
-#define	SWITCH_STATE		(state & MASK2(LEFT_CONTIG, RIGHT_CONTIG))
 
 	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
 	ep = xfs_iext_get_ext(ifp, idx);
 	state = 0;
 	ASSERT(isnullstartblock(new->br_startblock));
+
 	/*
 	 * Check and set flags if this segment has a left neighbor
 	 */
-	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
+	if (idx > 0) {
+		state |= BMAP_LEFT_VALID;
 		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left);
-		STATE_SET(LEFT_DELAY, isnullstartblock(left.br_startblock));
+
+		if (isnullstartblock(left.br_startblock))
+			state |= BMAP_LEFT_DELAY;
 	}
+
 	/*
 	 * Check and set flags if the current (right) segment exists.
 	 * If it doesn't exist, we're converting the hole at end-of-file.
 	 */
-	if (STATE_SET_TEST(RIGHT_VALID,
-			idx <
-			ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
+	if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
+		state |= BMAP_RIGHT_VALID;
 		xfs_bmbt_get_all(ep, &right);
-		STATE_SET(RIGHT_DELAY, isnullstartblock(right.br_startblock));
+
+		if (isnullstartblock(right.br_startblock))
+			state |= BMAP_RIGHT_DELAY;
 	}
+
 	/*
 	 * Set contiguity flags on the left and right neighbors.
 	 * Don't let extents get too large, even if the pieces are contiguous.
 	 */
-	STATE_SET(LEFT_CONTIG,
-		STATE_TEST(LEFT_VALID) && STATE_TEST(LEFT_DELAY) &&
-		left.br_startoff + left.br_blockcount == new->br_startoff &&
-		left.br_blockcount + new->br_blockcount <= MAXEXTLEN);
-	STATE_SET(RIGHT_CONTIG,
-		STATE_TEST(RIGHT_VALID) && STATE_TEST(RIGHT_DELAY) &&
+	if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
+	    left.br_startoff + left.br_blockcount == new->br_startoff &&
+	    left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
+		state |= BMAP_LEFT_CONTIG;
+
+	if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
 	    new->br_startoff + new->br_blockcount == right.br_startoff &&
 	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
-	    (!STATE_TEST(LEFT_CONTIG) ||
+	    (!(state & BMAP_LEFT_CONTIG) ||
 	     (left.br_blockcount + new->br_blockcount +
-		right.br_blockcount <= MAXEXTLEN)));
+	      right.br_blockcount <= MAXEXTLEN)))
+		state |= BMAP_RIGHT_CONTIG;
+
 	/*
 	 * Switch out based on the contiguity flags.
 	 */
-	switch (SWITCH_STATE) {
-
-	case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
+	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
+	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
 		/*
 		 * New allocation is contiguous with delayed allocations
 		 * on the left and on the right.
@@ -1935,8 +1806,8 @@ xfs_bmap_add_extent_hole_delay(
 		 */
 		temp = left.br_blockcount + new->br_blockcount +
 			right.br_blockcount;
-		XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1,
-			XFS_DATA_FORK);
+
+		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
 		oldlen = startblockval(left.br_startblock) +
 			startblockval(new->br_startblock) +
@@ -1944,53 +1815,52 @@ xfs_bmap_add_extent_hole_delay(
 		newlen = xfs_bmap_worst_indlen(ip, temp);
 		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
 			nullstartblock((int)newlen));
-		XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1,
-			XFS_DATA_FORK);
-		XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, XFS_DATA_FORK);
-		xfs_iext_remove(ifp, idx, 1);
+		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+
+		xfs_iext_remove(ip, idx, 1, state);
 		ip->i_df.if_lastex = idx - 1;
 		/* DELTA: Two in-core extents were replaced by one. */
 		temp2 = temp;
 		temp = left.br_startoff;
 		break;
 
-	case MASK(LEFT_CONTIG):
+	case BMAP_LEFT_CONTIG:
 		/*
 		 * New allocation is contiguous with a delayed allocation
 		 * on the left.
 		 * Merge the new allocation with the left neighbor.
 		 */
 		temp = left.br_blockcount + new->br_blockcount;
-		XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
 		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp);
 		oldlen = startblockval(left.br_startblock) +
 			startblockval(new->br_startblock);
 		newlen = xfs_bmap_worst_indlen(ip, temp);
 		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1),
 			nullstartblock((int)newlen));
-		XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1,
-			XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
+
 		ip->i_df.if_lastex = idx - 1;
 		/* DELTA: One in-core extent grew into a hole. */
 		temp2 = temp;
 		temp = left.br_startoff;
 		break;
 
-	case MASK(RIGHT_CONTIG):
+	case BMAP_RIGHT_CONTIG:
 		/*
 		 * New allocation is contiguous with a delayed allocation
 		 * on the right.
 		 * Merge the new allocation with the right neighbor.
 		 */
-		XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
 		temp = new->br_blockcount + right.br_blockcount;
 		oldlen = startblockval(new->br_startblock) +
 			startblockval(right.br_startblock);
 		newlen = xfs_bmap_worst_indlen(ip, temp);
 		xfs_bmbt_set_allf(ep, new->br_startoff,
 			nullstartblock((int)newlen), temp, right.br_state);
-		XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, XFS_DATA_FORK);
+		trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
+
 		ip->i_df.if_lastex = idx;
 		/* DELTA: One in-core extent grew into a hole. */
 		temp2 = temp;
@@ -2004,9 +1874,7 @@ xfs_bmap_add_extent_hole_delay(
 		 * Insert a new entry.
 		 */
 		oldlen = newlen = 0;
-		XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL,
-			XFS_DATA_FORK);
-		xfs_iext_insert(ifp, idx, 1, new);
+		xfs_iext_insert(ip, idx, 1, new, state);
 		ip->i_df.if_lastex = idx;
 		/* DELTA: A new in-core extent was added in a hole. */
 		temp2 = new->br_blockcount;
@@ -2030,12 +1898,6 @@ xfs_bmap_add_extent_hole_delay(
 	}
 	*logflagsp = 0;
 	return 0;
-#undef	MASK
-#undef	MASK2
-#undef	STATE_SET
-#undef	STATE_TEST
-#undef	STATE_SET_TEST
-#undef	SWITCH_STATE
 }
 
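xfs_bmap_add_extent_hole_real() below goes one step further and folds the fork choice into the same flags word up front (state |= BMAP_ATTRFORK), so the insert/remove/trace helpers can tell the data fork from the attribute fork without an extra parameter. A sketch of that folding, with an illustrative flag value and stand-in names:

#include <stdio.h>

#define BMAP_ATTRFORK	(1 << 7)

enum which_fork { DATA_FORK, ATTR_FORK };

static void iext_insert(unsigned int state)
{
	/* one word carries both the contiguity bits and the fork selector */
	printf("insert into %s fork (state=%#x)\n",
	       (state & BMAP_ATTRFORK) ? "attr" : "data", state);
}

int main(void)
{
	unsigned int state = 0;
	enum which_fork whichfork = ATTR_FORK;

	if (whichfork == ATTR_FORK)
		state |= BMAP_ATTRFORK;
	iext_insert(state);
	return 0;
}
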
 /*
@@ -2062,83 +1924,75 @@ xfs_bmap_add_extent_hole_real(
 	int			state;	/* state bits, accessed thru macros */
 	xfs_filblks_t		temp=0;
 	xfs_filblks_t		temp2=0;
-	enum {				/* bit number definitions for state */
-		LEFT_CONTIG,	RIGHT_CONTIG,
-		LEFT_DELAY,	RIGHT_DELAY,
-		LEFT_VALID,	RIGHT_VALID
-	};
-
-#define	MASK(b)			(1 << (b))
-#define	MASK2(a,b)		(MASK(a) | MASK(b))
-#define	STATE_SET(b,v)		((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
-#define	STATE_TEST(b)		(state & MASK(b))
-#define	STATE_SET_TEST(b,v)	((v) ? ((state |= MASK(b)), 1) : \
-				       ((state &= ~MASK(b)), 0))
-#define	SWITCH_STATE		(state & MASK2(LEFT_CONTIG, RIGHT_CONTIG))
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
 	ep = xfs_iext_get_ext(ifp, idx);
 	state = 0;
+
+	if (whichfork == XFS_ATTR_FORK)
+		state |= BMAP_ATTRFORK;
+
 	/*
 	 * Check and set flags if this segment has a left neighbor.
 	 */
-	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
+	if (idx > 0) {
+		state |= BMAP_LEFT_VALID;
 		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left);
-		STATE_SET(LEFT_DELAY, isnullstartblock(left.br_startblock));
+		if (isnullstartblock(left.br_startblock))
+			state |= BMAP_LEFT_DELAY;
 	}
+
 	/*
 	 * Check and set flags if this segment has a current value.
 	 * Not true if we're inserting into the "hole" at eof.
 	 */
-	if (STATE_SET_TEST(RIGHT_VALID,
-			idx <
-			ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
+	if (idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
+		state |= BMAP_RIGHT_VALID;
 		xfs_bmbt_get_all(ep, &right);
-		STATE_SET(RIGHT_DELAY, isnullstartblock(right.br_startblock));
+		if (isnullstartblock(right.br_startblock))
+			state |= BMAP_RIGHT_DELAY;
 	}
+
 	/*
 	 * We're inserting a real allocation between "left" and "right".
 	 * Set the contiguity flags.  Don't let extents get too large.
 	 */
-	STATE_SET(LEFT_CONTIG,
-		STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) &&
-		left.br_startoff + left.br_blockcount == new->br_startoff &&
-		left.br_startblock + left.br_blockcount == new->br_startblock &&
-		left.br_state == new->br_state &&
-		left.br_blockcount + new->br_blockcount <= MAXEXTLEN);
-	STATE_SET(RIGHT_CONTIG,
-		STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) &&
+	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
+	    left.br_startoff + left.br_blockcount == new->br_startoff &&
+	    left.br_startblock + left.br_blockcount == new->br_startblock &&
+	    left.br_state == new->br_state &&
+	    left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
+		state |= BMAP_LEFT_CONTIG;
+
+	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
 	    new->br_startoff + new->br_blockcount == right.br_startoff &&
-	    new->br_startblock + new->br_blockcount ==
-			right.br_startblock &&
-	    new->br_state == right.br_state &&
-	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
+	    new->br_startblock + new->br_blockcount == right.br_startblock &&
+	    new->br_state == right.br_state &&
+	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
+	    (!(state & BMAP_LEFT_CONTIG) ||
2117 (!STATE_TEST(LEFT_CONTIG) || 1974 left.br_blockcount + new->br_blockcount +
2118 left.br_blockcount + new->br_blockcount + 1975 right.br_blockcount <= MAXEXTLEN))
2119 right.br_blockcount <= MAXEXTLEN)); 1976 state |= BMAP_RIGHT_CONTIG;
2120 1977
2121 error = 0; 1978 error = 0;
2122 /* 1979 /*
2123 * Select which case we're in here, and implement it. 1980 * Select which case we're in here, and implement it.
2124 */ 1981 */
2125 switch (SWITCH_STATE) { 1982 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2126 1983 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2127 case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
2128 /* 1984 /*
2129 * New allocation is contiguous with real allocations on the 1985 * New allocation is contiguous with real allocations on the
2130 * left and on the right. 1986 * left and on the right.
2131 * Merge all three into a single extent record. 1987 * Merge all three into a single extent record.
2132 */ 1988 */
2133 XFS_BMAP_TRACE_PRE_UPDATE("LC|RC", ip, idx - 1, 1989 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
2134 whichfork);
2135 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 1990 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
2136 left.br_blockcount + new->br_blockcount + 1991 left.br_blockcount + new->br_blockcount +
2137 right.br_blockcount); 1992 right.br_blockcount);
2138 XFS_BMAP_TRACE_POST_UPDATE("LC|RC", ip, idx - 1, 1993 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
2139 whichfork); 1994
2140 XFS_BMAP_TRACE_DELETE("LC|RC", ip, idx, 1, whichfork); 1995 xfs_iext_remove(ip, idx, 1, state);
2141 xfs_iext_remove(ifp, idx, 1);
2142 ifp->if_lastex = idx - 1; 1996 ifp->if_lastex = idx - 1;
2143 XFS_IFORK_NEXT_SET(ip, whichfork, 1997 XFS_IFORK_NEXT_SET(ip, whichfork,
2144 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 1998 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
@@ -2173,16 +2027,17 @@ xfs_bmap_add_extent_hole_real(
2173 right.br_blockcount; 2027 right.br_blockcount;
2174 break; 2028 break;
2175 2029
2176 case MASK(LEFT_CONTIG): 2030 case BMAP_LEFT_CONTIG:
2177 /* 2031 /*
2178 * New allocation is contiguous with a real allocation 2032 * New allocation is contiguous with a real allocation
2179 * on the left. 2033 * on the left.
2180 * Merge the new allocation with the left neighbor. 2034 * Merge the new allocation with the left neighbor.
2181 */ 2035 */
2182 XFS_BMAP_TRACE_PRE_UPDATE("LC", ip, idx - 1, whichfork); 2036 trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_);
2183 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), 2037 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1),
2184 left.br_blockcount + new->br_blockcount); 2038 left.br_blockcount + new->br_blockcount);
2185 XFS_BMAP_TRACE_POST_UPDATE("LC", ip, idx - 1, whichfork); 2039 trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
2040
2186 ifp->if_lastex = idx - 1; 2041 ifp->if_lastex = idx - 1;
2187 if (cur == NULL) { 2042 if (cur == NULL) {
2188 rval = xfs_ilog_fext(whichfork); 2043 rval = xfs_ilog_fext(whichfork);
@@ -2207,17 +2062,18 @@ xfs_bmap_add_extent_hole_real(
2207 new->br_blockcount; 2062 new->br_blockcount;
2208 break; 2063 break;
2209 2064
2210 case MASK(RIGHT_CONTIG): 2065 case BMAP_RIGHT_CONTIG:
2211 /* 2066 /*
2212 * New allocation is contiguous with a real allocation 2067 * New allocation is contiguous with a real allocation
2213 * on the right. 2068 * on the right.
2214 * Merge the new allocation with the right neighbor. 2069 * Merge the new allocation with the right neighbor.
2215 */ 2070 */
2216 XFS_BMAP_TRACE_PRE_UPDATE("RC", ip, idx, whichfork); 2071 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
2217 xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock, 2072 xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock,
2218 new->br_blockcount + right.br_blockcount, 2073 new->br_blockcount + right.br_blockcount,
2219 right.br_state); 2074 right.br_state);
2220 XFS_BMAP_TRACE_POST_UPDATE("RC", ip, idx, whichfork); 2075 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
2076
2221 ifp->if_lastex = idx; 2077 ifp->if_lastex = idx;
2222 if (cur == NULL) { 2078 if (cur == NULL) {
2223 rval = xfs_ilog_fext(whichfork); 2079 rval = xfs_ilog_fext(whichfork);
@@ -2248,8 +2104,7 @@ xfs_bmap_add_extent_hole_real(
2248 * real allocation. 2104 * real allocation.
2249 * Insert a new entry. 2105 * Insert a new entry.
2250 */ 2106 */
2251 XFS_BMAP_TRACE_INSERT("0", ip, idx, 1, new, NULL, whichfork); 2107 xfs_iext_insert(ip, idx, 1, new, state);
2252 xfs_iext_insert(ifp, idx, 1, new);
2253 ifp->if_lastex = idx; 2108 ifp->if_lastex = idx;
2254 XFS_IFORK_NEXT_SET(ip, whichfork, 2109 XFS_IFORK_NEXT_SET(ip, whichfork,
2255 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 2110 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
@@ -2283,12 +2138,6 @@ xfs_bmap_add_extent_hole_real(
2283done: 2138done:
2284 *logflagsp = rval; 2139 *logflagsp = rval;
2285 return error; 2140 return error;
2286#undef MASK
2287#undef MASK2
2288#undef STATE_SET
2289#undef STATE_TEST
2290#undef STATE_SET_TEST
2291#undef SWITCH_STATE
2292} 2141}
2293 2142
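The same rewrite lands in xfs_bmap_add_extent_hole_real(): the six STATE_* helper macros become plain flag arithmetic, and the contiguity predicate keeps its extra guard that a three-way merge must not overflow the 21-bit extent length. A compilable sketch of that predicate, with stand-in types (struct irec and MAXEXTLEN here are simplifications, not the kernel definitions):

	#include <stddef.h>
	#include <stdint.h>

	#define MAXEXTLEN		((1 << 21) - 1)	/* 21-bit blockcount */
	#define BMAP_LEFT_CONTIG	(1 << 0)
	#define BMAP_RIGHT_CONTIG	(1 << 1)

	struct irec {
		uint64_t	startoff;	/* file offset of extent */
		uint64_t	startblock;	/* first block of extent */
		uint64_t	blockcount;	/* length of extent */
		int		state;		/* normal vs unwritten */
	};

	static int
	contig_state(const struct irec *left, const struct irec *new,
		     const struct irec *right)
	{
		int state = 0;

		if (left &&
		    left->startoff + left->blockcount == new->startoff &&
		    left->startblock + left->blockcount == new->startblock &&
		    left->state == new->state &&
		    left->blockcount + new->blockcount <= MAXEXTLEN)
			state |= BMAP_LEFT_CONTIG;

		if (right &&
		    new->startoff + new->blockcount == right->startoff &&
		    new->startblock + new->blockcount == right->startblock &&
		    new->state == right->state &&
		    new->blockcount + right->blockcount <= MAXEXTLEN &&
		    (!(state & BMAP_LEFT_CONTIG) ||
		     left->blockcount + new->blockcount +
				right->blockcount <= MAXEXTLEN))
			state |= BMAP_RIGHT_CONTIG;

		return state;
	}

	int main(void)
	{
		struct irec l = { 0, 100, 8, 0 };
		struct irec n = { 8, 108, 4, 0 };

		return contig_state(&l, &n, NULL) == BMAP_LEFT_CONTIG ? 0 : 1;
	}

The short-circuit on BMAP_LEFT_CONTIG in the second test is what makes the left-pointer dereference safe, the same ordering the kernel code above relies on.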
2294/* 2143/*
@@ -2701,22 +2550,134 @@ xfs_bmap_rtalloc(
2701} 2550}
2702 2551
2703STATIC int 2552STATIC int
2553xfs_bmap_btalloc_nullfb(
2554 struct xfs_bmalloca *ap,
2555 struct xfs_alloc_arg *args,
2556 xfs_extlen_t *blen)
2557{
2558 struct xfs_mount *mp = ap->ip->i_mount;
2559 struct xfs_perag *pag;
2560 xfs_agnumber_t ag, startag;
2561 int notinit = 0;
2562 int error;
2563
2564 if (ap->userdata && xfs_inode_is_filestream(ap->ip))
2565 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2566 else
2567 args->type = XFS_ALLOCTYPE_START_BNO;
2568 args->total = ap->total;
2569
2570 /*
2571 * Search for an allocation group with a single extent large enough
2572 * for the request. If one isn't found, then adjust the minimum
2573 * allocation size to the largest space found.
2574 */
2575 startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
2576 if (startag == NULLAGNUMBER)
2577 startag = ag = 0;
2578
2579 pag = xfs_perag_get(mp, ag);
2580 while (*blen < ap->alen) {
2581 if (!pag->pagf_init) {
2582 error = xfs_alloc_pagf_init(mp, args->tp, ag,
2583 XFS_ALLOC_FLAG_TRYLOCK);
2584 if (error) {
2585 xfs_perag_put(pag);
2586 return error;
2587 }
2588 }
2589
2590 /*
2591 * See xfs_alloc_fix_freelist...
2592 */
2593 if (pag->pagf_init) {
2594 xfs_extlen_t longest;
2595 longest = xfs_alloc_longest_free_extent(mp, pag);
2596 if (*blen < longest)
2597 *blen = longest;
2598 } else
2599 notinit = 1;
2600
2601 if (xfs_inode_is_filestream(ap->ip)) {
2602 if (*blen >= ap->alen)
2603 break;
2604
2605 if (ap->userdata) {
2606 /*
2607 * If startag is an invalid AG, we've
2608 * come here once before and
2609 * xfs_filestream_new_ag picked the
2610 * best currently available.
2611 *
2612 * Don't continue looping, since we
2613 * could loop forever.
2614 */
2615 if (startag == NULLAGNUMBER)
2616 break;
2617
2618 error = xfs_filestream_new_ag(ap, &ag);
2619 xfs_perag_put(pag);
2620 if (error)
2621 return error;
2622
 2623 /* loop again to set 'blen' */
2624 startag = NULLAGNUMBER;
2625 pag = xfs_perag_get(mp, ag);
2626 continue;
2627 }
2628 }
2629 if (++ag == mp->m_sb.sb_agcount)
2630 ag = 0;
2631 if (ag == startag)
2632 break;
2633 xfs_perag_put(pag);
2634 pag = xfs_perag_get(mp, ag);
2635 }
2636 xfs_perag_put(pag);
2637
2638 /*
2639 * Since the above loop did a BUF_TRYLOCK, it is
2640 * possible that there is space for this request.
2641 */
2642 if (notinit || *blen < ap->minlen)
2643 args->minlen = ap->minlen;
2644 /*
2645 * If the best seen length is less than the request
2646 * length, use the best as the minimum.
2647 */
2648 else if (*blen < ap->alen)
2649 args->minlen = *blen;
2650 /*
2651 * Otherwise we've seen an extent as big as alen,
2652 * use that as the minimum.
2653 */
2654 else
2655 args->minlen = ap->alen;
2656
2657 /*
2658 * set the failure fallback case to look in the selected
2659 * AG as the stream may have moved.
2660 */
2661 if (xfs_inode_is_filestream(ap->ip))
2662 ap->rval = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
2663
2664 return 0;
2665}
2666
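The new xfs_bmap_btalloc_nullfb() helper also switches per-AG access from the old down_read(&mp->m_peraglock) scheme to reference-counted xfs_perag_get()/xfs_perag_put() pairs, so every exit path must drop its pin. A minimal userspace model of that discipline, with hypothetical standalone types rather than the kernel's struct xfs_perag:

	#include <assert.h>

	struct perag {
		int	refcount;	/* pins taken via perag_get() */
		int	pagf_init;	/* AGF read in, as in the kernel */
	};

	static struct perag ag_table[4];

	static struct perag *perag_get(int agno)
	{
		struct perag *pag = &ag_table[agno];

		pag->refcount++;		/* pin the per-AG structure */
		return pag;
	}

	static void perag_put(struct perag *pag)
	{
		assert(pag->refcount > 0);
		pag->refcount--;		/* drop the pin */
	}

	static void walk_ags(int agcount, int startag)
	{
		int ag = startag;
		struct perag *pag = perag_get(ag);

		for (;;) {
			/* ... inspect pag->pagf_init, longest free extent ... */
			if (++ag == agcount)
				ag = 0;
			if (ag == startag)
				break;
			perag_put(pag);
			pag = perag_get(ag);
		}
		perag_put(pag);
	}

	int main(void)
	{
		walk_ags(4, 1);
		for (int i = 0; i < 4; i++)
			assert(ag_table[i].refcount == 0);
		return 0;
	}

The put/get ordering at the bottom of the loop mirrors the helper above; note how the early-return error paths in the kernel version each need their own xfs_perag_put().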
2667STATIC int
2704xfs_bmap_btalloc( 2668xfs_bmap_btalloc(
2705 xfs_bmalloca_t *ap) /* bmap alloc argument struct */ 2669 xfs_bmalloca_t *ap) /* bmap alloc argument struct */
2706{ 2670{
2707 xfs_mount_t *mp; /* mount point structure */ 2671 xfs_mount_t *mp; /* mount point structure */
2708 xfs_alloctype_t atype = 0; /* type for allocation routines */ 2672 xfs_alloctype_t atype = 0; /* type for allocation routines */
2709 xfs_extlen_t align; /* minimum allocation alignment */ 2673 xfs_extlen_t align; /* minimum allocation alignment */
2710 xfs_agnumber_t ag;
2711 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ 2674 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
2712 xfs_agnumber_t startag; 2675 xfs_agnumber_t ag;
2713 xfs_alloc_arg_t args; 2676 xfs_alloc_arg_t args;
2714 xfs_extlen_t blen; 2677 xfs_extlen_t blen;
2715 xfs_extlen_t nextminlen = 0; 2678 xfs_extlen_t nextminlen = 0;
2716 xfs_perag_t *pag;
2717 int nullfb; /* true if ap->firstblock isn't set */ 2679 int nullfb; /* true if ap->firstblock isn't set */
2718 int isaligned; 2680 int isaligned;
2719 int notinit;
2720 int tryagain; 2681 int tryagain;
2721 int error; 2682 int error;
2722 2683
@@ -2763,102 +2724,9 @@ xfs_bmap_btalloc(
2763 args.firstblock = ap->firstblock; 2724 args.firstblock = ap->firstblock;
2764 blen = 0; 2725 blen = 0;
2765 if (nullfb) { 2726 if (nullfb) {
2766 if (ap->userdata && xfs_inode_is_filestream(ap->ip)) 2727 error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
2767 args.type = XFS_ALLOCTYPE_NEAR_BNO; 2728 if (error)
2768 else 2729 return error;
2769 args.type = XFS_ALLOCTYPE_START_BNO;
2770 args.total = ap->total;
2771
2772 /*
2773 * Search for an allocation group with a single extent
2774 * large enough for the request.
2775 *
2776 * If one isn't found, then adjust the minimum allocation
2777 * size to the largest space found.
2778 */
2779 startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno);
2780 if (startag == NULLAGNUMBER)
2781 startag = ag = 0;
2782 notinit = 0;
2783 down_read(&mp->m_peraglock);
2784 while (blen < ap->alen) {
2785 pag = &mp->m_perag[ag];
2786 if (!pag->pagf_init &&
2787 (error = xfs_alloc_pagf_init(mp, args.tp,
2788 ag, XFS_ALLOC_FLAG_TRYLOCK))) {
2789 up_read(&mp->m_peraglock);
2790 return error;
2791 }
2792 /*
2793 * See xfs_alloc_fix_freelist...
2794 */
2795 if (pag->pagf_init) {
2796 xfs_extlen_t longest;
2797 longest = xfs_alloc_longest_free_extent(mp, pag);
2798 if (blen < longest)
2799 blen = longest;
2800 } else
2801 notinit = 1;
2802
2803 if (xfs_inode_is_filestream(ap->ip)) {
2804 if (blen >= ap->alen)
2805 break;
2806
2807 if (ap->userdata) {
2808 /*
2809 * If startag is an invalid AG, we've
2810 * come here once before and
2811 * xfs_filestream_new_ag picked the
2812 * best currently available.
2813 *
2814 * Don't continue looping, since we
2815 * could loop forever.
2816 */
2817 if (startag == NULLAGNUMBER)
2818 break;
2819
2820 error = xfs_filestream_new_ag(ap, &ag);
2821 if (error) {
2822 up_read(&mp->m_peraglock);
2823 return error;
2824 }
2825
2826 /* loop again to set 'blen' */
2827 startag = NULLAGNUMBER;
2828 continue;
2829 }
2830 }
2831 if (++ag == mp->m_sb.sb_agcount)
2832 ag = 0;
2833 if (ag == startag)
2834 break;
2835 }
2836 up_read(&mp->m_peraglock);
2837 /*
2838 * Since the above loop did a BUF_TRYLOCK, it is
2839 * possible that there is space for this request.
2840 */
2841 if (notinit || blen < ap->minlen)
2842 args.minlen = ap->minlen;
2843 /*
2844 * If the best seen length is less than the request
2845 * length, use the best as the minimum.
2846 */
2847 else if (blen < ap->alen)
2848 args.minlen = blen;
2849 /*
2850 * Otherwise we've seen an extent as big as alen,
2851 * use that as the minimum.
2852 */
2853 else
2854 args.minlen = ap->alen;
2855
2856 /*
2857 * set the failure fallback case to look in the selected
2858 * AG as the stream may have moved.
2859 */
2860 if (xfs_inode_is_filestream(ap->ip))
2861 ap->rval = args.fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
2862 } else if (ap->low) { 2730 } else if (ap->low) {
2863 if (xfs_inode_is_filestream(ap->ip)) 2731 if (xfs_inode_is_filestream(ap->ip))
2864 args.type = XFS_ALLOCTYPE_FIRST_AG; 2732 args.type = XFS_ALLOCTYPE_FIRST_AG;
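After the refactor the nullfb branch of xfs_bmap_btalloc() collapses to one call into the helper, and the three-way minlen fallback commentary moves with it. The choice it documents is simple enough to restate as a pure function; this is a paraphrase for illustration only, with a stand-in typedef rather than the kernel's xfs_extlen_t:

	typedef unsigned int xfs_extlen_t;	/* stand-in for the kernel typedef */

	static xfs_extlen_t
	choose_minlen(int notinit, xfs_extlen_t blen,
		      xfs_extlen_t minlen, xfs_extlen_t alen)
	{
		if (notinit || blen < minlen)
			return minlen;	/* trylock may have skipped AGs */
		if (blen < alen)
			return blen;	/* best extent seen caps the request */
		return alen;		/* an extent of the full size exists */
	}

	int main(void)
	{
		return choose_minlen(0, 16, 4, 32) == 16 ? 0 : 1;
	}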
@@ -3115,8 +2983,13 @@ xfs_bmap_del_extent(
3115 uint qfield; /* quota field to update */ 2983 uint qfield; /* quota field to update */
3116 xfs_filblks_t temp; /* for indirect length calculations */ 2984 xfs_filblks_t temp; /* for indirect length calculations */
3117 xfs_filblks_t temp2; /* for indirect length calculations */ 2985 xfs_filblks_t temp2; /* for indirect length calculations */
2986 int state = 0;
3118 2987
3119 XFS_STATS_INC(xs_del_exlist); 2988 XFS_STATS_INC(xs_del_exlist);
2989
2990 if (whichfork == XFS_ATTR_FORK)
2991 state |= BMAP_ATTRFORK;
2992
3120 mp = ip->i_mount; 2993 mp = ip->i_mount;
3121 ifp = XFS_IFORK_PTR(ip, whichfork); 2994 ifp = XFS_IFORK_PTR(ip, whichfork);
3122 ASSERT((idx >= 0) && (idx < ifp->if_bytes / 2995 ASSERT((idx >= 0) && (idx < ifp->if_bytes /
@@ -3196,8 +3069,8 @@ xfs_bmap_del_extent(
3196 /* 3069 /*
3197 * Matches the whole extent. Delete the entry. 3070 * Matches the whole extent. Delete the entry.
3198 */ 3071 */
3199 XFS_BMAP_TRACE_DELETE("3", ip, idx, 1, whichfork); 3072 xfs_iext_remove(ip, idx, 1,
3200 xfs_iext_remove(ifp, idx, 1); 3073 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
3201 ifp->if_lastex = idx; 3074 ifp->if_lastex = idx;
3202 if (delay) 3075 if (delay)
3203 break; 3076 break;
@@ -3217,7 +3090,7 @@ xfs_bmap_del_extent(
3217 /* 3090 /*
3218 * Deleting the first part of the extent. 3091 * Deleting the first part of the extent.
3219 */ 3092 */
3220 XFS_BMAP_TRACE_PRE_UPDATE("2", ip, idx, whichfork); 3093 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
3221 xfs_bmbt_set_startoff(ep, del_endoff); 3094 xfs_bmbt_set_startoff(ep, del_endoff);
3222 temp = got.br_blockcount - del->br_blockcount; 3095 temp = got.br_blockcount - del->br_blockcount;
3223 xfs_bmbt_set_blockcount(ep, temp); 3096 xfs_bmbt_set_blockcount(ep, temp);
@@ -3226,13 +3099,12 @@ xfs_bmap_del_extent(
3226 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 3099 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
3227 da_old); 3100 da_old);
3228 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 3101 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
3229 XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx, 3102 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
3230 whichfork);
3231 da_new = temp; 3103 da_new = temp;
3232 break; 3104 break;
3233 } 3105 }
3234 xfs_bmbt_set_startblock(ep, del_endblock); 3106 xfs_bmbt_set_startblock(ep, del_endblock);
3235 XFS_BMAP_TRACE_POST_UPDATE("2", ip, idx, whichfork); 3107 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
3236 if (!cur) { 3108 if (!cur) {
3237 flags |= xfs_ilog_fext(whichfork); 3109 flags |= xfs_ilog_fext(whichfork);
3238 break; 3110 break;
@@ -3248,19 +3120,18 @@ xfs_bmap_del_extent(
3248 * Deleting the last part of the extent. 3120 * Deleting the last part of the extent.
3249 */ 3121 */
3250 temp = got.br_blockcount - del->br_blockcount; 3122 temp = got.br_blockcount - del->br_blockcount;
3251 XFS_BMAP_TRACE_PRE_UPDATE("1", ip, idx, whichfork); 3123 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
3252 xfs_bmbt_set_blockcount(ep, temp); 3124 xfs_bmbt_set_blockcount(ep, temp);
3253 ifp->if_lastex = idx; 3125 ifp->if_lastex = idx;
3254 if (delay) { 3126 if (delay) {
3255 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 3127 temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
3256 da_old); 3128 da_old);
3257 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); 3129 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
3258 XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx, 3130 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
3259 whichfork);
3260 da_new = temp; 3131 da_new = temp;
3261 break; 3132 break;
3262 } 3133 }
3263 XFS_BMAP_TRACE_POST_UPDATE("1", ip, idx, whichfork); 3134 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
3264 if (!cur) { 3135 if (!cur) {
3265 flags |= xfs_ilog_fext(whichfork); 3136 flags |= xfs_ilog_fext(whichfork);
3266 break; 3137 break;
@@ -3277,7 +3148,7 @@ xfs_bmap_del_extent(
3277 * Deleting the middle of the extent. 3148 * Deleting the middle of the extent.
3278 */ 3149 */
3279 temp = del->br_startoff - got.br_startoff; 3150 temp = del->br_startoff - got.br_startoff;
3280 XFS_BMAP_TRACE_PRE_UPDATE("0", ip, idx, whichfork); 3151 trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_);
3281 xfs_bmbt_set_blockcount(ep, temp); 3152 xfs_bmbt_set_blockcount(ep, temp);
3282 new.br_startoff = del_endoff; 3153 new.br_startoff = del_endoff;
3283 temp2 = got_endoff - del_endoff; 3154 temp2 = got_endoff - del_endoff;
@@ -3364,10 +3235,8 @@ xfs_bmap_del_extent(
3364 } 3235 }
3365 } 3236 }
3366 } 3237 }
3367 XFS_BMAP_TRACE_POST_UPDATE("0", ip, idx, whichfork); 3238 trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
3368 XFS_BMAP_TRACE_INSERT("0", ip, idx + 1, 1, &new, NULL, 3239 xfs_iext_insert(ip, idx + 1, 1, &new, state);
3369 whichfork);
3370 xfs_iext_insert(ifp, idx + 1, 1, &new);
3371 ifp->if_lastex = idx + 1; 3240 ifp->if_lastex = idx + 1;
3372 break; 3241 break;
3373 } 3242 }
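Throughout xfs_bmap_del_extent() the old string-tagged trace macros become trace_xfs_bmap_pre_update()/trace_xfs_bmap_post_update() pairs carrying a state word and _THIS_IP_, the address of the call site. The kernel's _THIS_IP_ is essentially the label-address trick in the sketch below; the rest is an illustrative model of bracketing an in-place record update with pre/post hooks, not the real tracepoints:

	#include <stdio.h>

	/* the kernel's _THIS_IP_ uses this same GNU label-address trick */
	#define THIS_IP ({ __label__ __here; __here: (unsigned long)&&__here; })

	struct rec {
		unsigned long	startoff;
		unsigned long	blockcount;
	};

	static void trace_pre(struct rec *r, unsigned long ip)
	{
		printf("pre:  off=%lu len=%lu at %#lx\n",
		       r->startoff, r->blockcount, ip);
	}

	static void trace_post(struct rec *r, unsigned long ip)
	{
		printf("post: off=%lu len=%lu at %#lx\n",
		       r->startoff, r->blockcount, ip);
	}

	static void trim_front(struct rec *r, unsigned long del_len)
	{
		trace_pre(r, THIS_IP);
		r->startoff += del_len;		/* delete the first part */
		r->blockcount -= del_len;
		trace_post(r, THIS_IP);
	}

	int main(void)
	{
		struct rec r = { 10, 6 };

		trim_front(&r, 2);		/* leaves off=12 len=4 */
		return 0;
	}

Recording the instruction pointer replaces the old hand-written "2", "1", "0" description strings, so trace consumers can resolve the exact update site symbolically.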
@@ -3687,7 +3556,9 @@ xfs_bmap_local_to_extents(
3687 xfs_iext_add(ifp, 0, 1); 3556 xfs_iext_add(ifp, 0, 1);
3688 ep = xfs_iext_get_ext(ifp, 0); 3557 ep = xfs_iext_get_ext(ifp, 0);
3689 xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); 3558 xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
3690 XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork); 3559 trace_xfs_bmap_post_update(ip, 0,
3560 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
3561 _THIS_IP_);
3691 XFS_IFORK_NEXT_SET(ip, whichfork, 1); 3562 XFS_IFORK_NEXT_SET(ip, whichfork, 1);
3692 ip->i_d.di_nblocks = 1; 3563 ip->i_d.di_nblocks = 1;
3693 xfs_trans_mod_dquot_byino(tp, ip, 3564 xfs_trans_mod_dquot_byino(tp, ip,
@@ -3800,158 +3671,6 @@ xfs_bmap_search_extents(
3800 return ep; 3671 return ep;
3801} 3672}
3802 3673
3803
3804#ifdef XFS_BMAP_TRACE
3805ktrace_t *xfs_bmap_trace_buf;
3806
3807/*
3808 * Add a bmap trace buffer entry. Base routine for the others.
3809 */
3810STATIC void
3811xfs_bmap_trace_addentry(
3812 int opcode, /* operation */
3813 const char *fname, /* function name */
3814 char *desc, /* operation description */
3815 xfs_inode_t *ip, /* incore inode pointer */
3816 xfs_extnum_t idx, /* index of entry(ies) */
3817 xfs_extnum_t cnt, /* count of entries, 1 or 2 */
3818 xfs_bmbt_rec_host_t *r1, /* first record */
3819 xfs_bmbt_rec_host_t *r2, /* second record or null */
3820 int whichfork) /* data or attr fork */
3821{
3822 xfs_bmbt_rec_host_t tr2;
3823
3824 ASSERT(cnt == 1 || cnt == 2);
3825 ASSERT(r1 != NULL);
3826 if (cnt == 1) {
3827 ASSERT(r2 == NULL);
3828 r2 = &tr2;
3829 memset(&tr2, 0, sizeof(tr2));
3830 } else
3831 ASSERT(r2 != NULL);
3832 ktrace_enter(xfs_bmap_trace_buf,
3833 (void *)(__psint_t)(opcode | (whichfork << 16)),
3834 (void *)fname, (void *)desc, (void *)ip,
3835 (void *)(__psint_t)idx,
3836 (void *)(__psint_t)cnt,
3837 (void *)(__psunsigned_t)(ip->i_ino >> 32),
3838 (void *)(__psunsigned_t)(unsigned)ip->i_ino,
3839 (void *)(__psunsigned_t)(r1->l0 >> 32),
3840 (void *)(__psunsigned_t)(unsigned)(r1->l0),
3841 (void *)(__psunsigned_t)(r1->l1 >> 32),
3842 (void *)(__psunsigned_t)(unsigned)(r1->l1),
3843 (void *)(__psunsigned_t)(r2->l0 >> 32),
3844 (void *)(__psunsigned_t)(unsigned)(r2->l0),
3845 (void *)(__psunsigned_t)(r2->l1 >> 32),
3846 (void *)(__psunsigned_t)(unsigned)(r2->l1)
3847 );
3848 ASSERT(ip->i_xtrace);
3849 ktrace_enter(ip->i_xtrace,
3850 (void *)(__psint_t)(opcode | (whichfork << 16)),
3851 (void *)fname, (void *)desc, (void *)ip,
3852 (void *)(__psint_t)idx,
3853 (void *)(__psint_t)cnt,
3854 (void *)(__psunsigned_t)(ip->i_ino >> 32),
3855 (void *)(__psunsigned_t)(unsigned)ip->i_ino,
3856 (void *)(__psunsigned_t)(r1->l0 >> 32),
3857 (void *)(__psunsigned_t)(unsigned)(r1->l0),
3858 (void *)(__psunsigned_t)(r1->l1 >> 32),
3859 (void *)(__psunsigned_t)(unsigned)(r1->l1),
3860 (void *)(__psunsigned_t)(r2->l0 >> 32),
3861 (void *)(__psunsigned_t)(unsigned)(r2->l0),
3862 (void *)(__psunsigned_t)(r2->l1 >> 32),
3863 (void *)(__psunsigned_t)(unsigned)(r2->l1)
3864 );
3865}
3866
3867/*
3868 * Add bmap trace entry prior to a call to xfs_iext_remove.
3869 */
3870STATIC void
3871xfs_bmap_trace_delete(
3872 const char *fname, /* function name */
3873 char *desc, /* operation description */
3874 xfs_inode_t *ip, /* incore inode pointer */
3875 xfs_extnum_t idx, /* index of entry(entries) deleted */
3876 xfs_extnum_t cnt, /* count of entries deleted, 1 or 2 */
3877 int whichfork) /* data or attr fork */
3878{
3879 xfs_ifork_t *ifp; /* inode fork pointer */
3880
3881 ifp = XFS_IFORK_PTR(ip, whichfork);
3882 xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_DELETE, fname, desc, ip, idx,
3883 cnt, xfs_iext_get_ext(ifp, idx),
3884 cnt == 2 ? xfs_iext_get_ext(ifp, idx + 1) : NULL,
3885 whichfork);
3886}
3887
3888/*
3889 * Add bmap trace entry prior to a call to xfs_iext_insert, or
3890 * reading in the extents list from the disk (in the btree).
3891 */
3892STATIC void
3893xfs_bmap_trace_insert(
3894 const char *fname, /* function name */
3895 char *desc, /* operation description */
3896 xfs_inode_t *ip, /* incore inode pointer */
3897 xfs_extnum_t idx, /* index of entry(entries) inserted */
3898 xfs_extnum_t cnt, /* count of entries inserted, 1 or 2 */
3899 xfs_bmbt_irec_t *r1, /* inserted record 1 */
3900 xfs_bmbt_irec_t *r2, /* inserted record 2 or null */
3901 int whichfork) /* data or attr fork */
3902{
3903 xfs_bmbt_rec_host_t tr1; /* compressed record 1 */
3904 xfs_bmbt_rec_host_t tr2; /* compressed record 2 if needed */
3905
3906 xfs_bmbt_set_all(&tr1, r1);
3907 if (cnt == 2) {
3908 ASSERT(r2 != NULL);
3909 xfs_bmbt_set_all(&tr2, r2);
3910 } else {
3911 ASSERT(cnt == 1);
3912 ASSERT(r2 == NULL);
3913 }
3914 xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_INSERT, fname, desc, ip, idx,
3915 cnt, &tr1, cnt == 2 ? &tr2 : NULL, whichfork);
3916}
3917
3918/*
3919 * Add bmap trace entry after updating an extent record in place.
3920 */
3921STATIC void
3922xfs_bmap_trace_post_update(
3923 const char *fname, /* function name */
3924 char *desc, /* operation description */
3925 xfs_inode_t *ip, /* incore inode pointer */
3926 xfs_extnum_t idx, /* index of entry updated */
3927 int whichfork) /* data or attr fork */
3928{
3929 xfs_ifork_t *ifp; /* inode fork pointer */
3930
3931 ifp = XFS_IFORK_PTR(ip, whichfork);
3932 xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_POST_UP, fname, desc, ip, idx,
3933 1, xfs_iext_get_ext(ifp, idx), NULL, whichfork);
3934}
3935
3936/*
3937 * Add bmap trace entry prior to updating an extent record in place.
3938 */
3939STATIC void
3940xfs_bmap_trace_pre_update(
3941 const char *fname, /* function name */
3942 char *desc, /* operation description */
3943 xfs_inode_t *ip, /* incore inode pointer */
3944 xfs_extnum_t idx, /* index of entry to be updated */
3945 int whichfork) /* data or attr fork */
3946{
3947 xfs_ifork_t *ifp; /* inode fork pointer */
3948
3949 ifp = XFS_IFORK_PTR(ip, whichfork);
3950 xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_PRE_UP, fname, desc, ip, idx, 1,
3951 xfs_iext_get_ext(ifp, idx), NULL, whichfork);
3952}
3953#endif /* XFS_BMAP_TRACE */
3954
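The deleted ktrace code above had to pack every 64-bit field (ip->i_ino, the two record words) into pairs of 32-bit void* ring-buffer slots by hand; typed tracepoints record real fields, so none of that packing survives. For reference, the round trip the old code performed looked like this (standalone sketch, not kernel code):

	#include <assert.h>
	#include <stdint.h>

	static void pack64(uint64_t v, uint32_t *hi, uint32_t *lo)
	{
		*hi = (uint32_t)(v >> 32);	/* one ktrace slot */
		*lo = (uint32_t)v;		/* the next slot */
	}

	static uint64_t unpack64(uint32_t hi, uint32_t lo)
	{
		return ((uint64_t)hi << 32) | lo;
	}

	int main(void)
	{
		uint32_t hi, lo;

		pack64(0x123456789abcdef0ULL, &hi, &lo);
		assert(unpack64(hi, lo) == 0x123456789abcdef0ULL);
		return 0;
	}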
3955/* 3674/*
3956 * Compute the worst-case number of indirect blocks that will be used 3675 * Compute the worst-case number of indirect blocks that will be used
3957 * for ip's delayed extent of length "len". 3676 * for ip's delayed extent of length "len".
@@ -3983,37 +3702,6 @@ xfs_bmap_worst_indlen(
3983 return rval; 3702 return rval;
3984} 3703}
3985 3704
3986#if defined(XFS_RW_TRACE)
3987STATIC void
3988xfs_bunmap_trace(
3989 xfs_inode_t *ip,
3990 xfs_fileoff_t bno,
3991 xfs_filblks_t len,
3992 int flags,
3993 inst_t *ra)
3994{
3995 if (ip->i_rwtrace == NULL)
3996 return;
3997 ktrace_enter(ip->i_rwtrace,
3998 (void *)(__psint_t)XFS_BUNMAP,
3999 (void *)ip,
4000 (void *)(__psint_t)((ip->i_d.di_size >> 32) & 0xffffffff),
4001 (void *)(__psint_t)(ip->i_d.di_size & 0xffffffff),
4002 (void *)(__psint_t)(((xfs_dfiloff_t)bno >> 32) & 0xffffffff),
4003 (void *)(__psint_t)((xfs_dfiloff_t)bno & 0xffffffff),
4004 (void *)(__psint_t)len,
4005 (void *)(__psint_t)flags,
4006 (void *)(unsigned long)current_cpu(),
4007 (void *)ra,
4008 (void *)0,
4009 (void *)0,
4010 (void *)0,
4011 (void *)0,
4012 (void *)0,
4013 (void *)0);
4014}
4015#endif
4016
4017/* 3705/*
4018 * Convert inode from non-attributed to attributed. 3706 * Convert inode from non-attributed to attributed.
4019 * Must not be in a transaction, ip must not be locked. 3707 * Must not be in a transaction, ip must not be locked.
@@ -4702,34 +4390,30 @@ error0:
4702 return XFS_ERROR(EFSCORRUPTED); 4390 return XFS_ERROR(EFSCORRUPTED);
4703} 4391}
4704 4392
4705#ifdef XFS_BMAP_TRACE 4393#ifdef DEBUG
4706/* 4394/*
4707 * Add bmap trace insert entries for all the contents of the extent records. 4395 * Add bmap trace insert entries for all the contents of the extent records.
4708 */ 4396 */
4709void 4397void
4710xfs_bmap_trace_exlist( 4398xfs_bmap_trace_exlist(
4711 const char *fname, /* function name */
4712 xfs_inode_t *ip, /* incore inode pointer */ 4399 xfs_inode_t *ip, /* incore inode pointer */
4713 xfs_extnum_t cnt, /* count of entries in the list */ 4400 xfs_extnum_t cnt, /* count of entries in the list */
4714 int whichfork) /* data or attr fork */ 4401 int whichfork, /* data or attr fork */
4402 unsigned long caller_ip)
4715{ 4403{
4716 xfs_bmbt_rec_host_t *ep; /* current extent record */
4717 xfs_extnum_t idx; /* extent record index */ 4404 xfs_extnum_t idx; /* extent record index */
4718 xfs_ifork_t *ifp; /* inode fork pointer */ 4405 xfs_ifork_t *ifp; /* inode fork pointer */
4719 xfs_bmbt_irec_t s; /* file extent record */ 4406 int state = 0;
4407
4408 if (whichfork == XFS_ATTR_FORK)
4409 state |= BMAP_ATTRFORK;
4720 4410
4721 ifp = XFS_IFORK_PTR(ip, whichfork); 4411 ifp = XFS_IFORK_PTR(ip, whichfork);
4722 ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); 4412 ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
4723 for (idx = 0; idx < cnt; idx++) { 4413 for (idx = 0; idx < cnt; idx++)
4724 ep = xfs_iext_get_ext(ifp, idx); 4414 trace_xfs_extlist(ip, idx, whichfork, caller_ip);
4725 xfs_bmbt_get_all(ep, &s);
4726 XFS_BMAP_TRACE_INSERT("exlist", ip, idx, 1, &s, NULL,
4727 whichfork);
4728 }
4729} 4415}
4730#endif
4731 4416
4732#ifdef DEBUG
4733/* 4417/*
4734 * Validate that the bmbt_irecs being returned from bmapi are valid 4418 * Validate that the bmbt_irecs being returned from bmapi are valid
4735 * given the callers original parameters. Specifically check the 4419 * given the callers original parameters. Specifically check the
@@ -4805,7 +4489,7 @@ xfs_bmapi(
4805 xfs_fsblock_t abno; /* allocated block number */ 4489 xfs_fsblock_t abno; /* allocated block number */
4806 xfs_extlen_t alen; /* allocated extent length */ 4490 xfs_extlen_t alen; /* allocated extent length */
4807 xfs_fileoff_t aoff; /* allocated file offset */ 4491 xfs_fileoff_t aoff; /* allocated file offset */
4808 xfs_bmalloca_t bma; /* args for xfs_bmap_alloc */ 4492 xfs_bmalloca_t bma = { 0 }; /* args for xfs_bmap_alloc */
4809 xfs_btree_cur_t *cur; /* bmap btree cursor */ 4493 xfs_btree_cur_t *cur; /* bmap btree cursor */
4810 xfs_fileoff_t end; /* end of mapped file region */ 4494 xfs_fileoff_t end; /* end of mapped file region */
4811 int eof; /* we've hit the end of extents */ 4495 int eof; /* we've hit the end of extents */
@@ -5478,7 +5162,8 @@ xfs_bunmapi(
5478 int rsvd; /* OK to allocate reserved blocks */ 5162 int rsvd; /* OK to allocate reserved blocks */
5479 xfs_fsblock_t sum; 5163 xfs_fsblock_t sum;
5480 5164
5481 xfs_bunmap_trace(ip, bno, len, flags, (inst_t *)__return_address); 5165 trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
5166
5482 whichfork = (flags & XFS_BMAPI_ATTRFORK) ? 5167 whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
5483 XFS_ATTR_FORK : XFS_DATA_FORK; 5168 XFS_ATTR_FORK : XFS_DATA_FORK;
5484 ifp = XFS_IFORK_PTR(ip, whichfork); 5169 ifp = XFS_IFORK_PTR(ip, whichfork);
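Two smaller fixes ride along in xfs_bmap.c: the on-stack xfs_bmalloca_t becomes "bma = { 0 }" so no field starts out as stack garbage, and xfs_bunmapi() now records its caller via trace_xfs_bunmap(..., _RET_IP_) instead of the XFS_RW_TRACE code removed above. _RET_IP_ resolves to the function's return address; a standalone equivalent of that pattern (the kernel defines _RET_IP_ the same way, everything else here is illustrative):

	#include <stdio.h>

	#define RET_IP ((unsigned long)__builtin_return_address(0))

	static void trace_bunmap_like(unsigned long caller_ip)
	{
		printf("unmap requested from %#lx\n", caller_ip);
	}

	static void bunmapi_like(void)
	{
		/* record who called into this function, as the new trace does */
		trace_bunmap_like(RET_IP);
	}

	int main(void)
	{
		bunmapi_like();
		return 0;
	}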
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 56f62d2edc35..419dafb9d87d 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -95,6 +95,21 @@ typedef struct xfs_bmap_free
95 /* need write cache flushing and no */ 95 /* need write cache flushing and no */
96 /* additional allocation alignments */ 96 /* additional allocation alignments */
97 97
98#define XFS_BMAPI_FLAGS \
99 { XFS_BMAPI_WRITE, "WRITE" }, \
100 { XFS_BMAPI_DELAY, "DELAY" }, \
101 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
102 { XFS_BMAPI_METADATA, "METADATA" }, \
103 { XFS_BMAPI_EXACT, "EXACT" }, \
104 { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \
105 { XFS_BMAPI_ASYNC, "ASYNC" }, \
106 { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \
107 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \
108 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
109 { XFS_BMAPI_CONTIG, "CONTIG" }, \
110 { XFS_BMAPI_CONVERT, "CONVERT" }
111
112
98static inline int xfs_bmapi_aflag(int w) 113static inline int xfs_bmapi_aflag(int w)
99{ 114{
100 return (w == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0); 115 return (w == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0);
@@ -135,36 +150,43 @@ typedef struct xfs_bmalloca {
135 char conv; /* overwriting unwritten extents */ 150 char conv; /* overwriting unwritten extents */
136} xfs_bmalloca_t; 151} xfs_bmalloca_t;
137 152
138#if defined(__KERNEL__) && defined(XFS_BMAP_TRACE)
139/* 153/*
140 * Trace operations for bmap extent tracing 154 * Flags for xfs_bmap_add_extent*.
141 */ 155 */
142#define XFS_BMAP_KTRACE_DELETE 1 156#define BMAP_LEFT_CONTIG (1 << 0)
143#define XFS_BMAP_KTRACE_INSERT 2 157#define BMAP_RIGHT_CONTIG (1 << 1)
144#define XFS_BMAP_KTRACE_PRE_UP 3 158#define BMAP_LEFT_FILLING (1 << 2)
145#define XFS_BMAP_KTRACE_POST_UP 4 159#define BMAP_RIGHT_FILLING (1 << 3)
146 160#define BMAP_LEFT_DELAY (1 << 4)
147#define XFS_BMAP_TRACE_SIZE 4096 /* size of global trace buffer */ 161#define BMAP_RIGHT_DELAY (1 << 5)
148#define XFS_BMAP_KTRACE_SIZE 32 /* size of per-inode trace buffer */ 162#define BMAP_LEFT_VALID (1 << 6)
149extern ktrace_t *xfs_bmap_trace_buf; 163#define BMAP_RIGHT_VALID (1 << 7)
164#define BMAP_ATTRFORK (1 << 8)
165
166#define XFS_BMAP_EXT_FLAGS \
167 { BMAP_LEFT_CONTIG, "LC" }, \
168 { BMAP_RIGHT_CONTIG, "RC" }, \
169 { BMAP_LEFT_FILLING, "LF" }, \
170 { BMAP_RIGHT_FILLING, "RF" }, \
171 { BMAP_ATTRFORK, "ATTR" }
150 172
151/* 173/*
152 * Add bmap trace insert entries for all the contents of the extent list. 174 * Add bmap trace insert entries for all the contents of the extent list.
175 *
176 * Quite excessive tracing. Only do this for debug builds.
153 */ 177 */
 178#if defined(__KERNEL__) && defined(DEBUG)
154void 179void
155xfs_bmap_trace_exlist( 180xfs_bmap_trace_exlist(
156 const char *fname, /* function name */
157 struct xfs_inode *ip, /* incore inode pointer */ 181 struct xfs_inode *ip, /* incore inode pointer */
158 xfs_extnum_t cnt, /* count of entries in list */ 182 xfs_extnum_t cnt, /* count of entries in list */
 159 int whichfork); /* data or attr fork */ 183 int whichfork, /* data or attr fork */
 184 unsigned long caller_ip);
160#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ 185#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \
161 xfs_bmap_trace_exlist(__func__,ip,c,w) 186 xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_)
162 187#else
163#else /* __KERNEL__ && XFS_BMAP_TRACE */
164
165#define XFS_BMAP_TRACE_EXLIST(ip,c,w) 188#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
166 189#endif
167#endif /* __KERNEL__ && XFS_BMAP_TRACE */
168 190
169/* 191/*
170 * Convert inode from non-attributed to attributed. 192 * Convert inode from non-attributed to attributed.
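The { value, "name" } pair tables added to xfs_bmap.h (XFS_BMAPI_FLAGS, XFS_BMAP_EXT_FLAGS) exist so trace output can decode flag words into the short "LC|RC|ATTR" strings; in the kernel the tracing core's __print_flags() does the decoding. A userspace model of the same lookup, using the flag values from the header above:

	#include <stdio.h>

	struct flag_name {
		unsigned	mask;
		const char	*name;
	};

	static const struct flag_name bmap_ext_flags[] = {
		{ 1 << 0, "LC" },	/* BMAP_LEFT_CONTIG */
		{ 1 << 1, "RC" },	/* BMAP_RIGHT_CONTIG */
		{ 1 << 2, "LF" },	/* BMAP_LEFT_FILLING */
		{ 1 << 3, "RF" },	/* BMAP_RIGHT_FILLING */
		{ 1 << 8, "ATTR" },	/* BMAP_ATTRFORK */
		{ 0, NULL }
	};

	static void print_state(unsigned state)
	{
		const struct flag_name *f;
		const char *sep = "";

		for (f = bmap_ext_flags; f->name != NULL; f++) {
			if (state & f->mask) {
				printf("%s%s", sep, f->name);
				sep = "|";
			}
		}
		putchar('\n');
	}

	int main(void)
	{
		print_state((1 << 0) | (1 << 8));	/* prints LC|ATTR */
		return 0;
	}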
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index eb7b702d0690..416e47e54b83 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -98,8 +98,7 @@ xfs_bmdr_to_bmbt(
98 * This code must be in sync with the routines xfs_bmbt_get_startoff, 98 * This code must be in sync with the routines xfs_bmbt_get_startoff,
99 * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state. 99 * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
100 */ 100 */
101 101STATIC void
102STATIC_INLINE void
103__xfs_bmbt_get_all( 102__xfs_bmbt_get_all(
104 __uint64_t l0, 103 __uint64_t l0,
105 __uint64_t l1, 104 __uint64_t l1,
@@ -335,7 +334,7 @@ xfs_bmbt_disk_set_allf(
335/* 334/*
336 * Set all the fields in a bmap extent record from the uncompressed form. 335 * Set all the fields in a bmap extent record from the uncompressed form.
337 */ 336 */
338void 337STATIC void
339xfs_bmbt_disk_set_all( 338xfs_bmbt_disk_set_all(
340 xfs_bmbt_rec_t *r, 339 xfs_bmbt_rec_t *r,
341 xfs_bmbt_irec_t *s) 340 xfs_bmbt_irec_t *s)
@@ -769,12 +768,6 @@ xfs_bmbt_trace_enter(
769 (void *)a0, (void *)a1, (void *)a2, (void *)a3, 768 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
770 (void *)a4, (void *)a5, (void *)a6, (void *)a7, 769 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
771 (void *)a8, (void *)a9, (void *)a10); 770 (void *)a8, (void *)a9, (void *)a10);
772 ktrace_enter(ip->i_btrace,
773 (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
774 (void *)func, (void *)s, (void *)ip, (void *)cur,
775 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
776 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
777 (void *)a8, (void *)a9, (void *)a10);
778} 771}
779 772
780STATIC void 773STATIC void
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 5549d495947f..0e66c4ea0f85 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -46,20 +46,12 @@ typedef struct xfs_bmdr_block {
46#define BMBT_STARTBLOCK_BITLEN 52 46#define BMBT_STARTBLOCK_BITLEN 52
47#define BMBT_BLOCKCOUNT_BITLEN 21 47#define BMBT_BLOCKCOUNT_BITLEN 21
48 48
49 49typedef struct xfs_bmbt_rec {
50#define BMBT_USE_64 1
51
52typedef struct xfs_bmbt_rec_32
53{
54 __uint32_t l0, l1, l2, l3;
55} xfs_bmbt_rec_32_t;
56typedef struct xfs_bmbt_rec_64
57{
58 __be64 l0, l1; 50 __be64 l0, l1;
59} xfs_bmbt_rec_64_t; 51} xfs_bmbt_rec_t;
60 52
61typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ 53typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */
62typedef xfs_bmbt_rec_64_t xfs_bmbt_rec_t, xfs_bmdr_rec_t; 54typedef xfs_bmbt_rec_t xfs_bmdr_rec_t;
63 55
64typedef struct xfs_bmbt_rec_host { 56typedef struct xfs_bmbt_rec_host {
65 __uint64_t l0, l1; 57 __uint64_t l0, l1;
@@ -231,7 +223,6 @@ extern void xfs_bmbt_set_startblock(xfs_bmbt_rec_host_t *r, xfs_fsblock_t v);
231extern void xfs_bmbt_set_startoff(xfs_bmbt_rec_host_t *r, xfs_fileoff_t v); 223extern void xfs_bmbt_set_startoff(xfs_bmbt_rec_host_t *r, xfs_fileoff_t v);
232extern void xfs_bmbt_set_state(xfs_bmbt_rec_host_t *r, xfs_exntst_t v); 224extern void xfs_bmbt_set_state(xfs_bmbt_rec_host_t *r, xfs_exntst_t v);
233 225
234extern void xfs_bmbt_disk_set_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
235extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, 226extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o,
236 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); 227 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
237 228
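With the dead 32-bit variant gone, the on-disk bmbt record is just two big-endian 64-bit words. As a sketch of reading one field back out: the 21-bit block count sits in the low bits of l1 per the BMBT_BLOCKCOUNT_BITLEN definition retained above (the exact bit placement is this sketch's assumption, going by the xfs_bmbt_get_blockcount() accessors the header declares); be64toh() stands in for the kernel's be64_to_cpu():

	#include <endian.h>	/* be64toh()/htobe64(); glibc extension */
	#include <stdint.h>

	#define BMBT_BLOCKCOUNT_BITLEN	21	/* matches the header above */

	struct bmbt_rec {
		uint64_t	l0;	/* big-endian on disk */
		uint64_t	l1;
	};

	static uint64_t rec_blockcount(const struct bmbt_rec *r)
	{
		/* blockcount in the low 21 bits of l1 (assumed layout) */
		return be64toh(r->l1) & ((1ULL << BMBT_BLOCKCOUNT_BITLEN) - 1);
	}

	int main(void)
	{
		struct bmbt_rec r = { 0, htobe64(42) };

		return rec_blockcount(&r) == 42 ? 0 : 1;
	}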
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 52b5f14d0c32..96be4b0f2496 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -39,6 +39,7 @@
39#include "xfs_btree_trace.h" 39#include "xfs_btree_trace.h"
40#include "xfs_ialloc.h" 40#include "xfs_ialloc.h"
41#include "xfs_error.h" 41#include "xfs_error.h"
42#include "xfs_trace.h"
42 43
43/* 44/*
44 * Cursor allocation zone. 45 * Cursor allocation zone.
@@ -81,7 +82,7 @@ xfs_btree_check_lblock(
81 XFS_ERRTAG_BTREE_CHECK_LBLOCK, 82 XFS_ERRTAG_BTREE_CHECK_LBLOCK,
82 XFS_RANDOM_BTREE_CHECK_LBLOCK))) { 83 XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
83 if (bp) 84 if (bp)
84 xfs_buftrace("LBTREE ERROR", bp); 85 trace_xfs_btree_corrupt(bp, _RET_IP_);
85 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW, 86 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
86 mp); 87 mp);
87 return XFS_ERROR(EFSCORRUPTED); 88 return XFS_ERROR(EFSCORRUPTED);
@@ -119,7 +120,7 @@ xfs_btree_check_sblock(
119 XFS_ERRTAG_BTREE_CHECK_SBLOCK, 120 XFS_ERRTAG_BTREE_CHECK_SBLOCK,
120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) { 121 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
121 if (bp) 122 if (bp)
122 xfs_buftrace("SBTREE ERROR", bp); 123 trace_xfs_btree_corrupt(bp, _RET_IP_);
123 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock", 124 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
124 XFS_ERRLEVEL_LOW, cur->bc_mp, block); 125 XFS_ERRLEVEL_LOW, cur->bc_mp, block);
125 return XFS_ERROR(EFSCORRUPTED); 126 return XFS_ERROR(EFSCORRUPTED);
@@ -976,7 +977,7 @@ xfs_btree_get_buf_block(
976 xfs_daddr_t d; 977 xfs_daddr_t d;
977 978
978 /* need to sort out how callers deal with failures first */ 979 /* need to sort out how callers deal with failures first */
979 ASSERT(!(flags & XFS_BUF_TRYLOCK)); 980 ASSERT(!(flags & XBF_TRYLOCK));
980 981
981 d = xfs_btree_ptr_to_daddr(cur, ptr); 982 d = xfs_btree_ptr_to_daddr(cur, ptr);
982 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, 983 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
@@ -1007,7 +1008,7 @@ xfs_btree_read_buf_block(
1007 int error; 1008 int error;
1008 1009
1009 /* need to sort out how callers deal with failures first */ 1010 /* need to sort out how callers deal with failures first */
1010 ASSERT(!(flags & XFS_BUF_TRYLOCK)); 1011 ASSERT(!(flags & XBF_TRYLOCK));
1011 1012
1012 d = xfs_btree_ptr_to_daddr(cur, ptr); 1013 d = xfs_btree_ptr_to_daddr(cur, ptr);
1013 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, 1014 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
diff --git a/fs/xfs/xfs_btree_trace.h b/fs/xfs/xfs_btree_trace.h
index b3f5eb3c3c6c..2d8a309873ea 100644
--- a/fs/xfs/xfs_btree_trace.h
+++ b/fs/xfs/xfs_btree_trace.h
@@ -58,8 +58,6 @@ void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *,
58 struct xfs_buf *, int, int); 58 struct xfs_buf *, int, int);
59void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *, 59void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *,
60 struct xfs_buf *, int, int, int); 60 struct xfs_buf *, int, int, int);
61void xfs_btree_trace_argfffi(const char *, struct xfs_btree_cur *,
62 xfs_dfiloff_t, xfs_dfsbno_t, xfs_dfilblks_t, int, int);
63void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int); 61void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int);
64void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int, 62void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int,
65 union xfs_btree_ptr, union xfs_btree_key *, int); 63 union xfs_btree_ptr, union xfs_btree_key *, int);
@@ -71,24 +69,10 @@ void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *,
71 union xfs_btree_rec *, int); 69 union xfs_btree_rec *, int);
72void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int); 70void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int);
73 71
74
75#define XFS_ALLOCBT_TRACE_SIZE 4096 /* size of global trace buffer */
76extern ktrace_t *xfs_allocbt_trace_buf;
77
78#define XFS_INOBT_TRACE_SIZE 4096 /* size of global trace buffer */
79extern ktrace_t *xfs_inobt_trace_buf;
80
81#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */
82#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */
83extern ktrace_t *xfs_bmbt_trace_buf;
84
85
86#define XFS_BTREE_TRACE_ARGBI(c, b, i) \ 72#define XFS_BTREE_TRACE_ARGBI(c, b, i) \
87 xfs_btree_trace_argbi(__func__, c, b, i, __LINE__) 73 xfs_btree_trace_argbi(__func__, c, b, i, __LINE__)
88#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \ 74#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \
89 xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__) 75 xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__)
90#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j) \
91 xfs_btree_trace_argfffi(__func__, c, o, b, i, j, __LINE__)
92#define XFS_BTREE_TRACE_ARGI(c, i) \ 76#define XFS_BTREE_TRACE_ARGI(c, i) \
93 xfs_btree_trace_argi(__func__, c, i, __LINE__) 77 xfs_btree_trace_argi(__func__, c, i, __LINE__)
94#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \ 78#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \
@@ -104,7 +88,6 @@ extern ktrace_t *xfs_bmbt_trace_buf;
104#else 88#else
105#define XFS_BTREE_TRACE_ARGBI(c, b, i) 89#define XFS_BTREE_TRACE_ARGBI(c, b, i)
106#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) 90#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
107#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j)
108#define XFS_BTREE_TRACE_ARGI(c, i) 91#define XFS_BTREE_TRACE_ARGI(c, i)
109#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s) 92#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
110#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) 93#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 92af4098c7e8..f3c49e69eab9 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -29,6 +29,7 @@
29#include "xfs_buf_item.h" 29#include "xfs_buf_item.h"
30#include "xfs_trans_priv.h" 30#include "xfs_trans_priv.h"
31#include "xfs_error.h" 31#include "xfs_error.h"
32#include "xfs_trace.h"
32 33
33 34
34kmem_zone_t *xfs_buf_item_zone; 35kmem_zone_t *xfs_buf_item_zone;
@@ -164,7 +165,7 @@ xfs_buf_item_size(
164 * is the buf log format structure with the 165 * is the buf log format structure with the
165 * cancel flag in it. 166 * cancel flag in it.
166 */ 167 */
167 xfs_buf_item_trace("SIZE STALE", bip); 168 trace_xfs_buf_item_size_stale(bip);
168 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); 169 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
169 return 1; 170 return 1;
170 } 171 }
@@ -206,7 +207,7 @@ xfs_buf_item_size(
206 } 207 }
207 } 208 }
208 209
209 xfs_buf_item_trace("SIZE NORM", bip); 210 trace_xfs_buf_item_size(bip);
210 return nvecs; 211 return nvecs;
211} 212}
212 213
@@ -249,7 +250,7 @@ xfs_buf_item_format(
249 ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); 250 ((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
250 vecp->i_addr = (xfs_caddr_t)&bip->bli_format; 251 vecp->i_addr = (xfs_caddr_t)&bip->bli_format;
251 vecp->i_len = base_size; 252 vecp->i_len = base_size;
252 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BFORMAT); 253 vecp->i_type = XLOG_REG_TYPE_BFORMAT;
253 vecp++; 254 vecp++;
254 nvecs = 1; 255 nvecs = 1;
255 256
@@ -259,7 +260,7 @@ xfs_buf_item_format(
259 * is the buf log format structure with the 260 * is the buf log format structure with the
260 * cancel flag in it. 261 * cancel flag in it.
261 */ 262 */
262 xfs_buf_item_trace("FORMAT STALE", bip); 263 trace_xfs_buf_item_format_stale(bip);
263 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); 264 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
264 bip->bli_format.blf_size = nvecs; 265 bip->bli_format.blf_size = nvecs;
265 return; 266 return;
@@ -296,14 +297,14 @@ xfs_buf_item_format(
296 buffer_offset = first_bit * XFS_BLI_CHUNK; 297 buffer_offset = first_bit * XFS_BLI_CHUNK;
297 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 298 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
298 vecp->i_len = nbits * XFS_BLI_CHUNK; 299 vecp->i_len = nbits * XFS_BLI_CHUNK;
299 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); 300 vecp->i_type = XLOG_REG_TYPE_BCHUNK;
300 nvecs++; 301 nvecs++;
301 break; 302 break;
302 } else if (next_bit != last_bit + 1) { 303 } else if (next_bit != last_bit + 1) {
303 buffer_offset = first_bit * XFS_BLI_CHUNK; 304 buffer_offset = first_bit * XFS_BLI_CHUNK;
304 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 305 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
305 vecp->i_len = nbits * XFS_BLI_CHUNK; 306 vecp->i_len = nbits * XFS_BLI_CHUNK;
306 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); 307 vecp->i_type = XLOG_REG_TYPE_BCHUNK;
307 nvecs++; 308 nvecs++;
308 vecp++; 309 vecp++;
309 first_bit = next_bit; 310 first_bit = next_bit;
@@ -315,7 +316,7 @@ xfs_buf_item_format(
315 buffer_offset = first_bit * XFS_BLI_CHUNK; 316 buffer_offset = first_bit * XFS_BLI_CHUNK;
316 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 317 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
317 vecp->i_len = nbits * XFS_BLI_CHUNK; 318 vecp->i_len = nbits * XFS_BLI_CHUNK;
318 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); 319 vecp->i_type = XLOG_REG_TYPE_BCHUNK;
 319/* You would think we need to bump the nvecs here too, but we do not; 320/* You would think we need to bump the nvecs here too, but we do not;
 320 * this number is used by recovery, and it gets confused by the boundary 321 * this number is used by recovery, and it gets confused by the boundary
 321 * split here 322 * split here
@@ -335,7 +336,7 @@ xfs_buf_item_format(
335 /* 336 /*
336 * Check to make sure everything is consistent. 337 * Check to make sure everything is consistent.
337 */ 338 */
338 xfs_buf_item_trace("FORMAT NORM", bip); 339 trace_xfs_buf_item_format(bip);
339 xfs_buf_item_log_check(bip); 340 xfs_buf_item_log_check(bip);
340} 341}
341 342
@@ -355,8 +356,7 @@ xfs_buf_item_pin(
355 ASSERT(atomic_read(&bip->bli_refcount) > 0); 356 ASSERT(atomic_read(&bip->bli_refcount) > 0);
356 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || 357 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
357 (bip->bli_flags & XFS_BLI_STALE)); 358 (bip->bli_flags & XFS_BLI_STALE));
358 xfs_buf_item_trace("PIN", bip); 359 trace_xfs_buf_item_pin(bip);
359 xfs_buftrace("XFS_PIN", bp);
360 xfs_bpin(bp); 360 xfs_bpin(bp);
361} 361}
362 362
@@ -383,8 +383,7 @@ xfs_buf_item_unpin(
383 ASSERT(bp != NULL); 383 ASSERT(bp != NULL);
384 ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip); 384 ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip);
385 ASSERT(atomic_read(&bip->bli_refcount) > 0); 385 ASSERT(atomic_read(&bip->bli_refcount) > 0);
386 xfs_buf_item_trace("UNPIN", bip); 386 trace_xfs_buf_item_unpin(bip);
387 xfs_buftrace("XFS_UNPIN", bp);
388 387
389 freed = atomic_dec_and_test(&bip->bli_refcount); 388 freed = atomic_dec_and_test(&bip->bli_refcount);
390 ailp = bip->bli_item.li_ailp; 389 ailp = bip->bli_item.li_ailp;
@@ -395,8 +394,8 @@ xfs_buf_item_unpin(
395 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); 394 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
396 ASSERT(XFS_BUF_ISSTALE(bp)); 395 ASSERT(XFS_BUF_ISSTALE(bp));
397 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); 396 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
398 xfs_buf_item_trace("UNPIN STALE", bip); 397 trace_xfs_buf_item_unpin_stale(bip);
399 xfs_buftrace("XFS_UNPIN STALE", bp); 398
400 /* 399 /*
401 * If we get called here because of an IO error, we may 400 * If we get called here because of an IO error, we may
402 * or may not have the item on the AIL. xfs_trans_ail_delete() 401 * or may not have the item on the AIL. xfs_trans_ail_delete()
@@ -440,8 +439,8 @@ xfs_buf_item_unpin_remove(
440 if ((atomic_read(&bip->bli_refcount) == 1) && 439 if ((atomic_read(&bip->bli_refcount) == 1) &&
441 (bip->bli_flags & XFS_BLI_STALE)) { 440 (bip->bli_flags & XFS_BLI_STALE)) {
442 ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0); 441 ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0);
443 xfs_buf_item_trace("UNPIN REMOVE", bip); 442 trace_xfs_buf_item_unpin_stale(bip);
444 xfs_buftrace("XFS_UNPIN_REMOVE", bp); 443
445 /* 444 /*
446 * yes -- clear the xaction descriptor in-use flag 445 * yes -- clear the xaction descriptor in-use flag
447 * and free the chunk if required. We can safely 446 * and free the chunk if required. We can safely
@@ -468,8 +467,10 @@ xfs_buf_item_unpin_remove(
468/* 467/*
469 * This is called to attempt to lock the buffer associated with this 468 * This is called to attempt to lock the buffer associated with this
470 * buf log item. Don't sleep on the buffer lock. If we can't get 469 * buf log item. Don't sleep on the buffer lock. If we can't get
471 * the lock right away, return 0. If we can get the lock, pull the 470 * the lock right away, return 0. If we can get the lock, take a
472 * buffer from the free list, mark it busy, and return 1. 471 * reference to the buffer. If this is a delayed write buffer that
472 * needs AIL help to be written back, invoke the pushbuf routine
473 * rather than the normal success path.
473 */ 474 */
474STATIC uint 475STATIC uint
475xfs_buf_item_trylock( 476xfs_buf_item_trylock(
@@ -478,24 +479,18 @@ xfs_buf_item_trylock(
478 xfs_buf_t *bp; 479 xfs_buf_t *bp;
479 480
480 bp = bip->bli_buf; 481 bp = bip->bli_buf;
481 482 if (XFS_BUF_ISPINNED(bp))
482 if (XFS_BUF_ISPINNED(bp)) {
483 return XFS_ITEM_PINNED; 483 return XFS_ITEM_PINNED;
484 } 484 if (!XFS_BUF_CPSEMA(bp))
485
486 if (!XFS_BUF_CPSEMA(bp)) {
487 return XFS_ITEM_LOCKED; 485 return XFS_ITEM_LOCKED;
488 }
489 486
490 /* 487 /* take a reference to the buffer. */
491 * Remove the buffer from the free list. Only do this
492 * if it's on the free list. Private buffers like the
493 * superblock buffer are not.
494 */
495 XFS_BUF_HOLD(bp); 488 XFS_BUF_HOLD(bp);
496 489
497 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 490 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
498 xfs_buf_item_trace("TRYLOCK SUCCESS", bip); 491 trace_xfs_buf_item_trylock(bip);
492 if (XFS_BUF_ISDELAYWRITE(bp))
493 return XFS_ITEM_PUSHBUF;
499 return XFS_ITEM_SUCCESS; 494 return XFS_ITEM_SUCCESS;
500} 495}
501 496
@@ -524,7 +519,6 @@ xfs_buf_item_unlock(
524 uint hold; 519 uint hold;
525 520
526 bp = bip->bli_buf; 521 bp = bip->bli_buf;
527 xfs_buftrace("XFS_UNLOCK", bp);
528 522
529 /* 523 /*
530 * Clear the buffer's association with this transaction. 524 * Clear the buffer's association with this transaction.
@@ -547,7 +541,7 @@ xfs_buf_item_unlock(
547 */ 541 */
548 if (bip->bli_flags & XFS_BLI_STALE) { 542 if (bip->bli_flags & XFS_BLI_STALE) {
549 bip->bli_flags &= ~XFS_BLI_LOGGED; 543 bip->bli_flags &= ~XFS_BLI_LOGGED;
550 xfs_buf_item_trace("UNLOCK STALE", bip); 544 trace_xfs_buf_item_unlock_stale(bip);
551 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); 545 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
552 if (!aborted) 546 if (!aborted)
553 return; 547 return;
@@ -574,7 +568,7 @@ xfs_buf_item_unlock(
574 * release the buffer at the end of this routine. 568 * release the buffer at the end of this routine.
575 */ 569 */
576 hold = bip->bli_flags & XFS_BLI_HOLD; 570 hold = bip->bli_flags & XFS_BLI_HOLD;
577 xfs_buf_item_trace("UNLOCK", bip); 571 trace_xfs_buf_item_unlock(bip);
578 572
579 /* 573 /*
580 * If the buf item isn't tracking any data, free it. 574 * If the buf item isn't tracking any data, free it.
@@ -618,7 +612,8 @@ xfs_buf_item_committed(
618 xfs_buf_log_item_t *bip, 612 xfs_buf_log_item_t *bip,
619 xfs_lsn_t lsn) 613 xfs_lsn_t lsn)
620{ 614{
621 xfs_buf_item_trace("COMMITTED", bip); 615 trace_xfs_buf_item_committed(bip);
616
622 if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && 617 if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
623 (bip->bli_item.li_lsn != 0)) { 618 (bip->bli_item.li_lsn != 0)) {
624 return bip->bli_item.li_lsn; 619 return bip->bli_item.li_lsn;
@@ -627,11 +622,9 @@ xfs_buf_item_committed(
627} 622}
628 623
629/* 624/*
630 * This is called to asynchronously write the buffer associated with this 625 * The buffer is locked, but is not a delayed write buffer. This happens
631 * buf log item out to disk. The buffer will already have been locked by 626 * if we race with IO completion and hence we don't want to try to write it
632 * a successful call to xfs_buf_item_trylock(). If the buffer still has 627 * again. Just release the buffer.
633 * B_DELWRI set, then get it going out to disk with a call to bawrite().
634 * If not, then just release the buffer.
635 */ 628 */
636STATIC void 629STATIC void
637xfs_buf_item_push( 630xfs_buf_item_push(
@@ -640,20 +633,32 @@ xfs_buf_item_push(
640 xfs_buf_t *bp; 633 xfs_buf_t *bp;
641 634
642 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 635 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
643 xfs_buf_item_trace("PUSH", bip); 636 trace_xfs_buf_item_push(bip);
644 637
645 bp = bip->bli_buf; 638 bp = bip->bli_buf;
639 ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
640 xfs_buf_relse(bp);
641}
646 642
647 if (XFS_BUF_ISDELAYWRITE(bp)) { 643/*
648 int error; 644 * The buffer is locked and is a delayed write buffer. Promote the buffer
649 error = xfs_bawrite(bip->bli_item.li_mountp, bp); 645 * in the delayed write queue as the caller knows that they must invoke
650 if (error) 646 * the xfsbufd to get this buffer written. We have to unlock the buffer
651 xfs_fs_cmn_err(CE_WARN, bip->bli_item.li_mountp, 647 * to allow the xfsbufd to write it, too.
652 "xfs_buf_item_push: pushbuf error %d on bip %p, bp %p", 648 */
653 error, bip, bp); 649STATIC void
654 } else { 650xfs_buf_item_pushbuf(
655 xfs_buf_relse(bp); 651 xfs_buf_log_item_t *bip)
656 } 652{
653 xfs_buf_t *bp;
654
655 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
656 trace_xfs_buf_item_pushbuf(bip);
657
658 bp = bip->bli_buf;
659 ASSERT(XFS_BUF_ISDELAYWRITE(bp));
660 xfs_buf_delwri_promote(bp);
661 xfs_buf_relse(bp);
657} 662}
658 663
659/* ARGSUSED */ 664/* ARGSUSED */
@@ -678,7 +683,7 @@ static struct xfs_item_ops xfs_buf_item_ops = {
678 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) 683 .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
679 xfs_buf_item_committed, 684 xfs_buf_item_committed,
680 .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push, 685 .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push,
681 .iop_pushbuf = NULL, 686 .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_buf_item_pushbuf,
682 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) 687 .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
683 xfs_buf_item_committing 688 xfs_buf_item_committing
684}; 689};
@@ -738,9 +743,6 @@ xfs_buf_item_init(
738 bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); 743 bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
739 bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); 744 bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
740 bip->bli_format.blf_map_size = map_size; 745 bip->bli_format.blf_map_size = map_size;
741#ifdef XFS_BLI_TRACE
742 bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_NOFS);
743#endif
744 746
745#ifdef XFS_TRANS_DEBUG 747#ifdef XFS_TRANS_DEBUG
746 /* 748 /*
@@ -878,9 +880,6 @@ xfs_buf_item_free(
878 kmem_free(bip->bli_logged); 880 kmem_free(bip->bli_logged);
879#endif /* XFS_TRANS_DEBUG */ 881#endif /* XFS_TRANS_DEBUG */
880 882
881#ifdef XFS_BLI_TRACE
882 ktrace_free(bip->bli_trace);
883#endif
884 kmem_zone_free(xfs_buf_item_zone, bip); 883 kmem_zone_free(xfs_buf_item_zone, bip);
885} 884}
886 885
@@ -897,7 +896,8 @@ xfs_buf_item_relse(
897{ 896{
898 xfs_buf_log_item_t *bip; 897 xfs_buf_log_item_t *bip;
899 898
900 xfs_buftrace("XFS_RELSE", bp); 899 trace_xfs_buf_item_relse(bp, _RET_IP_);
900
901 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 901 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
902 XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list); 902 XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list);
903 if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) && 903 if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) &&
@@ -994,7 +994,7 @@ xfs_buf_iodone_callbacks(
994 if (XFS_FORCED_SHUTDOWN(mp)) { 994 if (XFS_FORCED_SHUTDOWN(mp)) {
995 ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); 995 ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp);
996 XFS_BUF_SUPER_STALE(bp); 996 XFS_BUF_SUPER_STALE(bp);
997 xfs_buftrace("BUF_IODONE_CB", bp); 997 trace_xfs_buf_item_iodone(bp, _RET_IP_);
998 xfs_buf_do_callbacks(bp, lip); 998 xfs_buf_do_callbacks(bp, lip);
999 XFS_BUF_SET_FSPRIVATE(bp, NULL); 999 XFS_BUF_SET_FSPRIVATE(bp, NULL);
1000 XFS_BUF_CLR_IODONE_FUNC(bp); 1000 XFS_BUF_CLR_IODONE_FUNC(bp);
@@ -1030,7 +1030,7 @@ xfs_buf_iodone_callbacks(
1030 XFS_BUF_SET_START(bp); 1030 XFS_BUF_SET_START(bp);
1031 } 1031 }
1032 ASSERT(XFS_BUF_IODONE_FUNC(bp)); 1032 ASSERT(XFS_BUF_IODONE_FUNC(bp));
1033 xfs_buftrace("BUF_IODONE ASYNC", bp); 1033 trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
1034 xfs_buf_relse(bp); 1034 xfs_buf_relse(bp);
1035 } else { 1035 } else {
1036 /* 1036 /*
@@ -1053,9 +1053,7 @@ xfs_buf_iodone_callbacks(
1053 } 1053 }
1054 return; 1054 return;
1055 } 1055 }
1056#ifdef XFSERRORDEBUG 1056
1057 xfs_buftrace("XFS BUFCB NOERR", bp);
1058#endif
1059 xfs_buf_do_callbacks(bp, lip); 1057 xfs_buf_do_callbacks(bp, lip);
1060 XFS_BUF_SET_FSPRIVATE(bp, NULL); 1058 XFS_BUF_SET_FSPRIVATE(bp, NULL);
1061 XFS_BUF_CLR_IODONE_FUNC(bp); 1059 XFS_BUF_CLR_IODONE_FUNC(bp);
@@ -1081,7 +1079,9 @@ xfs_buf_error_relse(
1081 XFS_BUF_DONE(bp); 1079 XFS_BUF_DONE(bp);
1082 XFS_BUF_UNDELAYWRITE(bp); 1080 XFS_BUF_UNDELAYWRITE(bp);
1083 XFS_BUF_ERROR(bp,0); 1081 XFS_BUF_ERROR(bp,0);
1084 xfs_buftrace("BUF_ERROR_RELSE", bp); 1082
1083 trace_xfs_buf_error_relse(bp, _RET_IP_);
1084
1085 if (! XFS_FORCED_SHUTDOWN(mp)) 1085 if (! XFS_FORCED_SHUTDOWN(mp))
1086 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 1086 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1087 /* 1087 /*
@@ -1128,34 +1128,3 @@ xfs_buf_iodone(
1128 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); 1128 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
1129 xfs_buf_item_free(bip); 1129 xfs_buf_item_free(bip);
1130} 1130}
1131
1132#if defined(XFS_BLI_TRACE)
1133void
1134xfs_buf_item_trace(
1135 char *id,
1136 xfs_buf_log_item_t *bip)
1137{
1138 xfs_buf_t *bp;
1139 ASSERT(bip->bli_trace != NULL);
1140
1141 bp = bip->bli_buf;
1142 ktrace_enter(bip->bli_trace,
1143 (void *)id,
1144 (void *)bip->bli_buf,
1145 (void *)((unsigned long)bip->bli_flags),
1146 (void *)((unsigned long)bip->bli_recur),
1147 (void *)((unsigned long)atomic_read(&bip->bli_refcount)),
1148 (void *)((unsigned long)
1149 (0xFFFFFFFF & XFS_BUF_ADDR(bp) >> 32)),
1150 (void *)((unsigned long)(0xFFFFFFFF & XFS_BUF_ADDR(bp))),
1151 (void *)((unsigned long)XFS_BUF_COUNT(bp)),
1152 (void *)((unsigned long)XFS_BUF_BFLAGS(bp)),
1153 XFS_BUF_FSPRIVATE(bp, void *),
1154 XFS_BUF_FSPRIVATE2(bp, void *),
1155 (void *)(unsigned long)XFS_BUF_ISPINNED(bp),
1156 (void *)XFS_BUF_IODONE_FUNC(bp),
1157 (void *)((unsigned long)(XFS_BUF_VALUSEMA(bp))),
1158 (void *)bip->bli_item.li_desc,
1159 (void *)((unsigned long)bip->bli_item.li_flags));
1160}
1161#endif /* XFS_BLI_TRACE */
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 5a41c348bb1c..217f34af00cb 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -70,22 +70,21 @@ typedef struct xfs_buf_log_format_t {
70#define XFS_BLI_INODE_ALLOC_BUF 0x10 70#define XFS_BLI_INODE_ALLOC_BUF 0x10
71#define XFS_BLI_STALE_INODE 0x20 71#define XFS_BLI_STALE_INODE 0x20
72 72
73#define XFS_BLI_FLAGS \
74 { XFS_BLI_HOLD, "HOLD" }, \
75 { XFS_BLI_DIRTY, "DIRTY" }, \
76 { XFS_BLI_STALE, "STALE" }, \
77 { XFS_BLI_LOGGED, "LOGGED" }, \
78 { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \
79 { XFS_BLI_STALE_INODE, "STALE_INODE" }
80
73 81
74#ifdef __KERNEL__ 82#ifdef __KERNEL__
75 83
76struct xfs_buf; 84struct xfs_buf;
77struct ktrace;
78struct xfs_mount; 85struct xfs_mount;
79struct xfs_buf_log_item; 86struct xfs_buf_log_item;
80 87
81#if defined(XFS_BLI_TRACE)
82#define XFS_BLI_TRACE_SIZE 32
83
84void xfs_buf_item_trace(char *, struct xfs_buf_log_item *);
85#else
86#define xfs_buf_item_trace(id, bip)
87#endif
88
89/* 88/*
90 * This is the in core log item structure used to track information 89 * This is the in core log item structure used to track information
91 * needed to log buffers. It tracks how many times the lock has been 90 * needed to log buffers. It tracks how many times the lock has been
@@ -97,9 +96,6 @@ typedef struct xfs_buf_log_item {
97 unsigned int bli_flags; /* misc flags */ 96 unsigned int bli_flags; /* misc flags */
98 unsigned int bli_recur; /* lock recursion count */ 97 unsigned int bli_recur; /* lock recursion count */
99 atomic_t bli_refcount; /* cnt of tp refs */ 98 atomic_t bli_refcount; /* cnt of tp refs */
100#ifdef XFS_BLI_TRACE
101 struct ktrace *bli_trace; /* event trace buf */
102#endif
103#ifdef XFS_TRANS_DEBUG 99#ifdef XFS_TRANS_DEBUG
104 char *bli_orig; /* original buffer copy */ 100 char *bli_orig; /* original buffer copy */
105 char *bli_logged; /* bytes logged (bitmap) */ 101 char *bli_logged; /* bytes logged (bitmap) */
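
The XFS_BLI_FLAGS string table added above is the feeding half of the new tracing scheme: the event definitions consume it to decode the bli_flags bitmask. Below is a minimal sketch of that consumption, assuming the standard TRACE_EVENT()/__print_flags() ftrace machinery; the event name is hypothetical, and the real definitions live in fs/xfs/linux-2.6/xfs_trace.h.

/*
 * Sketch only -- not the exact event from xfs_trace.h. __print_flags()
 * walks the { value, "name" } pairs in XFS_BLI_FLAGS and renders the
 * bitmask as e.g. "HOLD|DIRTY" in the trace output.
 */
TRACE_EVENT(xfs_buf_item_flags_demo,		/* hypothetical name */
	TP_PROTO(struct xfs_buf_log_item *bip),
	TP_ARGS(bip),
	TP_STRUCT__entry(
		__field(unsigned int, bli_flags)
	),
	TP_fast_assign(
		__entry->bli_flags = bip->bli_flags;
	),
	TP_printk("bli flags %s",
		  __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS))
);

This is why the ktrace_enter() calls and the per-item bli_trace buffer can go: the decoding logic moves out of the filesystem into the trace events, and flag tables like this are the only XFS-side cost.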
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 2847bbc1c534..0ca556b4bf31 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -46,6 +46,7 @@
46#include "xfs_dir2_block.h" 46#include "xfs_dir2_block.h"
47#include "xfs_dir2_node.h" 47#include "xfs_dir2_node.h"
48#include "xfs_error.h" 48#include "xfs_error.h"
49#include "xfs_trace.h"
49 50
50/* 51/*
51 * xfs_da_btree.c 52 * xfs_da_btree.c
@@ -1533,8 +1534,8 @@ xfs_da_hashname(const __uint8_t *name, int namelen)
1533enum xfs_dacmp 1534enum xfs_dacmp
1534xfs_da_compname( 1535xfs_da_compname(
1535 struct xfs_da_args *args, 1536 struct xfs_da_args *args,
1536 const char *name, 1537 const unsigned char *name,
1537 int len) 1538 int len)
1538{ 1539{
1539 return (args->namelen == len && memcmp(args->name, name, len) == 0) ? 1540 return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
1540 XFS_CMP_EXACT : XFS_CMP_DIFFERENT; 1541 XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
@@ -2107,7 +2108,7 @@ xfs_da_do_buf(
2107 (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC), 2108 (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC),
2108 mp, XFS_ERRTAG_DA_READ_BUF, 2109 mp, XFS_ERRTAG_DA_READ_BUF,
2109 XFS_RANDOM_DA_READ_BUF))) { 2110 XFS_RANDOM_DA_READ_BUF))) {
2110 xfs_buftrace("DA READ ERROR", rbp->bps[0]); 2111 trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_);
2111 XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)", 2112 XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
2112 XFS_ERRLEVEL_LOW, mp, info); 2113 XFS_ERRLEVEL_LOW, mp, info);
2113 error = XFS_ERROR(EFSCORRUPTED); 2114 error = XFS_ERROR(EFSCORRUPTED);
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 8c536167bf75..fe9f5a8c1d2a 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -125,6 +125,13 @@ typedef struct xfs_da_args {
125#define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */ 125#define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */
126#define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */ 126#define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */
127 127
128#define XFS_DA_OP_FLAGS \
129 { XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \
130 { XFS_DA_OP_RENAME, "RENAME" }, \
131 { XFS_DA_OP_ADDNAME, "ADDNAME" }, \
132 { XFS_DA_OP_OKNOENT, "OKNOENT" }, \
133 { XFS_DA_OP_CILOOKUP, "CILOOKUP" }
134
128/* 135/*
129 * Structure to describe buffer(s) for a block. 136 * Structure to describe buffer(s) for a block.
130 * This is needed in the directory version 2 format case, when 137 * This is needed in the directory version 2 format case, when
@@ -202,7 +209,8 @@ typedef struct xfs_da_state {
202 */ 209 */
203struct xfs_nameops { 210struct xfs_nameops {
204 xfs_dahash_t (*hashname)(struct xfs_name *); 211 xfs_dahash_t (*hashname)(struct xfs_name *);
205 enum xfs_dacmp (*compname)(struct xfs_da_args *, const char *, int); 212 enum xfs_dacmp (*compname)(struct xfs_da_args *,
213 const unsigned char *, int);
206}; 214};
207 215
208 216
@@ -253,7 +261,7 @@ int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
253 261
254uint xfs_da_hashname(const __uint8_t *name_string, int name_length); 262uint xfs_da_hashname(const __uint8_t *name_string, int name_length);
255enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, 263enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
256 const char *name, int len); 264 const unsigned char *name, int len);
257 265
258 266
259xfs_da_state_t *xfs_da_state_alloc(void); 267xfs_da_state_t *xfs_da_state_alloc(void);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index ab89a7e94a0f..5bba29a07812 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -43,16 +43,23 @@
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_rw.h" 44#include "xfs_rw.h"
45#include "xfs_vnodeops.h" 45#include "xfs_vnodeops.h"
46#include "xfs_trace.h"
47
48
49static int xfs_swap_extents(
50 xfs_inode_t *ip, /* target inode */
51 xfs_inode_t *tip, /* tmp inode */
52 xfs_swapext_t *sxp);
46 53
47/* 54/*
48 * Syssgi interface for swapext 55 * ioctl interface for swapext
49 */ 56 */
50int 57int
51xfs_swapext( 58xfs_swapext(
52 xfs_swapext_t *sxp) 59 xfs_swapext_t *sxp)
53{ 60{
54 xfs_inode_t *ip, *tip; 61 xfs_inode_t *ip, *tip;
55 struct file *file, *target_file; 62 struct file *file, *tmp_file;
56 int error = 0; 63 int error = 0;
57 64
58 /* Pull information for the target fd */ 65 /* Pull information for the target fd */
@@ -67,56 +74,138 @@ xfs_swapext(
67 goto out_put_file; 74 goto out_put_file;
68 } 75 }
69 76
70 target_file = fget((int)sxp->sx_fdtmp); 77 tmp_file = fget((int)sxp->sx_fdtmp);
71 if (!target_file) { 78 if (!tmp_file) {
72 error = XFS_ERROR(EINVAL); 79 error = XFS_ERROR(EINVAL);
73 goto out_put_file; 80 goto out_put_file;
74 } 81 }
75 82
76 if (!(target_file->f_mode & FMODE_WRITE) || 83 if (!(tmp_file->f_mode & FMODE_WRITE) ||
77 (target_file->f_flags & O_APPEND)) { 84 (tmp_file->f_flags & O_APPEND)) {
78 error = XFS_ERROR(EBADF); 85 error = XFS_ERROR(EBADF);
79 goto out_put_target_file; 86 goto out_put_tmp_file;
80 } 87 }
81 88
82 if (IS_SWAPFILE(file->f_path.dentry->d_inode) || 89 if (IS_SWAPFILE(file->f_path.dentry->d_inode) ||
83 IS_SWAPFILE(target_file->f_path.dentry->d_inode)) { 90 IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) {
84 error = XFS_ERROR(EINVAL); 91 error = XFS_ERROR(EINVAL);
85 goto out_put_target_file; 92 goto out_put_tmp_file;
86 } 93 }
87 94
88 ip = XFS_I(file->f_path.dentry->d_inode); 95 ip = XFS_I(file->f_path.dentry->d_inode);
89 tip = XFS_I(target_file->f_path.dentry->d_inode); 96 tip = XFS_I(tmp_file->f_path.dentry->d_inode);
90 97
91 if (ip->i_mount != tip->i_mount) { 98 if (ip->i_mount != tip->i_mount) {
92 error = XFS_ERROR(EINVAL); 99 error = XFS_ERROR(EINVAL);
93 goto out_put_target_file; 100 goto out_put_tmp_file;
94 } 101 }
95 102
96 if (ip->i_ino == tip->i_ino) { 103 if (ip->i_ino == tip->i_ino) {
97 error = XFS_ERROR(EINVAL); 104 error = XFS_ERROR(EINVAL);
98 goto out_put_target_file; 105 goto out_put_tmp_file;
99 } 106 }
100 107
101 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 108 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
102 error = XFS_ERROR(EIO); 109 error = XFS_ERROR(EIO);
103 goto out_put_target_file; 110 goto out_put_tmp_file;
104 } 111 }
105 112
106 error = xfs_swap_extents(ip, tip, sxp); 113 error = xfs_swap_extents(ip, tip, sxp);
107 114
108 out_put_target_file: 115 out_put_tmp_file:
109 fput(target_file); 116 fput(tmp_file);
110 out_put_file: 117 out_put_file:
111 fput(file); 118 fput(file);
112 out: 119 out:
113 return error; 120 return error;
114} 121}
115 122
116int 123/*
124 * We need to check that the format of the data fork in the temporary inode is
125 * valid for the target inode before doing the swap. This is not a problem with
126 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
 127 * data fork depending on the space the attribute fork is taking, so we can get
128 * invalid formats on the target inode.
129 *
130 * E.g. target has space for 7 extents in extent format, temp inode only has
131 * space for 6. If we defragment down to 7 extents, then the tmp format is a
132 * btree, but when swapped it needs to be in extent format. Hence we can't just
133 * blindly swap data forks on attr2 filesystems.
134 *
135 * Note that we check the swap in both directions so that we don't end up with
136 * a corrupt temporary inode, either.
137 *
138 * Note that fixing the way xfs_fsr sets up the attribute fork in the source
139 * inode will prevent this situation from occurring, so all we do here is
 140 * reject and log the attempt. Basically, we are putting the responsibility on
141 * userspace to get this right.
142 */
143static int
144xfs_swap_extents_check_format(
145 xfs_inode_t *ip, /* target inode */
146 xfs_inode_t *tip) /* tmp inode */
147{
148
149 /* Should never get a local format */
150 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
151 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
152 return EINVAL;
153
154 /*
 155 * If the target inode has fewer extents than the temporary inode,
 156 * why did userspace call us?
157 */
158 if (ip->i_d.di_nextents < tip->i_d.di_nextents)
159 return EINVAL;
160
161 /*
 162 * If the target inode is in extent form and the temp inode is in btree
 163 * form, then we will end up with the target inode in the wrong format,
 164 * as we already know there are fewer extents in the temp inode.
165 */
166 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
167 tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
168 return EINVAL;
169
170 /* Check temp in extent form to max in target */
171 if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
172 XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max)
173 return EINVAL;
174
175 /* Check target in extent form to max in temp */
176 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
177 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
178 return EINVAL;
179
180 /*
 181 * If the temp inode is in btree format, check that its root block will
 182 * fit in the target and that it has enough extents to remain in btree
 183 * format in the target.
184 *
185 * Note that we have to be careful to allow btree->extent conversions
186 * (a common defrag case) which will occur when the temp inode is in
187 * extent format...
188 */
189 if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
190 ((XFS_IFORK_BOFF(ip) &&
191 tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) ||
192 XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max))
193 return EINVAL;
194
195 /* Reciprocal target->temp btree format checks */
196 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
197 ((XFS_IFORK_BOFF(tip) &&
198 ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) ||
199 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max))
200 return EINVAL;
201
202 return 0;
203}
204
205static int
117xfs_swap_extents( 206xfs_swap_extents(
118 xfs_inode_t *ip, 207 xfs_inode_t *ip, /* target inode */
119 xfs_inode_t *tip, 208 xfs_inode_t *tip, /* tmp inode */
120 xfs_swapext_t *sxp) 209 xfs_swapext_t *sxp)
121{ 210{
122 xfs_mount_t *mp; 211 xfs_mount_t *mp;
@@ -160,15 +249,7 @@ xfs_swap_extents(
160 goto out_unlock; 249 goto out_unlock;
161 } 250 }
162 251
163 /* Should never get a local format */
164 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
165 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
166 error = XFS_ERROR(EINVAL);
167 goto out_unlock;
168 }
169
170 if (VN_CACHED(VFS_I(tip)) != 0) { 252 if (VN_CACHED(VFS_I(tip)) != 0) {
171 xfs_inval_cached_trace(tip, 0, -1, 0, -1);
172 error = xfs_flushinval_pages(tip, 0, -1, 253 error = xfs_flushinval_pages(tip, 0, -1,
173 FI_REMAPF_LOCKED); 254 FI_REMAPF_LOCKED);
174 if (error) 255 if (error)
@@ -189,13 +270,15 @@ xfs_swap_extents(
189 goto out_unlock; 270 goto out_unlock;
190 } 271 }
191 272
192 /* 273 trace_xfs_swap_extent_before(ip, 0);
193 * If the target has extended attributes, the tmp file 274 trace_xfs_swap_extent_before(tip, 1);
194 * must also in order to ensure the correct data fork 275
195 * format. 276 /* check inode formats now that data is flushed */
196 */ 277 error = xfs_swap_extents_check_format(ip, tip);
197 if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { 278 if (error) {
198 error = XFS_ERROR(EINVAL); 279 xfs_fs_cmn_err(CE_NOTE, mp,
280 "%s: inode 0x%llx format is incompatible for exchanging.",
281 __FILE__, ip->i_ino);
199 goto out_unlock; 282 goto out_unlock;
200 } 283 }
201 284
@@ -276,6 +359,16 @@ xfs_swap_extents(
276 *tifp = *tempifp; /* struct copy */ 359 *tifp = *tempifp; /* struct copy */
277 360
278 /* 361 /*
362 * Fix the in-memory data fork values that are dependent on the fork
363 * offset in the inode. We can't assume they remain the same as attr2
364 * has dynamic fork offsets.
365 */
366 ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) /
367 (uint)sizeof(xfs_bmbt_rec_t);
368 tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) /
369 (uint)sizeof(xfs_bmbt_rec_t);
370
371 /*
279 * Fix the on-disk inode values 372 * Fix the on-disk inode values
280 */ 373 */
281 tmp = (__uint64_t)ip->i_d.di_nblocks; 374 tmp = (__uint64_t)ip->i_d.di_nblocks;
@@ -347,6 +440,8 @@ xfs_swap_extents(
347 440
348 error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); 441 error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT);
349 442
443 trace_xfs_swap_extent_after(ip, 0);
444 trace_xfs_swap_extent_after(tip, 1);
350out: 445out:
351 kmem_free(tempifp); 446 kmem_free(tempifp);
352 return error; 447 return error;
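
For context on the caller side: xfs_swapext() above is reached through the XFS_IOC_SWAPEXT ioctl, typically issued by xfs_fsr. A hedged userspace sketch follows, assuming the struct xfs_swapext layout and the XFS_SX_VERSION constant from xfs_fs.h; swap_extents() is a made-up helper and the bulkstat fill is elided.

#include <string.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>	/* XFS_IOC_SWAPEXT, struct xfs_swapext */

static int swap_extents(int target_fd, int tmp_fd,
			const struct xfs_bstat *bs, off_t length)
{
	struct xfs_swapext sx;

	memset(&sx, 0, sizeof(sx));
	sx.sx_version = XFS_SX_VERSION;
	sx.sx_fdtarget = target_fd;	/* becomes "ip" in xfs_swapext() */
	sx.sx_fdtmp = tmp_fd;		/* becomes "tip" */
	sx.sx_offset = 0;
	sx.sx_length = length;
	sx.sx_stat = *bs;		/* pre-swap bulkstat of the target */

	/*
	 * The kernel re-validates the fork formats via
	 * xfs_swap_extents_check_format() before committing the swap, so
	 * an incompatible layout comes back as EINVAL, not corruption.
	 */
	return ioctl(target_fd, XFS_IOC_SWAPEXT, &sx);
}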
diff --git a/fs/xfs/xfs_dfrag.h b/fs/xfs/xfs_dfrag.h
index 4f55a6306558..20bdd935c121 100644
--- a/fs/xfs/xfs_dfrag.h
+++ b/fs/xfs/xfs_dfrag.h
@@ -48,9 +48,6 @@ typedef struct xfs_swapext
48 */ 48 */
49int xfs_swapext(struct xfs_swapext *sx); 49int xfs_swapext(struct xfs_swapext *sx);
50 50
51int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
52 struct xfs_swapext *sxp);
53
54#endif /* __KERNEL__ */ 51#endif /* __KERNEL__ */
55 52
56#endif /* __XFS_DFRAG_H__ */ 53#endif /* __XFS_DFRAG_H__ */
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index bb1d58eb3982..42520f041265 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -40,11 +40,11 @@
40#include "xfs_dir2_leaf.h" 40#include "xfs_dir2_leaf.h"
41#include "xfs_dir2_block.h" 41#include "xfs_dir2_block.h"
42#include "xfs_dir2_node.h" 42#include "xfs_dir2_node.h"
43#include "xfs_dir2_trace.h"
44#include "xfs_error.h" 43#include "xfs_error.h"
45#include "xfs_vnodeops.h" 44#include "xfs_vnodeops.h"
45#include "xfs_trace.h"
46 46
47struct xfs_name xfs_name_dotdot = {"..", 2}; 47struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2};
48 48
49/* 49/*
50 * ASCII case-insensitive (ie. A-Z) support for directories that was 50 * ASCII case-insensitive (ie. A-Z) support for directories that was
@@ -66,8 +66,8 @@ xfs_ascii_ci_hashname(
66STATIC enum xfs_dacmp 66STATIC enum xfs_dacmp
67xfs_ascii_ci_compname( 67xfs_ascii_ci_compname(
68 struct xfs_da_args *args, 68 struct xfs_da_args *args,
69 const char *name, 69 const unsigned char *name,
70 int len) 70 int len)
71{ 71{
72 enum xfs_dacmp result; 72 enum xfs_dacmp result;
73 int i; 73 int i;
@@ -247,7 +247,7 @@ xfs_dir_createname(
247int 247int
248xfs_dir_cilookup_result( 248xfs_dir_cilookup_result(
249 struct xfs_da_args *args, 249 struct xfs_da_args *args,
250 const char *name, 250 const unsigned char *name,
251 int len) 251 int len)
252{ 252{
253 if (args->cmpresult == XFS_CMP_DIFFERENT) 253 if (args->cmpresult == XFS_CMP_DIFFERENT)
@@ -525,7 +525,8 @@ xfs_dir2_grow_inode(
525 xfs_trans_t *tp; 525 xfs_trans_t *tp;
526 xfs_drfsbno_t nblks; 526 xfs_drfsbno_t nblks;
527 527
528 xfs_dir2_trace_args_s("grow_inode", args, space); 528 trace_xfs_dir2_grow_inode(args, space);
529
529 dp = args->dp; 530 dp = args->dp;
530 tp = args->trans; 531 tp = args->trans;
531 mp = dp->i_mount; 532 mp = dp->i_mount;
@@ -703,7 +704,8 @@ xfs_dir2_shrink_inode(
703 xfs_mount_t *mp; 704 xfs_mount_t *mp;
704 xfs_trans_t *tp; 705 xfs_trans_t *tp;
705 706
706 xfs_dir2_trace_args_db("shrink_inode", args, db, bp); 707 trace_xfs_dir2_shrink_inode(args, db);
708
707 dp = args->dp; 709 dp = args->dp;
708 mp = dp->i_mount; 710 mp = dp->i_mount;
709 tp = args->trans; 711 tp = args->trans;
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 1d9ef96f33aa..74a3b1057685 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -100,7 +100,7 @@ extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
100extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, 100extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
101 struct xfs_dabuf *bp); 101 struct xfs_dabuf *bp);
102 102
103extern int xfs_dir_cilookup_result(struct xfs_da_args *args, const char *name, 103extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
104 int len); 104 const unsigned char *name, int len);
105 105
106#endif /* __XFS_DIR2_H__ */ 106#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index ab52e9e1c1ee..779a267b0a84 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -36,8 +36,8 @@
36#include "xfs_dir2_data.h" 36#include "xfs_dir2_data.h"
37#include "xfs_dir2_leaf.h" 37#include "xfs_dir2_leaf.h"
38#include "xfs_dir2_block.h" 38#include "xfs_dir2_block.h"
39#include "xfs_dir2_trace.h"
40#include "xfs_error.h" 39#include "xfs_error.h"
40#include "xfs_trace.h"
41 41
42/* 42/*
43 * Local function prototypes. 43 * Local function prototypes.
@@ -57,8 +57,8 @@ static xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot;
57void 57void
58xfs_dir_startup(void) 58xfs_dir_startup(void)
59{ 59{
60 xfs_dir_hash_dot = xfs_da_hashname(".", 1); 60 xfs_dir_hash_dot = xfs_da_hashname((unsigned char *)".", 1);
61 xfs_dir_hash_dotdot = xfs_da_hashname("..", 2); 61 xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2);
62} 62}
63 63
64/* 64/*
@@ -94,7 +94,8 @@ xfs_dir2_block_addname(
94 __be16 *tagp; /* pointer to tag value */ 94 __be16 *tagp; /* pointer to tag value */
95 xfs_trans_t *tp; /* transaction structure */ 95 xfs_trans_t *tp; /* transaction structure */
96 96
97 xfs_dir2_trace_args("block_addname", args); 97 trace_xfs_dir2_block_addname(args);
98
98 dp = args->dp; 99 dp = args->dp;
99 tp = args->trans; 100 tp = args->trans;
100 mp = dp->i_mount; 101 mp = dp->i_mount;
@@ -512,8 +513,9 @@ xfs_dir2_block_getdents(
512 /* 513 /*
513 * If it didn't fit, set the final offset to here & return. 514 * If it didn't fit, set the final offset to here & return.
514 */ 515 */
515 if (filldir(dirent, dep->name, dep->namelen, cook & 0x7fffffff, 516 if (filldir(dirent, (char *)dep->name, dep->namelen,
516 be64_to_cpu(dep->inumber), DT_UNKNOWN)) { 517 cook & 0x7fffffff, be64_to_cpu(dep->inumber),
518 DT_UNKNOWN)) {
517 *offset = cook & 0x7fffffff; 519 *offset = cook & 0x7fffffff;
518 xfs_da_brelse(NULL, bp); 520 xfs_da_brelse(NULL, bp);
519 return 0; 521 return 0;
@@ -590,7 +592,8 @@ xfs_dir2_block_lookup(
590 int error; /* error return value */ 592 int error; /* error return value */
591 xfs_mount_t *mp; /* filesystem mount point */ 593 xfs_mount_t *mp; /* filesystem mount point */
592 594
593 xfs_dir2_trace_args("block_lookup", args); 595 trace_xfs_dir2_block_lookup(args);
596
594 /* 597 /*
595 * Get the buffer, look up the entry. 598 * Get the buffer, look up the entry.
596 * If not found (ENOENT) then return, have no buffer. 599 * If not found (ENOENT) then return, have no buffer.
@@ -747,7 +750,8 @@ xfs_dir2_block_removename(
747 int size; /* shortform size */ 750 int size; /* shortform size */
748 xfs_trans_t *tp; /* transaction pointer */ 751 xfs_trans_t *tp; /* transaction pointer */
749 752
750 xfs_dir2_trace_args("block_removename", args); 753 trace_xfs_dir2_block_removename(args);
754
751 /* 755 /*
752 * Look up the entry in the block. Gets the buffer and entry index. 756 * Look up the entry in the block. Gets the buffer and entry index.
753 * It will always be there, the vnodeops level does a lookup first. 757 * It will always be there, the vnodeops level does a lookup first.
@@ -823,7 +827,8 @@ xfs_dir2_block_replace(
823 int error; /* error return value */ 827 int error; /* error return value */
824 xfs_mount_t *mp; /* filesystem mount point */ 828 xfs_mount_t *mp; /* filesystem mount point */
825 829
826 xfs_dir2_trace_args("block_replace", args); 830 trace_xfs_dir2_block_replace(args);
831
827 /* 832 /*
828 * Lookup the entry in the directory. Get buffer and entry index. 833 * Lookup the entry in the directory. Get buffer and entry index.
829 * This will always succeed since the caller has already done a lookup. 834 * This will always succeed since the caller has already done a lookup.
@@ -897,7 +902,8 @@ xfs_dir2_leaf_to_block(
897 int to; /* block/leaf to index */ 902 int to; /* block/leaf to index */
898 xfs_trans_t *tp; /* transaction pointer */ 903 xfs_trans_t *tp; /* transaction pointer */
899 904
900 xfs_dir2_trace_args_bb("leaf_to_block", args, lbp, dbp); 905 trace_xfs_dir2_leaf_to_block(args);
906
901 dp = args->dp; 907 dp = args->dp;
902 tp = args->trans; 908 tp = args->trans;
903 mp = dp->i_mount; 909 mp = dp->i_mount;
@@ -1044,7 +1050,8 @@ xfs_dir2_sf_to_block(
1044 xfs_trans_t *tp; /* transaction pointer */ 1050 xfs_trans_t *tp; /* transaction pointer */
1045 struct xfs_name name; 1051 struct xfs_name name;
1046 1052
1047 xfs_dir2_trace_args("sf_to_block", args); 1053 trace_xfs_dir2_sf_to_block(args);
1054
1048 dp = args->dp; 1055 dp = args->dp;
1049 tp = args->trans; 1056 tp = args->trans;
1050 mp = dp->i_mount; 1057 mp = dp->i_mount;
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 41ad537c49e9..e2d89854ec9e 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -38,8 +38,8 @@
38#include "xfs_dir2_leaf.h" 38#include "xfs_dir2_leaf.h"
39#include "xfs_dir2_block.h" 39#include "xfs_dir2_block.h"
40#include "xfs_dir2_node.h" 40#include "xfs_dir2_node.h"
41#include "xfs_dir2_trace.h"
42#include "xfs_error.h" 41#include "xfs_error.h"
42#include "xfs_trace.h"
43 43
44/* 44/*
45 * Local function declarations. 45 * Local function declarations.
@@ -80,7 +80,8 @@ xfs_dir2_block_to_leaf(
80 int needscan; /* need to rescan bestfree */ 80 int needscan; /* need to rescan bestfree */
81 xfs_trans_t *tp; /* transaction pointer */ 81 xfs_trans_t *tp; /* transaction pointer */
82 82
83 xfs_dir2_trace_args_b("block_to_leaf", args, dbp); 83 trace_xfs_dir2_block_to_leaf(args);
84
84 dp = args->dp; 85 dp = args->dp;
85 mp = dp->i_mount; 86 mp = dp->i_mount;
86 tp = args->trans; 87 tp = args->trans;
@@ -188,7 +189,8 @@ xfs_dir2_leaf_addname(
188 xfs_trans_t *tp; /* transaction pointer */ 189 xfs_trans_t *tp; /* transaction pointer */
189 xfs_dir2_db_t use_block; /* data block number */ 190 xfs_dir2_db_t use_block; /* data block number */
190 191
191 xfs_dir2_trace_args("leaf_addname", args); 192 trace_xfs_dir2_leaf_addname(args);
193
192 dp = args->dp; 194 dp = args->dp;
193 tp = args->trans; 195 tp = args->trans;
194 mp = dp->i_mount; 196 mp = dp->i_mount;
@@ -1079,7 +1081,7 @@ xfs_dir2_leaf_getdents(
1079 dep = (xfs_dir2_data_entry_t *)ptr; 1081 dep = (xfs_dir2_data_entry_t *)ptr;
1080 length = xfs_dir2_data_entsize(dep->namelen); 1082 length = xfs_dir2_data_entsize(dep->namelen);
1081 1083
1082 if (filldir(dirent, dep->name, dep->namelen, 1084 if (filldir(dirent, (char *)dep->name, dep->namelen,
1083 xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff, 1085 xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff,
1084 be64_to_cpu(dep->inumber), DT_UNKNOWN)) 1086 be64_to_cpu(dep->inumber), DT_UNKNOWN))
1085 break; 1087 break;
@@ -1266,7 +1268,8 @@ xfs_dir2_leaf_lookup(
1266 xfs_dir2_leaf_entry_t *lep; /* leaf entry */ 1268 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1267 xfs_trans_t *tp; /* transaction pointer */ 1269 xfs_trans_t *tp; /* transaction pointer */
1268 1270
1269 xfs_dir2_trace_args("leaf_lookup", args); 1271 trace_xfs_dir2_leaf_lookup(args);
1272
1270 /* 1273 /*
1271 * Look up name in the leaf block, returning both buffers and index. 1274 * Look up name in the leaf block, returning both buffers and index.
1272 */ 1275 */
@@ -1454,7 +1457,8 @@ xfs_dir2_leaf_removename(
1454 xfs_dir2_data_off_t oldbest; /* old value of best free */ 1457 xfs_dir2_data_off_t oldbest; /* old value of best free */
1455 xfs_trans_t *tp; /* transaction pointer */ 1458 xfs_trans_t *tp; /* transaction pointer */
1456 1459
1457 xfs_dir2_trace_args("leaf_removename", args); 1460 trace_xfs_dir2_leaf_removename(args);
1461
1458 /* 1462 /*
1459 * Lookup the leaf entry, get the leaf and data blocks read in. 1463 * Lookup the leaf entry, get the leaf and data blocks read in.
1460 */ 1464 */
@@ -1586,7 +1590,8 @@ xfs_dir2_leaf_replace(
1586 xfs_dir2_leaf_entry_t *lep; /* leaf entry */ 1590 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1587 xfs_trans_t *tp; /* transaction pointer */ 1591 xfs_trans_t *tp; /* transaction pointer */
1588 1592
1589 xfs_dir2_trace_args("leaf_replace", args); 1593 trace_xfs_dir2_leaf_replace(args);
1594
1590 /* 1595 /*
1591 * Look up the entry. 1596 * Look up the entry.
1592 */ 1597 */
@@ -1766,7 +1771,9 @@ xfs_dir2_node_to_leaf(
1766 if (state->path.active > 1) 1771 if (state->path.active > 1)
1767 return 0; 1772 return 0;
1768 args = state->args; 1773 args = state->args;
1769 xfs_dir2_trace_args("node_to_leaf", args); 1774
1775 trace_xfs_dir2_node_to_leaf(args);
1776
1770 mp = state->mp; 1777 mp = state->mp;
1771 dp = args->dp; 1778 dp = args->dp;
1772 tp = args->trans; 1779 tp = args->trans;
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 5a81ccd1045b..78fc4d9ae756 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -37,8 +37,8 @@
37#include "xfs_dir2_leaf.h" 37#include "xfs_dir2_leaf.h"
38#include "xfs_dir2_block.h" 38#include "xfs_dir2_block.h"
39#include "xfs_dir2_node.h" 39#include "xfs_dir2_node.h"
40#include "xfs_dir2_trace.h"
41#include "xfs_error.h" 40#include "xfs_error.h"
41#include "xfs_trace.h"
42 42
43/* 43/*
44 * Function declarations. 44 * Function declarations.
@@ -65,7 +65,7 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
65/* 65/*
66 * Log entries from a freespace block. 66 * Log entries from a freespace block.
67 */ 67 */
68void 68STATIC void
69xfs_dir2_free_log_bests( 69xfs_dir2_free_log_bests(
70 xfs_trans_t *tp, /* transaction pointer */ 70 xfs_trans_t *tp, /* transaction pointer */
71 xfs_dabuf_t *bp, /* freespace buffer */ 71 xfs_dabuf_t *bp, /* freespace buffer */
@@ -123,7 +123,8 @@ xfs_dir2_leaf_to_node(
123 __be16 *to; /* pointer to freespace entry */ 123 __be16 *to; /* pointer to freespace entry */
124 xfs_trans_t *tp; /* transaction pointer */ 124 xfs_trans_t *tp; /* transaction pointer */
125 125
126 xfs_dir2_trace_args_b("leaf_to_node", args, lbp); 126 trace_xfs_dir2_leaf_to_node(args);
127
127 dp = args->dp; 128 dp = args->dp;
128 mp = dp->i_mount; 129 mp = dp->i_mount;
129 tp = args->trans; 130 tp = args->trans;
@@ -196,7 +197,8 @@ xfs_dir2_leafn_add(
196 xfs_mount_t *mp; /* filesystem mount point */ 197 xfs_mount_t *mp; /* filesystem mount point */
197 xfs_trans_t *tp; /* transaction pointer */ 198 xfs_trans_t *tp; /* transaction pointer */
198 199
199 xfs_dir2_trace_args_sb("leafn_add", args, index, bp); 200 trace_xfs_dir2_leafn_add(args, index);
201
200 dp = args->dp; 202 dp = args->dp;
201 mp = dp->i_mount; 203 mp = dp->i_mount;
202 tp = args->trans; 204 tp = args->trans;
@@ -711,8 +713,8 @@ xfs_dir2_leafn_moveents(
711 int stale; /* count stale leaves copied */ 713 int stale; /* count stale leaves copied */
712 xfs_trans_t *tp; /* transaction pointer */ 714 xfs_trans_t *tp; /* transaction pointer */
713 715
714 xfs_dir2_trace_args_bibii("leafn_moveents", args, bp_s, start_s, bp_d, 716 trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count);
715 start_d, count); 717
716 /* 718 /*
717 * Silently return if nothing to do. 719 * Silently return if nothing to do.
718 */ 720 */
@@ -933,7 +935,8 @@ xfs_dir2_leafn_remove(
933 int needscan; /* need to rescan data frees */ 935 int needscan; /* need to rescan data frees */
934 xfs_trans_t *tp; /* transaction pointer */ 936 xfs_trans_t *tp; /* transaction pointer */
935 937
936 xfs_dir2_trace_args_sb("leafn_remove", args, index, bp); 938 trace_xfs_dir2_leafn_remove(args, index);
939
937 dp = args->dp; 940 dp = args->dp;
938 tp = args->trans; 941 tp = args->trans;
939 mp = dp->i_mount; 942 mp = dp->i_mount;
@@ -1363,7 +1366,8 @@ xfs_dir2_node_addname(
1363 int rval; /* sub-return value */ 1366 int rval; /* sub-return value */
1364 xfs_da_state_t *state; /* btree cursor */ 1367 xfs_da_state_t *state; /* btree cursor */
1365 1368
1366 xfs_dir2_trace_args("node_addname", args); 1369 trace_xfs_dir2_node_addname(args);
1370
1367 /* 1371 /*
1368 * Allocate and initialize the state (btree cursor). 1372 * Allocate and initialize the state (btree cursor).
1369 */ 1373 */
@@ -1822,7 +1826,8 @@ xfs_dir2_node_lookup(
1822 int rval; /* operation return value */ 1826 int rval; /* operation return value */
1823 xfs_da_state_t *state; /* btree cursor */ 1827 xfs_da_state_t *state; /* btree cursor */
1824 1828
1825 xfs_dir2_trace_args("node_lookup", args); 1829 trace_xfs_dir2_node_lookup(args);
1830
1826 /* 1831 /*
1827 * Allocate and initialize the btree cursor. 1832 * Allocate and initialize the btree cursor.
1828 */ 1833 */
@@ -1875,7 +1880,8 @@ xfs_dir2_node_removename(
1875 int rval; /* operation return value */ 1880 int rval; /* operation return value */
1876 xfs_da_state_t *state; /* btree cursor */ 1881 xfs_da_state_t *state; /* btree cursor */
1877 1882
1878 xfs_dir2_trace_args("node_removename", args); 1883 trace_xfs_dir2_node_removename(args);
1884
1879 /* 1885 /*
1880 * Allocate and initialize the btree cursor. 1886 * Allocate and initialize the btree cursor.
1881 */ 1887 */
@@ -1944,7 +1950,8 @@ xfs_dir2_node_replace(
1944 int rval; /* internal return value */ 1950 int rval; /* internal return value */
1945 xfs_da_state_t *state; /* btree cursor */ 1951 xfs_da_state_t *state; /* btree cursor */
1946 1952
1947 xfs_dir2_trace_args("node_replace", args); 1953 trace_xfs_dir2_node_replace(args);
1954
1948 /* 1955 /*
1949 * Allocate and initialize the btree cursor. 1956 * Allocate and initialize the btree cursor.
1950 */ 1957 */
diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h
index dde72db3d695..82dfe7147195 100644
--- a/fs/xfs/xfs_dir2_node.h
+++ b/fs/xfs/xfs_dir2_node.h
@@ -75,8 +75,6 @@ xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
75 return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp)); 75 return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp));
76} 76}
77 77
78extern void xfs_dir2_free_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
79 int first, int last);
80extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, 78extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
81 struct xfs_dabuf *lbp); 79 struct xfs_dabuf *lbp);
82extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count); 80extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index e89734e84646..c1a5945d463a 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -37,7 +37,7 @@
37#include "xfs_dir2_data.h" 37#include "xfs_dir2_data.h"
38#include "xfs_dir2_leaf.h" 38#include "xfs_dir2_leaf.h"
39#include "xfs_dir2_block.h" 39#include "xfs_dir2_block.h"
40#include "xfs_dir2_trace.h" 40#include "xfs_trace.h"
41 41
42/* 42/*
43 * Prototypes for internal functions. 43 * Prototypes for internal functions.
@@ -169,7 +169,8 @@ xfs_dir2_block_to_sf(
169 xfs_dir2_sf_t *sfp; /* shortform structure */ 169 xfs_dir2_sf_t *sfp; /* shortform structure */
170 xfs_ino_t temp; 170 xfs_ino_t temp;
171 171
172 xfs_dir2_trace_args_sb("block_to_sf", args, size, bp); 172 trace_xfs_dir2_block_to_sf(args);
173
173 dp = args->dp; 174 dp = args->dp;
174 mp = dp->i_mount; 175 mp = dp->i_mount;
175 176
@@ -281,7 +282,8 @@ xfs_dir2_sf_addname(
281 xfs_dir2_sf_t *sfp; /* shortform structure */ 282 xfs_dir2_sf_t *sfp; /* shortform structure */
282 xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ 283 xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */
283 284
284 xfs_dir2_trace_args("sf_addname", args); 285 trace_xfs_dir2_sf_addname(args);
286
285 ASSERT(xfs_dir2_sf_lookup(args) == ENOENT); 287 ASSERT(xfs_dir2_sf_lookup(args) == ENOENT);
286 dp = args->dp; 288 dp = args->dp;
287 ASSERT(dp->i_df.if_flags & XFS_IFINLINE); 289 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
@@ -654,7 +656,8 @@ xfs_dir2_sf_create(
654 xfs_dir2_sf_t *sfp; /* shortform structure */ 656 xfs_dir2_sf_t *sfp; /* shortform structure */
655 int size; /* directory size */ 657 int size; /* directory size */
656 658
657 xfs_dir2_trace_args_i("sf_create", args, pino); 659 trace_xfs_dir2_sf_create(args);
660
658 dp = args->dp; 661 dp = args->dp;
659 662
660 ASSERT(dp != NULL); 663 ASSERT(dp != NULL);
@@ -779,7 +782,7 @@ xfs_dir2_sf_getdents(
779 } 782 }
780 783
781 ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); 784 ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
782 if (filldir(dirent, sfep->name, sfep->namelen, 785 if (filldir(dirent, (char *)sfep->name, sfep->namelen,
783 off & 0x7fffffff, ino, DT_UNKNOWN)) { 786 off & 0x7fffffff, ino, DT_UNKNOWN)) {
784 *offset = off & 0x7fffffff; 787 *offset = off & 0x7fffffff;
785 return 0; 788 return 0;
@@ -808,7 +811,8 @@ xfs_dir2_sf_lookup(
808 enum xfs_dacmp cmp; /* comparison result */ 811 enum xfs_dacmp cmp; /* comparison result */
809 xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */ 812 xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */
810 813
811 xfs_dir2_trace_args("sf_lookup", args); 814 trace_xfs_dir2_sf_lookup(args);
815
812 xfs_dir2_sf_check(args); 816 xfs_dir2_sf_check(args);
813 dp = args->dp; 817 dp = args->dp;
814 818
@@ -891,7 +895,8 @@ xfs_dir2_sf_removename(
891 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 895 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
892 xfs_dir2_sf_t *sfp; /* shortform structure */ 896 xfs_dir2_sf_t *sfp; /* shortform structure */
893 897
894 xfs_dir2_trace_args("sf_removename", args); 898 trace_xfs_dir2_sf_removename(args);
899
895 dp = args->dp; 900 dp = args->dp;
896 901
897 ASSERT(dp->i_df.if_flags & XFS_IFINLINE); 902 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
@@ -982,7 +987,8 @@ xfs_dir2_sf_replace(
982 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 987 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
983 xfs_dir2_sf_t *sfp; /* shortform structure */ 988 xfs_dir2_sf_t *sfp; /* shortform structure */
984 989
985 xfs_dir2_trace_args("sf_replace", args); 990 trace_xfs_dir2_sf_replace(args);
991
986 dp = args->dp; 992 dp = args->dp;
987 993
988 ASSERT(dp->i_df.if_flags & XFS_IFINLINE); 994 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
@@ -1125,7 +1131,8 @@ xfs_dir2_sf_toino4(
1125 xfs_dir2_sf_entry_t *sfep; /* new sf entry */ 1131 xfs_dir2_sf_entry_t *sfep; /* new sf entry */
1126 xfs_dir2_sf_t *sfp; /* new sf directory */ 1132 xfs_dir2_sf_t *sfp; /* new sf directory */
1127 1133
1128 xfs_dir2_trace_args("sf_toino4", args); 1134 trace_xfs_dir2_sf_toino4(args);
1135
1129 dp = args->dp; 1136 dp = args->dp;
1130 1137
1131 /* 1138 /*
@@ -1202,7 +1209,8 @@ xfs_dir2_sf_toino8(
1202 xfs_dir2_sf_entry_t *sfep; /* new sf entry */ 1209 xfs_dir2_sf_entry_t *sfep; /* new sf entry */
1203 xfs_dir2_sf_t *sfp; /* new sf directory */ 1210 xfs_dir2_sf_t *sfp; /* new sf directory */
1204 1211
1205 xfs_dir2_trace_args("sf_toino8", args); 1212 trace_xfs_dir2_sf_toino8(args);
1213
1206 dp = args->dp; 1214 dp = args->dp;
1207 1215
1208 /* 1216 /*
diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c
deleted file mode 100644
index 6cc7c0c681ac..000000000000
--- a/fs/xfs/xfs_dir2_trace.c
+++ /dev/null
@@ -1,216 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_inum.h"
22#include "xfs_dir2.h"
23#include "xfs_da_btree.h"
24#include "xfs_bmap_btree.h"
25#include "xfs_dir2_sf.h"
26#include "xfs_attr_sf.h"
27#include "xfs_dinode.h"
28#include "xfs_inode.h"
29#include "xfs_dir2_trace.h"
30
31#ifdef XFS_DIR2_TRACE
32ktrace_t *xfs_dir2_trace_buf;
33
34/*
35 * Enter something in the trace buffers.
36 */
37static void
38xfs_dir2_trace_enter(
39 xfs_inode_t *dp,
40 int type,
41 char *where,
42 char *name,
43 int namelen,
44 void *a0,
45 void *a1,
46 void *a2,
47 void *a3,
48 void *a4,
49 void *a5,
50 void *a6,
51 void *a7)
52{
53 void *n[5];
54
55 ASSERT(xfs_dir2_trace_buf);
56 ASSERT(dp->i_dir_trace);
57 if (name)
58 memcpy(n, name, min((int)sizeof(n), namelen));
59 else
60 memset((char *)n, 0, sizeof(n));
61 ktrace_enter(xfs_dir2_trace_buf,
62 (void *)(long)type, (void *)where,
63 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
64 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
65 (void *)(long)namelen,
66 (void *)n[0], (void *)n[1], (void *)n[2],
67 (void *)n[3], (void *)n[4]);
68 ktrace_enter(dp->i_dir_trace,
69 (void *)(long)type, (void *)where,
70 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
71 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
72 (void *)(long)namelen,
73 (void *)n[0], (void *)n[1], (void *)n[2],
74 (void *)n[3], (void *)n[4]);
75}
76
77void
78xfs_dir2_trace_args(
79 char *where,
80 xfs_da_args_t *args)
81{
82 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS, where,
83 (char *)args->name, (int)args->namelen,
84 (void *)(unsigned long)args->hashval,
85 (void *)((unsigned long)(args->inumber >> 32)),
86 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
87 (void *)args->dp, (void *)args->trans,
88 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
89 NULL, NULL);
90}
91
92void
93xfs_dir2_trace_args_b(
94 char *where,
95 xfs_da_args_t *args,
96 xfs_dabuf_t *bp)
97{
98 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_B, where,
99 (char *)args->name, (int)args->namelen,
100 (void *)(unsigned long)args->hashval,
101 (void *)((unsigned long)(args->inumber >> 32)),
102 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
103 (void *)args->dp, (void *)args->trans,
104 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
105 (void *)(bp ? bp->bps[0] : NULL), NULL);
106}
107
108void
109xfs_dir2_trace_args_bb(
110 char *where,
111 xfs_da_args_t *args,
112 xfs_dabuf_t *lbp,
113 xfs_dabuf_t *dbp)
114{
115 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_BB, where,
116 (char *)args->name, (int)args->namelen,
117 (void *)(unsigned long)args->hashval,
118 (void *)((unsigned long)(args->inumber >> 32)),
119 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
120 (void *)args->dp, (void *)args->trans,
121 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
122 (void *)(lbp ? lbp->bps[0] : NULL),
123 (void *)(dbp ? dbp->bps[0] : NULL));
124}
125
126void
127xfs_dir2_trace_args_bibii(
128 char *where,
129 xfs_da_args_t *args,
130 xfs_dabuf_t *bs,
131 int ss,
132 xfs_dabuf_t *bd,
133 int sd,
134 int c)
135{
136 xfs_buf_t *bpbs = bs ? bs->bps[0] : NULL;
137 xfs_buf_t *bpbd = bd ? bd->bps[0] : NULL;
138
139 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_BIBII, where,
140 (char *)args->name, (int)args->namelen,
141 (void *)args->dp, (void *)args->trans,
142 (void *)bpbs, (void *)(long)ss, (void *)bpbd, (void *)(long)sd,
143 (void *)(long)c, NULL);
144}
145
146void
147xfs_dir2_trace_args_db(
148 char *where,
149 xfs_da_args_t *args,
150 xfs_dir2_db_t db,
151 xfs_dabuf_t *bp)
152{
153 xfs_buf_t *dbp = bp ? bp->bps[0] : NULL;
154
155 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_DB, where,
156 (char *)args->name, (int)args->namelen,
157 (void *)(unsigned long)args->hashval,
158 (void *)((unsigned long)(args->inumber >> 32)),
159 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
160 (void *)args->dp, (void *)args->trans,
161 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
162 (void *)(long)db, (void *)dbp);
163}
164
165void
166xfs_dir2_trace_args_i(
167 char *where,
168 xfs_da_args_t *args,
169 xfs_ino_t i)
170{
171 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_I, where,
172 (char *)args->name, (int)args->namelen,
173 (void *)(unsigned long)args->hashval,
174 (void *)((unsigned long)(args->inumber >> 32)),
175 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
176 (void *)args->dp, (void *)args->trans,
177 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
178 (void *)((unsigned long)(i >> 32)),
179 (void *)((unsigned long)(i & 0xFFFFFFFF)));
180}
181
182void
183xfs_dir2_trace_args_s(
184 char *where,
185 xfs_da_args_t *args,
186 int s)
187{
188 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_S, where,
189 (char *)args->name, (int)args->namelen,
190 (void *)(unsigned long)args->hashval,
191 (void *)((unsigned long)(args->inumber >> 32)),
192 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
193 (void *)args->dp, (void *)args->trans,
194 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
195 (void *)(long)s, NULL);
196}
197
198void
199xfs_dir2_trace_args_sb(
200 char *where,
201 xfs_da_args_t *args,
202 int s,
203 xfs_dabuf_t *bp)
204{
205 xfs_buf_t *dbp = bp ? bp->bps[0] : NULL;
206
207 xfs_dir2_trace_enter(args->dp, XFS_DIR2_KTRACE_ARGS_SB, where,
208 (char *)args->name, (int)args->namelen,
209 (void *)(unsigned long)args->hashval,
210 (void *)((unsigned long)(args->inumber >> 32)),
211 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
212 (void *)args->dp, (void *)args->trans,
213 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
214 (void *)(long)s, (void *)dbp);
215}
216#endif /* XFS_DIR2_TRACE */
diff --git a/fs/xfs/xfs_dir2_trace.h b/fs/xfs/xfs_dir2_trace.h
deleted file mode 100644
index ca3c754f4822..000000000000
--- a/fs/xfs/xfs_dir2_trace.h
+++ /dev/null
@@ -1,72 +0,0 @@
1/*
2 * Copyright (c) 2000,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_TRACE_H__
19#define __XFS_DIR2_TRACE_H__
20
21/*
22 * Tracing for xfs v2 directories.
23 */
24
25#if defined(XFS_DIR2_TRACE)
26
27struct ktrace;
28struct xfs_dabuf;
29struct xfs_da_args;
30
31#define XFS_DIR2_GTRACE_SIZE 4096 /* global buffer */
32#define XFS_DIR2_KTRACE_SIZE 32 /* per-inode buffer */
33extern struct ktrace *xfs_dir2_trace_buf;
34
35#define XFS_DIR2_KTRACE_ARGS 1 /* args only */
36#define XFS_DIR2_KTRACE_ARGS_B 2 /* args + buffer */
37#define XFS_DIR2_KTRACE_ARGS_BB 3 /* args + 2 buffers */
38#define XFS_DIR2_KTRACE_ARGS_DB 4 /* args, db, buffer */
39#define XFS_DIR2_KTRACE_ARGS_I 5 /* args, inum */
40#define XFS_DIR2_KTRACE_ARGS_S 6 /* args, int */
41#define XFS_DIR2_KTRACE_ARGS_SB 7 /* args, int, buffer */
42#define XFS_DIR2_KTRACE_ARGS_BIBII 8 /* args, buf/int/buf/int/int */
43
44void xfs_dir2_trace_args(char *where, struct xfs_da_args *args);
45void xfs_dir2_trace_args_b(char *where, struct xfs_da_args *args,
46 struct xfs_dabuf *bp);
47void xfs_dir2_trace_args_bb(char *where, struct xfs_da_args *args,
48 struct xfs_dabuf *lbp, struct xfs_dabuf *dbp);
49void xfs_dir2_trace_args_bibii(char *where, struct xfs_da_args *args,
50 struct xfs_dabuf *bs, int ss,
51 struct xfs_dabuf *bd, int sd, int c);
52void xfs_dir2_trace_args_db(char *where, struct xfs_da_args *args,
53 xfs_dir2_db_t db, struct xfs_dabuf *bp);
54void xfs_dir2_trace_args_i(char *where, struct xfs_da_args *args, xfs_ino_t i);
55void xfs_dir2_trace_args_s(char *where, struct xfs_da_args *args, int s);
56void xfs_dir2_trace_args_sb(char *where, struct xfs_da_args *args, int s,
57 struct xfs_dabuf *bp);
58
59#else /* XFS_DIR2_TRACE */
60
61#define xfs_dir2_trace_args(where, args)
62#define xfs_dir2_trace_args_b(where, args, bp)
63#define xfs_dir2_trace_args_bb(where, args, lbp, dbp)
64#define xfs_dir2_trace_args_bibii(where, args, bs, ss, bd, sd, c)
65#define xfs_dir2_trace_args_db(where, args, db, bp)
66#define xfs_dir2_trace_args_i(where, args, i)
67#define xfs_dir2_trace_args_s(where, args, s)
68#define xfs_dir2_trace_args_sb(where, args, s, bp)
69
70#endif /* XFS_DIR2_TRACE */
71
72#endif /* __XFS_DIR2_TRACE_H__ */
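
The deleted ktrace helpers above were replaced one-for-one by tracepoints. The pattern, paraphrased from the new fs/xfs/linux-2.6/xfs_trace.h (a simplified sketch, not the verbatim definition): one event class captures the xfs_da_args state once, and each former xfs_dir2_trace_args() call site becomes a DEFINE_EVENT one-liner.

DECLARE_EVENT_CLASS(xfs_dir2_class,
	TP_PROTO(struct xfs_da_args *args),
	TP_ARGS(args),
	TP_STRUCT__entry(
		__field(xfs_ino_t, ino)
		__dynamic_array(char, name, args->namelen)
		__field(int, namelen)
	),
	TP_fast_assign(
		__entry->ino = args->dp->i_ino;
		if (args->namelen)
			memcpy(__get_str(name), args->name, args->namelen);
		__entry->namelen = args->namelen;
	),
	TP_printk("ino 0x%llx name %.*s",
		  __entry->ino, __entry->namelen, __get_str(name))
);

#define DEFINE_DIR2_EVENT(name) \
DEFINE_EVENT(xfs_dir2_class, name, \
	TP_PROTO(struct xfs_da_args *args), \
	TP_ARGS(args))
DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);

Compared with the ktrace code, this also drops the per-inode i_dir_trace buffer and the fixed 16-slot entry encoding, since a dynamic array carries the name verbatim.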
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 05a4bdd4be39..6f35ed1b39b9 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -82,7 +82,7 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip,
82 82
83 log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format); 83 log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format);
84 log_vector->i_len = size; 84 log_vector->i_len = size;
85 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFI_FORMAT); 85 log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT;
86 ASSERT(size >= sizeof(xfs_efi_log_format_t)); 86 ASSERT(size >= sizeof(xfs_efi_log_format_t));
87} 87}
88 88
@@ -406,7 +406,7 @@ xfs_efd_item_format(xfs_efd_log_item_t *efdp,
406 406
407 log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format); 407 log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format);
408 log_vector->i_len = size; 408 log_vector->i_len = size;
409 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFD_FORMAT); 409 log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT;
410 ASSERT(size >= sizeof(xfs_efd_log_format_t)); 410 ASSERT(size >= sizeof(xfs_efd_log_format_t));
411} 411}
412 412
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index edf8bdf4141f..390850ee6603 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -34,6 +34,7 @@
34#include "xfs_utils.h" 34#include "xfs_utils.h"
35#include "xfs_mru_cache.h" 35#include "xfs_mru_cache.h"
36#include "xfs_filestream.h" 36#include "xfs_filestream.h"
37#include "xfs_trace.h"
37 38
38#ifdef XFS_FILESTREAMS_TRACE 39#ifdef XFS_FILESTREAMS_TRACE
39 40
@@ -139,6 +140,7 @@ _xfs_filestream_pick_ag(
139 int flags, 140 int flags,
140 xfs_extlen_t minlen) 141 xfs_extlen_t minlen)
141{ 142{
143 int streams, max_streams;
142 int err, trylock, nscan; 144 int err, trylock, nscan;
143 xfs_extlen_t longest, free, minfree, maxfree = 0; 145 xfs_extlen_t longest, free, minfree, maxfree = 0;
144 xfs_agnumber_t ag, max_ag = NULLAGNUMBER; 146 xfs_agnumber_t ag, max_ag = NULLAGNUMBER;
@@ -154,15 +156,15 @@ _xfs_filestream_pick_ag(
154 trylock = XFS_ALLOC_FLAG_TRYLOCK; 156 trylock = XFS_ALLOC_FLAG_TRYLOCK;
155 157
156 for (nscan = 0; 1; nscan++) { 158 for (nscan = 0; 1; nscan++) {
157 159 pag = xfs_perag_get(mp, ag);
158 TRACE_AG_SCAN(mp, ag, xfs_filestream_peek_ag(mp, ag)); 160 TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms));
159
160 pag = mp->m_perag + ag;
161 161
162 if (!pag->pagf_init) { 162 if (!pag->pagf_init) {
163 err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); 163 err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
164 if (err && !trylock) 164 if (err && !trylock) {
165 xfs_perag_put(pag);
165 return err; 166 return err;
167 }
166 } 168 }
167 169
168 /* Might fail sometimes during the 1st pass with trylock set. */ 170 /* Might fail sometimes during the 1st pass with trylock set. */
@@ -172,6 +174,7 @@ _xfs_filestream_pick_ag(
172 /* Keep track of the AG with the most free blocks. */ 174 /* Keep track of the AG with the most free blocks. */
173 if (pag->pagf_freeblks > maxfree) { 175 if (pag->pagf_freeblks > maxfree) {
174 maxfree = pag->pagf_freeblks; 176 maxfree = pag->pagf_freeblks;
177 max_streams = atomic_read(&pag->pagf_fstrms);
175 max_ag = ag; 178 max_ag = ag;
176 } 179 }
177 180
@@ -194,6 +197,8 @@ _xfs_filestream_pick_ag(
194 197
195 /* Break out, retaining the reference on the AG. */ 198 /* Break out, retaining the reference on the AG. */
196 free = pag->pagf_freeblks; 199 free = pag->pagf_freeblks;
200 streams = atomic_read(&pag->pagf_fstrms);
201 xfs_perag_put(pag);
197 *agp = ag; 202 *agp = ag;
198 break; 203 break;
199 } 204 }
@@ -201,6 +206,7 @@ _xfs_filestream_pick_ag(
201 /* Drop the reference on this AG, it's not usable. */ 206 /* Drop the reference on this AG, it's not usable. */
202 xfs_filestream_put_ag(mp, ag); 207 xfs_filestream_put_ag(mp, ag);
203next_ag: 208next_ag:
209 xfs_perag_put(pag);
204 /* Move to the next AG, wrapping to AG 0 if necessary. */ 210 /* Move to the next AG, wrapping to AG 0 if necessary. */
205 if (++ag >= mp->m_sb.sb_agcount) 211 if (++ag >= mp->m_sb.sb_agcount)
206 ag = 0; 212 ag = 0;
@@ -228,6 +234,7 @@ next_ag:
228 if (max_ag != NULLAGNUMBER) { 234 if (max_ag != NULLAGNUMBER) {
229 xfs_filestream_get_ag(mp, max_ag); 235 xfs_filestream_get_ag(mp, max_ag);
230 TRACE_AG_PICK1(mp, max_ag, maxfree); 236 TRACE_AG_PICK1(mp, max_ag, maxfree);
237 streams = max_streams;
231 free = maxfree; 238 free = maxfree;
232 *agp = max_ag; 239 *agp = max_ag;
233 break; 240 break;
@@ -239,16 +246,14 @@ next_ag:
239 return 0; 246 return 0;
240 } 247 }
241 248
242 TRACE_AG_PICK2(mp, startag, *agp, xfs_filestream_peek_ag(mp, *agp), 249 TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags);
243 free, nscan, flags);
244 250
245 return 0; 251 return 0;
246} 252}
247 253
248/* 254/*
249 * Set the allocation group number for a file or a directory, updating inode 255 * Set the allocation group number for a file or a directory, updating inode
250 * references and per-AG references as appropriate. Must be called with the 256 * references and per-AG references as appropriate.
251 * m_peraglock held in read mode.
252 */ 257 */
253static int 258static int
254_xfs_filestream_update_ag( 259_xfs_filestream_update_ag(
@@ -394,9 +399,7 @@ xfs_filestream_init(void)
394 item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item"); 399 item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
395 if (!item_zone) 400 if (!item_zone)
396 return -ENOMEM; 401 return -ENOMEM;
397#ifdef XFS_FILESTREAMS_TRACE 402
398 xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_NOFS);
399#endif
400 return 0; 403 return 0;
401} 404}
402 405
@@ -407,9 +410,6 @@ xfs_filestream_init(void)
407void 410void
408xfs_filestream_uninit(void) 411xfs_filestream_uninit(void)
409{ 412{
410#ifdef XFS_FILESTREAMS_TRACE
411 ktrace_free(xfs_filestreams_trace_buf);
412#endif
413 kmem_zone_destroy(item_zone); 413 kmem_zone_destroy(item_zone);
414} 414}
415 415
@@ -455,20 +455,6 @@ xfs_filestream_unmount(
455} 455}
456 456
457/* 457/*
458 * If the mount point's m_perag array is going to be reallocated, all
459 * outstanding cache entries must be flushed to avoid accessing reference count
460 * addresses that have been freed. The call to xfs_filestream_flush() must be
461 * made inside the block that holds the m_peraglock in write mode to do the
462 * reallocation.
463 */
464void
465xfs_filestream_flush(
466 xfs_mount_t *mp)
467{
468 xfs_mru_cache_flush(mp->m_filestream);
469}
470
471/*
472 * Return the AG of the filestream the file or directory belongs to, or 458 * Return the AG of the filestream the file or directory belongs to, or
473 * NULLAGNUMBER otherwise. 459 * NULLAGNUMBER otherwise.
474 */ 460 */
@@ -530,7 +516,6 @@ xfs_filestream_associate(
530 516
531 mp = pip->i_mount; 517 mp = pip->i_mount;
532 cache = mp->m_filestream; 518 cache = mp->m_filestream;
533 down_read(&mp->m_peraglock);
534 519
535 /* 520 /*
536 * We have a problem, Houston. 521 * We have a problem, Houston.
@@ -547,10 +532,8 @@ xfs_filestream_associate(
547 * 532 *
548 * So, if we can't get the iolock without sleeping then just give up 533 * So, if we can't get the iolock without sleeping then just give up
549 */ 534 */
550 if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) { 535 if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL))
551 up_read(&mp->m_peraglock);
552 return 1; 536 return 1;
553 }
554 537
555 /* If the parent directory is already in the cache, use its AG. */ 538 /* If the parent directory is already in the cache, use its AG. */
556 item = xfs_mru_cache_lookup(cache, pip->i_ino); 539 item = xfs_mru_cache_lookup(cache, pip->i_ino);
@@ -605,7 +588,6 @@ exit_did_pick:
605 588
606exit: 589exit:
607 xfs_iunlock(pip, XFS_IOLOCK_EXCL); 590 xfs_iunlock(pip, XFS_IOLOCK_EXCL);
608 up_read(&mp->m_peraglock);
609 return -err; 591 return -err;
610} 592}
611 593
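
The change running through xfs_filestream.c above, and repeated in xfs_ialloc.c and xfs_iget.c below, swaps the global m_peraglock rwsem and direct m_perag[agno] indexing for reference-counted lookups: xfs_perag_get() pins one per-AG structure for the duration of the access and xfs_perag_put() releases it, so readers no longer serialize on a filesystem-wide lock and the per-AG table can be reorganised safely at grow time. A minimal userspace model of that pattern (illustrative only; the names mirror the kernel but this is not kernel code):

	#include <assert.h>
	#include <stdatomic.h>
	#include <stdio.h>

	struct perag_model {
		atomic_int	pag_ref;	/* pins the structure while in use */
		atomic_int	pagf_fstrms;	/* filestream count, as in the diff */
	};

	static struct perag_model *
	perag_get(struct perag_model *array, unsigned int agno)
	{
		struct perag_model *pag = &array[agno];

		atomic_fetch_add(&pag->pag_ref, 1);
		return pag;
	}

	static void
	perag_put(struct perag_model *pag)
	{
		int ref = atomic_fetch_sub(&pag->pag_ref, 1);

		assert(ref > 0);	/* put without a matching get */
	}

	/* usage shaped like the reworked xfs_filestream_peek_ag() */
	static int
	peek_fstrms(struct perag_model *array, unsigned int agno)
	{
		struct perag_model *pag = perag_get(array, agno);
		int ret = atomic_load(&pag->pagf_fstrms);

		perag_put(pag);
		return ret;
	}

	int main(void)
	{
		struct perag_model ags[4];

		for (int i = 0; i < 4; i++) {
			atomic_init(&ags[i].pag_ref, 0);
			atomic_init(&ags[i].pagf_fstrms, 0);
		}
		atomic_store(&ags[2].pagf_fstrms, 3);
		printf("ag 2 filestreams: %d\n", peek_fstrms(ags, 2));
		return 0;
	}
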
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index f655f7dc334c..260f757bbc5d 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -79,28 +79,49 @@ extern ktrace_t *xfs_filestreams_trace_buf;
79 * the cache that reference per-ag array elements that have since been 79 * the cache that reference per-ag array elements that have since been
80 * reallocated. 80 * reallocated.
81 */ 81 */
82STATIC_INLINE int 82/*
83 * xfs_filestream_peek_ag is only used in tracing code
84 */
85static inline int
83xfs_filestream_peek_ag( 86xfs_filestream_peek_ag(
84 xfs_mount_t *mp, 87 xfs_mount_t *mp,
85 xfs_agnumber_t agno) 88 xfs_agnumber_t agno)
86{ 89{
87 return atomic_read(&mp->m_perag[agno].pagf_fstrms); 90 struct xfs_perag *pag;
91 int ret;
92
93 pag = xfs_perag_get(mp, agno);
94 ret = atomic_read(&pag->pagf_fstrms);
95 xfs_perag_put(pag);
96 return ret;
88} 97}
89 98
90STATIC_INLINE int 99static inline int
91xfs_filestream_get_ag( 100xfs_filestream_get_ag(
92 xfs_mount_t *mp, 101 xfs_mount_t *mp,
93 xfs_agnumber_t agno) 102 xfs_agnumber_t agno)
94{ 103{
95 return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms); 104 struct xfs_perag *pag;
105 int ret;
106
107 pag = xfs_perag_get(mp, agno);
108 ret = atomic_inc_return(&pag->pagf_fstrms);
109 xfs_perag_put(pag);
110 return ret;
96} 111}
97 112
98STATIC_INLINE int 113static inline int
99xfs_filestream_put_ag( 114xfs_filestream_put_ag(
100 xfs_mount_t *mp, 115 xfs_mount_t *mp,
101 xfs_agnumber_t agno) 116 xfs_agnumber_t agno)
102{ 117{
103 return atomic_dec_return(&mp->m_perag[agno].pagf_fstrms); 118 struct xfs_perag *pag;
119 int ret;
120
121 pag = xfs_perag_get(mp, agno);
122 ret = atomic_dec_return(&pag->pagf_fstrms);
123 xfs_perag_put(pag);
124 return ret;
104} 125}
105 126
106/* allocation selection flags */ 127/* allocation selection flags */
@@ -114,7 +135,6 @@ int xfs_filestream_init(void);
114void xfs_filestream_uninit(void); 135void xfs_filestream_uninit(void);
115int xfs_filestream_mount(struct xfs_mount *mp); 136int xfs_filestream_mount(struct xfs_mount *mp);
116void xfs_filestream_unmount(struct xfs_mount *mp); 137void xfs_filestream_unmount(struct xfs_mount *mp);
117void xfs_filestream_flush(struct xfs_mount *mp);
118xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); 138xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);
119int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip); 139int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip);
120void xfs_filestream_deassociate(struct xfs_inode *ip); 140void xfs_filestream_deassociate(struct xfs_inode *ip);
@@ -122,7 +142,7 @@ int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp);
122 142
123 143
124/* filestreams for the inode? */ 144/* filestreams for the inode? */
125STATIC_INLINE int 145static inline int
126xfs_inode_is_filestream( 146xfs_inode_is_filestream(
127 struct xfs_inode *ip) 147 struct xfs_inode *ip)
128{ 148{
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index f52ac276277e..7cf7220e7d5f 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -292,7 +292,8 @@ typedef struct xfs_bstat {
292 __s32 bs_extents; /* number of extents */ 292 __s32 bs_extents; /* number of extents */
293 __u32 bs_gen; /* generation count */ 293 __u32 bs_gen; /* generation count */
294 __u16 bs_projid; /* project id */ 294 __u16 bs_projid; /* project id */
295 unsigned char bs_pad[14]; /* pad space, unused */ 295 __u16 bs_forkoff; /* inode fork offset in bytes */
296 unsigned char bs_pad[12]; /* pad space, unused */
296 __u32 bs_dmevmask; /* DMIG event mask */ 297 __u32 bs_dmevmask; /* DMIG event mask */
297 __u16 bs_dmstate; /* DMIG state info */ 298 __u16 bs_dmstate; /* DMIG state info */
298 __u16 bs_aextents; /* attribute number of extents */ 299 __u16 bs_aextents; /* attribute number of extents */
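
The xfs_fs.h hunk carves a __u16 bs_forkoff field out of the existing 14-byte pad, shrinking the pad to 12 bytes so struct xfs_bstat keeps its size and field offsets and the bulkstat ioctl ABI is unchanged. A compile-time sketch of that invariant (illustrative structs, not the kernel header):

	#include <stdint.h>

	struct bstat_old {
		uint16_t	bs_projid;
		unsigned char	bs_pad[14];	/* pad space, unused */
	};

	struct bstat_new {
		uint16_t	bs_projid;
		uint16_t	bs_forkoff;	/* carved out of the pad */
		unsigned char	bs_pad[12];
	};

	_Static_assert(sizeof(struct bstat_old) == sizeof(struct bstat_new),
		       "taking a __u16 from the pad must not change the size");
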
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 2d0b3e1da9e6..37a6f62c57b6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -45,6 +45,7 @@
45#include "xfs_rtalloc.h" 45#include "xfs_rtalloc.h"
46#include "xfs_rw.h" 46#include "xfs_rw.h"
47#include "xfs_filestream.h" 47#include "xfs_filestream.h"
48#include "xfs_trace.h"
48 49
49/* 50/*
50 * File system operations 51 * File system operations
@@ -166,27 +167,14 @@ xfs_growfs_data_private(
166 } 167 }
167 new = nb - mp->m_sb.sb_dblocks; 168 new = nb - mp->m_sb.sb_dblocks;
168 oagcount = mp->m_sb.sb_agcount; 169 oagcount = mp->m_sb.sb_agcount;
169 if (nagcount > oagcount) {
170 void *new_perag, *old_perag;
171
172 xfs_filestream_flush(mp);
173
174 new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount,
175 KM_MAYFAIL);
176 if (!new_perag)
177 return XFS_ERROR(ENOMEM);
178
179 down_write(&mp->m_peraglock);
180 memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount);
181 old_perag = mp->m_perag;
182 mp->m_perag = new_perag;
183
184 mp->m_flags |= XFS_MOUNT_32BITINODES;
185 nagimax = xfs_initialize_perag(mp, nagcount);
186 up_write(&mp->m_peraglock);
187 170
188 kmem_free(old_perag); 171 /* allocate the new per-ag structures */
172 if (nagcount > oagcount) {
173 error = xfs_initialize_perag(mp, nagcount, &nagimax);
174 if (error)
175 return error;
189 } 176 }
177
190 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); 178 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
191 tp->t_flags |= XFS_TRANS_RESERVE; 179 tp->t_flags |= XFS_TRANS_RESERVE;
192 if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp), 180 if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
@@ -195,14 +183,19 @@ xfs_growfs_data_private(
195 return error; 183 return error;
196 } 184 }
197 185
186 /*
187 * Write new AG headers to disk. Non-transactional, but written
188 * synchronously so they are completed prior to the growfs transaction
189 * being logged.
190 */
198 nfree = 0; 191 nfree = 0;
199 for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) { 192 for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {
200 /* 193 /*
201 * AG freelist header block 194 * AG freelist header block
202 */ 195 */
203 bp = xfs_buf_get(mp->m_ddev_targp, 196 bp = xfs_buf_get(mp->m_ddev_targp,
204 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), 197 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
205 XFS_FSS_TO_BB(mp, 1), 0); 198 XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED);
206 agf = XFS_BUF_TO_AGF(bp); 199 agf = XFS_BUF_TO_AGF(bp);
207 memset(agf, 0, mp->m_sb.sb_sectsize); 200 memset(agf, 0, mp->m_sb.sb_sectsize);
208 agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); 201 agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
@@ -233,8 +226,8 @@ xfs_growfs_data_private(
233 * AG inode header block 226 * AG inode header block
234 */ 227 */
235 bp = xfs_buf_get(mp->m_ddev_targp, 228 bp = xfs_buf_get(mp->m_ddev_targp,
236 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), 229 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
237 XFS_FSS_TO_BB(mp, 1), 0); 230 XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED);
238 agi = XFS_BUF_TO_AGI(bp); 231 agi = XFS_BUF_TO_AGI(bp);
239 memset(agi, 0, mp->m_sb.sb_sectsize); 232 memset(agi, 0, mp->m_sb.sb_sectsize);
240 agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); 233 agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
@@ -257,8 +250,9 @@ xfs_growfs_data_private(
257 * BNO btree root block 250 * BNO btree root block
258 */ 251 */
259 bp = xfs_buf_get(mp->m_ddev_targp, 252 bp = xfs_buf_get(mp->m_ddev_targp,
260 XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), 253 XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
261 BTOBB(mp->m_sb.sb_blocksize), 0); 254 BTOBB(mp->m_sb.sb_blocksize),
255 XBF_LOCK | XBF_MAPPED);
262 block = XFS_BUF_TO_BLOCK(bp); 256 block = XFS_BUF_TO_BLOCK(bp);
263 memset(block, 0, mp->m_sb.sb_blocksize); 257 memset(block, 0, mp->m_sb.sb_blocksize);
264 block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC); 258 block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC);
@@ -278,8 +272,9 @@ xfs_growfs_data_private(
278 * CNT btree root block 272 * CNT btree root block
279 */ 273 */
280 bp = xfs_buf_get(mp->m_ddev_targp, 274 bp = xfs_buf_get(mp->m_ddev_targp,
281 XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), 275 XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
282 BTOBB(mp->m_sb.sb_blocksize), 0); 276 BTOBB(mp->m_sb.sb_blocksize),
277 XBF_LOCK | XBF_MAPPED);
283 block = XFS_BUF_TO_BLOCK(bp); 278 block = XFS_BUF_TO_BLOCK(bp);
284 memset(block, 0, mp->m_sb.sb_blocksize); 279 memset(block, 0, mp->m_sb.sb_blocksize);
285 block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC); 280 block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC);
@@ -300,8 +295,9 @@ xfs_growfs_data_private(
300 * INO btree root block 295 * INO btree root block
301 */ 296 */
302 bp = xfs_buf_get(mp->m_ddev_targp, 297 bp = xfs_buf_get(mp->m_ddev_targp,
303 XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), 298 XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
304 BTOBB(mp->m_sb.sb_blocksize), 0); 299 BTOBB(mp->m_sb.sb_blocksize),
300 XBF_LOCK | XBF_MAPPED);
305 block = XFS_BUF_TO_BLOCK(bp); 301 block = XFS_BUF_TO_BLOCK(bp);
306 memset(block, 0, mp->m_sb.sb_blocksize); 302 memset(block, 0, mp->m_sb.sb_blocksize);
307 block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC); 303 block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC);
@@ -344,6 +340,7 @@ xfs_growfs_data_private(
344 be32_add_cpu(&agf->agf_length, new); 340 be32_add_cpu(&agf->agf_length, new);
345 ASSERT(be32_to_cpu(agf->agf_length) == 341 ASSERT(be32_to_cpu(agf->agf_length) ==
346 be32_to_cpu(agi->agi_length)); 342 be32_to_cpu(agi->agi_length));
343
347 xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); 344 xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
348 /* 345 /*
349 * Free the new space. 346 * Free the new space.
@@ -354,6 +351,12 @@ xfs_growfs_data_private(
354 goto error0; 351 goto error0;
355 } 352 }
356 } 353 }
354
355 /*
356 * Update changed superblock fields transactionally. These are not
357 * seen by the rest of the world until the transaction commit applies
358 * them atomically to the superblock.
359 */
357 if (nagcount > oagcount) 360 if (nagcount > oagcount)
358 xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount); 361 xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount);
359 if (nb > mp->m_sb.sb_dblocks) 362 if (nb > mp->m_sb.sb_dblocks)
@@ -364,9 +367,9 @@ xfs_growfs_data_private(
364 if (dpct) 367 if (dpct)
365 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); 368 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
366 error = xfs_trans_commit(tp, 0); 369 error = xfs_trans_commit(tp, 0);
367 if (error) { 370 if (error)
368 return error; 371 return error;
369 } 372
370 /* New allocation groups fully initialized, so update mount struct */ 373 /* New allocation groups fully initialized, so update mount struct */
371 if (nagimax) 374 if (nagimax)
372 mp->m_maxagi = nagimax; 375 mp->m_maxagi = nagimax;
@@ -376,6 +379,8 @@ xfs_growfs_data_private(
376 mp->m_maxicount = icount << mp->m_sb.sb_inopblog; 379 mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
377 } else 380 } else
378 mp->m_maxicount = 0; 381 mp->m_maxicount = 0;
382
383 /* update secondary superblocks. */
379 for (agno = 1; agno < nagcount; agno++) { 384 for (agno = 1; agno < nagcount; agno++) {
380 error = xfs_read_buf(mp, mp->m_ddev_targp, 385 error = xfs_read_buf(mp, mp->m_ddev_targp,
381 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), 386 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
@@ -611,7 +616,7 @@ xfs_fs_log_dummy(
611 xfs_inode_t *ip; 616 xfs_inode_t *ip;
612 int error; 617 int error;
613 618
614 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); 619 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
615 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); 620 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
616 if (error) { 621 if (error) {
617 xfs_trans_cancel(tp, 0); 622 xfs_trans_cancel(tp, 0);
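
Each of the xfs_growfs_data_private() hunks above replaces a bare flags value of 0 with XBF_LOCK | XBF_MAPPED when getting the buffers for the new AG headers, and the new comment spells out the ordering: the headers are written synchronously, outside the transaction, before the growfs transaction that exposes them is logged. The repeated get/zero/fill blocks all share one shape; a hypothetical helper making that shape explicit (xfs_growfs_get_hdr_buf is not part of this diff, though the buffer-cache calls it uses are the real ones):

	/*
	 * Hypothetical helper: get a locked, mapped, zeroed buffer for a
	 * new AG header block.  Callers fill in the header and write it
	 * synchronously before the growfs transaction commits.
	 */
	static struct xfs_buf *
	xfs_growfs_get_hdr_buf(
		struct xfs_mount	*mp,
		xfs_daddr_t		blkno,
		size_t			numblks)
	{
		struct xfs_buf		*bp;

		bp = xfs_buf_get(mp->m_ddev_targp, blkno, numblks,
				 XBF_LOCK | XBF_MAPPED);
		if (bp)
			memset(bp->b_addr, 0, BBTOB(numblks));
		return bp;
	}
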
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 0785797db828..9d884c127bb9 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -205,7 +205,7 @@ xfs_ialloc_inode_init(
205 d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); 205 d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
206 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 206 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
207 mp->m_bsize * blks_per_cluster, 207 mp->m_bsize * blks_per_cluster,
208 XFS_BUF_LOCK); 208 XBF_LOCK);
209 ASSERT(fbuf); 209 ASSERT(fbuf);
210 ASSERT(!XFS_BUF_GETERROR(fbuf)); 210 ASSERT(!XFS_BUF_GETERROR(fbuf));
211 211
@@ -253,6 +253,7 @@ xfs_ialloc_ag_alloc(
253 xfs_agino_t thisino; /* current inode number, for loop */ 253 xfs_agino_t thisino; /* current inode number, for loop */
254 int isaligned = 0; /* inode allocation at stripe unit */ 254 int isaligned = 0; /* inode allocation at stripe unit */
255 /* boundary */ 255 /* boundary */
256 struct xfs_perag *pag;
256 257
257 args.tp = tp; 258 args.tp = tp;
258 args.mp = tp->t_mountp; 259 args.mp = tp->t_mountp;
@@ -382,9 +383,9 @@ xfs_ialloc_ag_alloc(
382 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); 383 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
383 be32_add_cpu(&agi->agi_count, newlen); 384 be32_add_cpu(&agi->agi_count, newlen);
384 be32_add_cpu(&agi->agi_freecount, newlen); 385 be32_add_cpu(&agi->agi_freecount, newlen);
385 down_read(&args.mp->m_peraglock); 386 pag = xfs_perag_get(args.mp, agno);
386 args.mp->m_perag[agno].pagi_freecount += newlen; 387 pag->pagi_freecount += newlen;
387 up_read(&args.mp->m_peraglock); 388 xfs_perag_put(pag);
388 agi->agi_newino = cpu_to_be32(newino); 389 agi->agi_newino = cpu_to_be32(newino);
389 390
390 /* 391 /*
@@ -425,7 +426,7 @@ xfs_ialloc_ag_alloc(
425 return 0; 426 return 0;
426} 427}
427 428
428STATIC_INLINE xfs_agnumber_t 429STATIC xfs_agnumber_t
429xfs_ialloc_next_ag( 430xfs_ialloc_next_ag(
430 xfs_mount_t *mp) 431 xfs_mount_t *mp)
431{ 432{
@@ -486,9 +487,8 @@ xfs_ialloc_ag_select(
486 */ 487 */
487 agno = pagno; 488 agno = pagno;
488 flags = XFS_ALLOC_FLAG_TRYLOCK; 489 flags = XFS_ALLOC_FLAG_TRYLOCK;
489 down_read(&mp->m_peraglock);
490 for (;;) { 490 for (;;) {
491 pag = &mp->m_perag[agno]; 491 pag = xfs_perag_get(mp, agno);
492 if (!pag->pagi_init) { 492 if (!pag->pagi_init) {
493 if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { 493 if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
494 agbp = NULL; 494 agbp = NULL;
@@ -527,7 +527,7 @@ xfs_ialloc_ag_select(
527 agbp = NULL; 527 agbp = NULL;
528 goto nextag; 528 goto nextag;
529 } 529 }
530 up_read(&mp->m_peraglock); 530 xfs_perag_put(pag);
531 return agbp; 531 return agbp;
532 } 532 }
533 } 533 }
@@ -535,22 +535,19 @@ unlock_nextag:
535 if (agbp) 535 if (agbp)
536 xfs_trans_brelse(tp, agbp); 536 xfs_trans_brelse(tp, agbp);
537nextag: 537nextag:
538 xfs_perag_put(pag);
538 /* 539 /*
539 * No point in iterating over the rest, if we're shutting 540 * No point in iterating over the rest, if we're shutting
540 * down. 541 * down.
541 */ 542 */
542 if (XFS_FORCED_SHUTDOWN(mp)) { 543 if (XFS_FORCED_SHUTDOWN(mp))
543 up_read(&mp->m_peraglock);
544 return NULL; 544 return NULL;
545 }
546 agno++; 545 agno++;
547 if (agno >= agcount) 546 if (agno >= agcount)
548 agno = 0; 547 agno = 0;
549 if (agno == pagno) { 548 if (agno == pagno) {
550 if (flags == 0) { 549 if (flags == 0)
551 up_read(&mp->m_peraglock);
552 return NULL; 550 return NULL;
553 }
554 flags = 0; 551 flags = 0;
555 } 552 }
556 } 553 }
@@ -672,6 +669,7 @@ xfs_dialloc(
672 xfs_agnumber_t tagno; /* testing allocation group number */ 669 xfs_agnumber_t tagno; /* testing allocation group number */
673 xfs_btree_cur_t *tcur; /* temp cursor */ 670 xfs_btree_cur_t *tcur; /* temp cursor */
674 xfs_inobt_rec_incore_t trec; /* temp inode allocation record */ 671 xfs_inobt_rec_incore_t trec; /* temp inode allocation record */
672 struct xfs_perag *pag;
675 673
676 674
677 if (*IO_agbp == NULL) { 675 if (*IO_agbp == NULL) {
@@ -771,13 +769,13 @@ nextag:
771 *inop = NULLFSINO; 769 *inop = NULLFSINO;
772 return noroom ? ENOSPC : 0; 770 return noroom ? ENOSPC : 0;
773 } 771 }
774 down_read(&mp->m_peraglock); 772 pag = xfs_perag_get(mp, tagno);
775 if (mp->m_perag[tagno].pagi_inodeok == 0) { 773 if (pag->pagi_inodeok == 0) {
776 up_read(&mp->m_peraglock); 774 xfs_perag_put(pag);
777 goto nextag; 775 goto nextag;
778 } 776 }
779 error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); 777 error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
780 up_read(&mp->m_peraglock); 778 xfs_perag_put(pag);
781 if (error) 779 if (error)
782 goto nextag; 780 goto nextag;
783 agi = XFS_BUF_TO_AGI(agbp); 781 agi = XFS_BUF_TO_AGI(agbp);
@@ -790,6 +788,7 @@ nextag:
790 */ 788 */
791 agno = tagno; 789 agno = tagno;
792 *IO_agbp = NULL; 790 *IO_agbp = NULL;
791 pag = xfs_perag_get(mp, agno);
793 792
794 restart_pagno: 793 restart_pagno:
795 cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); 794 cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
@@ -808,7 +807,6 @@ nextag:
808 * If in the same AG as the parent, try to get near the parent. 807 * If in the same AG as the parent, try to get near the parent.
809 */ 808 */
810 if (pagno == agno) { 809 if (pagno == agno) {
811 xfs_perag_t *pag = &mp->m_perag[agno];
812 int doneleft; /* done, to the left */ 810 int doneleft; /* done, to the left */
813 int doneright; /* done, to the right */ 811 int doneright; /* done, to the right */
814 int searchdistance = 10; 812 int searchdistance = 10;
@@ -1006,9 +1004,7 @@ alloc_inode:
1006 goto error0; 1004 goto error0;
1007 be32_add_cpu(&agi->agi_freecount, -1); 1005 be32_add_cpu(&agi->agi_freecount, -1);
1008 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 1006 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1009 down_read(&mp->m_peraglock); 1007 pag->pagi_freecount--;
1010 mp->m_perag[tagno].pagi_freecount--;
1011 up_read(&mp->m_peraglock);
1012 1008
1013 error = xfs_check_agi_freecount(cur, agi); 1009 error = xfs_check_agi_freecount(cur, agi);
1014 if (error) 1010 if (error)
@@ -1016,12 +1012,14 @@ alloc_inode:
1016 1012
1017 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1013 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1018 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); 1014 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
1015 xfs_perag_put(pag);
1019 *inop = ino; 1016 *inop = ino;
1020 return 0; 1017 return 0;
1021error1: 1018error1:
1022 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 1019 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1023error0: 1020error0:
1024 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 1021 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1022 xfs_perag_put(pag);
1025 return error; 1023 return error;
1026} 1024}
1027 1025
@@ -1052,6 +1050,7 @@ xfs_difree(
1052 xfs_mount_t *mp; /* mount structure for filesystem */ 1050 xfs_mount_t *mp; /* mount structure for filesystem */
1053 int off; /* offset of inode in inode chunk */ 1051 int off; /* offset of inode in inode chunk */
1054 xfs_inobt_rec_incore_t rec; /* btree record */ 1052 xfs_inobt_rec_incore_t rec; /* btree record */
1053 struct xfs_perag *pag;
1055 1054
1056 mp = tp->t_mountp; 1055 mp = tp->t_mountp;
1057 1056
@@ -1088,9 +1087,7 @@ xfs_difree(
1088 /* 1087 /*
1089 * Get the allocation group header. 1088 * Get the allocation group header.
1090 */ 1089 */
1091 down_read(&mp->m_peraglock);
1092 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1090 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1093 up_read(&mp->m_peraglock);
1094 if (error) { 1091 if (error) {
1095 cmn_err(CE_WARN, 1092 cmn_err(CE_WARN,
1096 "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", 1093 "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.",
@@ -1157,9 +1154,9 @@ xfs_difree(
1157 be32_add_cpu(&agi->agi_count, -ilen); 1154 be32_add_cpu(&agi->agi_count, -ilen);
1158 be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); 1155 be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
1159 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); 1156 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
1160 down_read(&mp->m_peraglock); 1157 pag = xfs_perag_get(mp, agno);
1161 mp->m_perag[agno].pagi_freecount -= ilen - 1; 1158 pag->pagi_freecount -= ilen - 1;
1162 up_read(&mp->m_peraglock); 1159 xfs_perag_put(pag);
1163 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); 1160 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
1164 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); 1161 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
1165 1162
@@ -1188,9 +1185,9 @@ xfs_difree(
1188 */ 1185 */
1189 be32_add_cpu(&agi->agi_freecount, 1); 1186 be32_add_cpu(&agi->agi_freecount, 1);
1190 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 1187 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1191 down_read(&mp->m_peraglock); 1188 pag = xfs_perag_get(mp, agno);
1192 mp->m_perag[agno].pagi_freecount++; 1189 pag->pagi_freecount++;
1193 up_read(&mp->m_peraglock); 1190 xfs_perag_put(pag);
1194 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); 1191 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
1195 } 1192 }
1196 1193
@@ -1312,9 +1309,7 @@ xfs_imap(
1312 xfs_buf_t *agbp; /* agi buffer */ 1309 xfs_buf_t *agbp; /* agi buffer */
1313 int i; /* temp state */ 1310 int i; /* temp state */
1314 1311
1315 down_read(&mp->m_peraglock);
1316 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1312 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1317 up_read(&mp->m_peraglock);
1318 if (error) { 1313 if (error) {
1319 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1314 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1320 "xfs_ialloc_read_agi() returned " 1315 "xfs_ialloc_read_agi() returned "
@@ -1379,7 +1374,6 @@ xfs_imap(
1379 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); 1374 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
1380 return XFS_ERROR(EINVAL); 1375 return XFS_ERROR(EINVAL);
1381 } 1376 }
1382
1383 return 0; 1377 return 0;
1384} 1378}
1385 1379
@@ -1523,8 +1517,7 @@ xfs_ialloc_read_agi(
1523 return error; 1517 return error;
1524 1518
1525 agi = XFS_BUF_TO_AGI(*bpp); 1519 agi = XFS_BUF_TO_AGI(*bpp);
1526 pag = &mp->m_perag[agno]; 1520 pag = xfs_perag_get(mp, agno);
1527
1528 if (!pag->pagi_init) { 1521 if (!pag->pagi_init) {
1529 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); 1522 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
1530 pag->pagi_count = be32_to_cpu(agi->agi_count); 1523 pag->pagi_count = be32_to_cpu(agi->agi_count);
@@ -1537,6 +1530,7 @@ xfs_ialloc_read_agi(
1537 */ 1530 */
1538 ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || 1531 ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
1539 XFS_FORCED_SHUTDOWN(mp)); 1532 XFS_FORCED_SHUTDOWN(mp));
1533 xfs_perag_put(pag);
1540 return 0; 1534 return 0;
1541} 1535}
1542 1536
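
The same rwsem-to-reference conversion runs through xfs_ialloc.c: every down_read(&mp->m_peraglock)/up_read() bracket around a pagi_freecount update becomes an xfs_perag_get()/xfs_perag_put() pair. Condensed before/after, taken from the hunks above (the serialisation note is an inference: the counter updates happen with the AGI buffer locked in a transaction, which is what orders them):

	/* before: global rwsem pins the whole m_perag array */
	down_read(&mp->m_peraglock);
	mp->m_perag[agno].pagi_freecount += newlen;
	up_read(&mp->m_peraglock);

	/* after: a reference pins just this AG's structure */
	pag = xfs_perag_get(mp, agno);
	pag->pagi_freecount += newlen;	/* ordered by the locked AGI buffer */
	xfs_perag_put(pag);
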
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 80e526489be5..6845db90818f 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -43,7 +43,7 @@
43#include "xfs_inode_item.h" 43#include "xfs_inode_item.h"
44#include "xfs_bmap.h" 44#include "xfs_bmap.h"
45#include "xfs_btree_trace.h" 45#include "xfs_btree_trace.h"
46#include "xfs_dir2_trace.h" 46#include "xfs_trace.h"
47 47
48 48
49/* 49/*
@@ -74,6 +74,8 @@ xfs_inode_alloc(
74 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 74 ASSERT(!spin_is_locked(&ip->i_flags_lock));
75 ASSERT(completion_done(&ip->i_flush)); 75 ASSERT(completion_done(&ip->i_flush));
76 76
77 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
78
77 /* initialise the xfs inode */ 79 /* initialise the xfs inode */
78 ip->i_ino = ino; 80 ip->i_ino = ino;
79 ip->i_mount = mp; 81 ip->i_mount = mp;
@@ -87,30 +89,8 @@ xfs_inode_alloc(
87 ip->i_size = 0; 89 ip->i_size = 0;
88 ip->i_new_size = 0; 90 ip->i_new_size = 0;
89 91
90 /*
91 * Initialize inode's trace buffers.
92 */
93#ifdef XFS_INODE_TRACE
94 ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
95#endif
96#ifdef XFS_BMAP_TRACE
97 ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
98#endif
99#ifdef XFS_BTREE_TRACE
100 ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
101#endif
102#ifdef XFS_RW_TRACE
103 ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
104#endif
105#ifdef XFS_ILOCK_TRACE
106 ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
107#endif
108#ifdef XFS_DIR2_TRACE
109 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
110#endif
111
112 /* prevent anyone from using this yet */ 92 /* prevent anyone from using this yet */
113 VFS_I(ip)->i_state = I_NEW|I_LOCK; 93 VFS_I(ip)->i_state = I_NEW;
114 94
115 return ip; 95 return ip;
116} 96}
@@ -130,25 +110,6 @@ xfs_inode_free(
130 if (ip->i_afp) 110 if (ip->i_afp)
131 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 111 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
132 112
133#ifdef XFS_INODE_TRACE
134 ktrace_free(ip->i_trace);
135#endif
136#ifdef XFS_BMAP_TRACE
137 ktrace_free(ip->i_xtrace);
138#endif
139#ifdef XFS_BTREE_TRACE
140 ktrace_free(ip->i_btrace);
141#endif
142#ifdef XFS_RW_TRACE
143 ktrace_free(ip->i_rwtrace);
144#endif
145#ifdef XFS_ILOCK_TRACE
146 ktrace_free(ip->i_lock_trace);
147#endif
148#ifdef XFS_DIR2_TRACE
149 ktrace_free(ip->i_dir_trace);
150#endif
151
152 if (ip->i_itemp) { 113 if (ip->i_itemp) {
153 /* 114 /*
154 * Only if we are shutting down the fs will we see an 115 * Only if we are shutting down the fs will we see an
@@ -207,6 +168,7 @@ xfs_iget_cache_hit(
207 * instead of polling for it. 168 * instead of polling for it.
208 */ 169 */
209 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { 170 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
171 trace_xfs_iget_skip(ip);
210 XFS_STATS_INC(xs_ig_frecycle); 172 XFS_STATS_INC(xs_ig_frecycle);
211 error = EAGAIN; 173 error = EAGAIN;
212 goto out_error; 174 goto out_error;
@@ -225,16 +187,15 @@ xfs_iget_cache_hit(
225 * Need to carefully get it back into usable state. 187 * Need to carefully get it back into usable state.
226 */ 188 */
227 if (ip->i_flags & XFS_IRECLAIMABLE) { 189 if (ip->i_flags & XFS_IRECLAIMABLE) {
228 xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); 190 trace_xfs_iget_reclaim(ip);
229 191
230 /* 192 /*
231 * We need to set XFS_INEW atomically with clearing the 193 * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
232 * reclaimable tag so that we do have an indicator of the 194 * from stomping over us while we recycle the inode. We can't
233 * inode still being initialized. 195 * clear the radix tree reclaimable tag yet as it requires
196 * pag_ici_lock to be held exclusive.
234 */ 197 */
235 ip->i_flags |= XFS_INEW; 198 ip->i_flags |= XFS_IRECLAIM;
236 ip->i_flags &= ~XFS_IRECLAIMABLE;
237 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
238 199
239 spin_unlock(&ip->i_flags_lock); 200 spin_unlock(&ip->i_flags_lock);
240 read_unlock(&pag->pag_ici_lock); 201 read_unlock(&pag->pag_ici_lock);
@@ -251,9 +212,18 @@ xfs_iget_cache_hit(
251 ip->i_flags &= ~XFS_INEW; 212 ip->i_flags &= ~XFS_INEW;
252 ip->i_flags |= XFS_IRECLAIMABLE; 213 ip->i_flags |= XFS_IRECLAIMABLE;
253 __xfs_inode_set_reclaim_tag(pag, ip); 214 __xfs_inode_set_reclaim_tag(pag, ip);
215 trace_xfs_iget_reclaim(ip);
254 goto out_error; 216 goto out_error;
255 } 217 }
256 inode->i_state = I_LOCK|I_NEW; 218
219 write_lock(&pag->pag_ici_lock);
220 spin_lock(&ip->i_flags_lock);
221 ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM);
222 ip->i_flags |= XFS_INEW;
223 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
224 inode->i_state = I_NEW;
225 spin_unlock(&ip->i_flags_lock);
226 write_unlock(&pag->pag_ici_lock);
257 } else { 227 } else {
258 /* If the VFS inode is being torn down, pause and try again. */ 228 /* If the VFS inode is being torn down, pause and try again. */
259 if (!igrab(inode)) { 229 if (!igrab(inode)) {
@@ -270,8 +240,9 @@ xfs_iget_cache_hit(
270 xfs_ilock(ip, lock_flags); 240 xfs_ilock(ip, lock_flags);
271 241
272 xfs_iflags_clear(ip, XFS_ISTALE); 242 xfs_iflags_clear(ip, XFS_ISTALE);
273 xfs_itrace_exit_tag(ip, "xfs_iget.found");
274 XFS_STATS_INC(xs_ig_found); 243 XFS_STATS_INC(xs_ig_found);
244
245 trace_xfs_iget_found(ip);
275 return 0; 246 return 0;
276 247
277out_error: 248out_error:
@@ -290,7 +261,7 @@ xfs_iget_cache_miss(
290 struct xfs_inode **ipp, 261 struct xfs_inode **ipp,
291 xfs_daddr_t bno, 262 xfs_daddr_t bno,
292 int flags, 263 int flags,
293 int lock_flags) __releases(pag->pag_ici_lock) 264 int lock_flags)
294{ 265{
295 struct xfs_inode *ip; 266 struct xfs_inode *ip;
296 int error; 267 int error;
@@ -305,7 +276,7 @@ xfs_iget_cache_miss(
305 if (error) 276 if (error)
306 goto out_destroy; 277 goto out_destroy;
307 278
308 xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); 279 xfs_itrace_entry(ip);
309 280
310 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { 281 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
311 error = ENOENT; 282 error = ENOENT;
@@ -350,6 +321,8 @@ xfs_iget_cache_miss(
350 321
351 write_unlock(&pag->pag_ici_lock); 322 write_unlock(&pag->pag_ici_lock);
352 radix_tree_preload_end(); 323 radix_tree_preload_end();
324
325 trace_xfs_iget_alloc(ip);
353 *ipp = ip; 326 *ipp = ip;
354 return 0; 327 return 0;
355 328
@@ -408,7 +381,7 @@ xfs_iget(
408 return EINVAL; 381 return EINVAL;
409 382
410 /* get the perag structure and ensure that it's inode capable */ 383 /* get the perag structure and ensure that it's inode capable */
411 pag = xfs_get_perag(mp, ino); 384 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
412 if (!pag->pagi_inodeok) 385 if (!pag->pagi_inodeok)
413 return EINVAL; 386 return EINVAL;
414 ASSERT(pag->pag_ici_init); 387 ASSERT(pag->pag_ici_init);
@@ -432,7 +405,7 @@ again:
432 if (error) 405 if (error)
433 goto out_error_or_again; 406 goto out_error_or_again;
434 } 407 }
435 xfs_put_perag(mp, pag); 408 xfs_perag_put(pag);
436 409
437 *ipp = ip; 410 *ipp = ip;
438 411
@@ -451,7 +424,7 @@ out_error_or_again:
451 delay(1); 424 delay(1);
452 goto again; 425 goto again;
453 } 426 }
454 xfs_put_perag(mp, pag); 427 xfs_perag_put(pag);
455 return error; 428 return error;
456} 429}
457 430
@@ -511,19 +484,23 @@ xfs_ireclaim(
511{ 484{
512 struct xfs_mount *mp = ip->i_mount; 485 struct xfs_mount *mp = ip->i_mount;
513 struct xfs_perag *pag; 486 struct xfs_perag *pag;
487 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
514 488
515 XFS_STATS_INC(xs_ig_reclaims); 489 XFS_STATS_INC(xs_ig_reclaims);
516 490
517 /* 491 /*
518 * Remove the inode from the per-AG radix tree. It doesn't matter 492 * Remove the inode from the per-AG radix tree.
519 * if it was never added to it because radix_tree_delete can deal 493 *
520 * with that case just fine. 494 * Because radix_tree_delete won't complain even if the item was never
495 * added to the tree, assert that it's been there before to catch
496 * problems with the inode lifetime early on.
521 */ 497 */
522 pag = xfs_get_perag(mp, ip->i_ino); 498 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
523 write_lock(&pag->pag_ici_lock); 499 write_lock(&pag->pag_ici_lock);
524 radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); 500 if (!radix_tree_delete(&pag->pag_ici_root, agino))
501 ASSERT(0);
525 write_unlock(&pag->pag_ici_lock); 502 write_unlock(&pag->pag_ici_lock);
526 xfs_put_perag(mp, pag); 503 xfs_perag_put(pag);
527 504
528 /* 505 /*
529 * Here we do an (almost) spurious inode lock in order to coordinate 506 * Here we do an (almost) spurious inode lock in order to coordinate
@@ -636,7 +613,7 @@ xfs_ilock(
636 else if (lock_flags & XFS_ILOCK_SHARED) 613 else if (lock_flags & XFS_ILOCK_SHARED)
637 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 614 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
638 615
639 xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address); 616 trace_xfs_ilock(ip, lock_flags, _RET_IP_);
640} 617}
641 618
642/* 619/*
@@ -681,7 +658,7 @@ xfs_ilock_nowait(
681 if (!mrtryaccess(&ip->i_lock)) 658 if (!mrtryaccess(&ip->i_lock))
682 goto out_undo_iolock; 659 goto out_undo_iolock;
683 } 660 }
684 xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address); 661 trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
685 return 1; 662 return 1;
686 663
687 out_undo_iolock: 664 out_undo_iolock:
@@ -743,7 +720,7 @@ xfs_iunlock(
743 xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp, 720 xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp,
744 (xfs_log_item_t*)(ip->i_itemp)); 721 (xfs_log_item_t*)(ip->i_itemp));
745 } 722 }
746 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); 723 trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
747} 724}
748 725
749/* 726/*
@@ -762,6 +739,8 @@ xfs_ilock_demote(
762 mrdemote(&ip->i_lock); 739 mrdemote(&ip->i_lock);
763 if (lock_flags & XFS_IOLOCK_EXCL) 740 if (lock_flags & XFS_IOLOCK_EXCL)
764 mrdemote(&ip->i_iolock); 741 mrdemote(&ip->i_iolock);
742
743 trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
765} 744}
766 745
767#ifdef DEBUG 746#ifdef DEBUG
@@ -792,52 +771,3 @@ xfs_isilocked(
792 return 1; 771 return 1;
793} 772}
794#endif 773#endif
795
796#ifdef XFS_INODE_TRACE
797
798#define KTRACE_ENTER(ip, vk, s, line, ra) \
799 ktrace_enter((ip)->i_trace, \
800/* 0 */ (void *)(__psint_t)(vk), \
801/* 1 */ (void *)(s), \
802/* 2 */ (void *)(__psint_t) line, \
803/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \
804/* 4 */ (void *)(ra), \
805/* 5 */ NULL, \
806/* 6 */ (void *)(__psint_t)current_cpu(), \
807/* 7 */ (void *)(__psint_t)current_pid(), \
808/* 8 */ (void *)__return_address, \
809/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL)
810
811/*
812 * Vnode tracing code.
813 */
814void
815_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra)
816{
817 KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra);
818}
819
820void
821_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra)
822{
823 KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra);
824}
825
826void
827xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra)
828{
829 KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra);
830}
831
832void
833_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra)
834{
835 KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra);
836}
837
838void
839xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra)
840{
841 KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra);
842}
843#endif /* XFS_INODE_TRACE */
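
The xfs_iget_cache_hit() hunks are the subtlest part of this diff, and the interleaved columns make the ordering hard to follow. Condensed from the hunks above (re-initialisation and error paths elided), the recycle path now works in three steps: mark XFS_IRECLAIM under i_flags_lock so background reclaim skips the inode, drop the locks to do the blocking re-initialisation, then retake pag_ici_lock exclusively (the radix-tree tag update requires it) before clearing the reclaim state and tag together:

	spin_lock(&ip->i_flags_lock);
	ip->i_flags |= XFS_IRECLAIM;	/* keep xfs_reclaim_inode away */
	spin_unlock(&ip->i_flags_lock);
	read_unlock(&pag->pag_ici_lock);

	/* ... blocking re-initialisation of the VFS inode ... */

	write_lock(&pag->pag_ici_lock);	/* tag updates need it exclusive */
	spin_lock(&ip->i_flags_lock);
	ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM);
	ip->i_flags |= XFS_INEW;
	__xfs_inode_clear_reclaim_tag(mp, pag, ip);
	inode->i_state = I_NEW;
	spin_unlock(&ip->i_flags_lock);
	write_unlock(&pag->pag_ici_lock);
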
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b92a4fa2a0a1..0ffd56447045 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -47,10 +47,10 @@
47#include "xfs_rw.h" 47#include "xfs_rw.h"
48#include "xfs_error.h" 48#include "xfs_error.h"
49#include "xfs_utils.h" 49#include "xfs_utils.h"
50#include "xfs_dir2_trace.h"
51#include "xfs_quota.h" 50#include "xfs_quota.h"
52#include "xfs_filestream.h" 51#include "xfs_filestream.h"
53#include "xfs_vnodeops.h" 52#include "xfs_vnodeops.h"
53#include "xfs_trace.h"
54 54
55kmem_zone_t *xfs_ifork_zone; 55kmem_zone_t *xfs_ifork_zone;
56kmem_zone_t *xfs_inode_zone; 56kmem_zone_t *xfs_inode_zone;
@@ -151,7 +151,7 @@ xfs_imap_to_bp(
151 "an error %d on %s. Returning error.", 151 "an error %d on %s. Returning error.",
152 error, mp->m_fsname); 152 error, mp->m_fsname);
153 } else { 153 } else {
154 ASSERT(buf_flags & XFS_BUF_TRYLOCK); 154 ASSERT(buf_flags & XBF_TRYLOCK);
155 } 155 }
156 return error; 156 return error;
157 } 157 }
@@ -239,7 +239,7 @@ xfs_inotobp(
239 if (error) 239 if (error)
240 return error; 240 return error;
241 241
242 error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); 242 error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags);
243 if (error) 243 if (error)
244 return error; 244 return error;
245 245
@@ -285,7 +285,7 @@ xfs_itobp(
285 return error; 285 return error;
286 286
287 if (!bp) { 287 if (!bp) {
288 ASSERT(buf_flags & XFS_BUF_TRYLOCK); 288 ASSERT(buf_flags & XBF_TRYLOCK);
289 ASSERT(tp == NULL); 289 ASSERT(tp == NULL);
290 *bpp = NULL; 290 *bpp = NULL;
291 return EAGAIN; 291 return EAGAIN;
@@ -807,7 +807,7 @@ xfs_iread(
807 * Get pointers to the on-disk inode and the buffer containing it. 807 * Get pointers to the on-disk inode and the buffer containing it.
808 */ 808 */
809 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, 809 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp,
810 XFS_BUF_LOCK, iget_flags); 810 XBF_LOCK, iget_flags);
811 if (error) 811 if (error)
812 return error; 812 return error;
813 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 813 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
@@ -1291,42 +1291,6 @@ xfs_file_last_byte(
1291 return last_byte; 1291 return last_byte;
1292} 1292}
1293 1293
1294#if defined(XFS_RW_TRACE)
1295STATIC void
1296xfs_itrunc_trace(
1297 int tag,
1298 xfs_inode_t *ip,
1299 int flag,
1300 xfs_fsize_t new_size,
1301 xfs_off_t toss_start,
1302 xfs_off_t toss_finish)
1303{
1304 if (ip->i_rwtrace == NULL) {
1305 return;
1306 }
1307
1308 ktrace_enter(ip->i_rwtrace,
1309 (void*)((long)tag),
1310 (void*)ip,
1311 (void*)(unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff),
1312 (void*)(unsigned long)(ip->i_d.di_size & 0xffffffff),
1313 (void*)((long)flag),
1314 (void*)(unsigned long)((new_size >> 32) & 0xffffffff),
1315 (void*)(unsigned long)(new_size & 0xffffffff),
1316 (void*)(unsigned long)((toss_start >> 32) & 0xffffffff),
1317 (void*)(unsigned long)(toss_start & 0xffffffff),
1318 (void*)(unsigned long)((toss_finish >> 32) & 0xffffffff),
1319 (void*)(unsigned long)(toss_finish & 0xffffffff),
1320 (void*)(unsigned long)current_cpu(),
1321 (void*)(unsigned long)current_pid(),
1322 (void*)NULL,
1323 (void*)NULL,
1324 (void*)NULL);
1325}
1326#else
1327#define xfs_itrunc_trace(tag, ip, flag, new_size, toss_start, toss_finish)
1328#endif
1329
1330/* 1294/*
1331 * Start the truncation of the file to new_size. The new size 1295 * Start the truncation of the file to new_size. The new size
1332 * must be smaller than the current size. This routine will 1296 * must be smaller than the current size. This routine will
@@ -1409,8 +1373,7 @@ xfs_itruncate_start(
1409 return 0; 1373 return 0;
1410 } 1374 }
1411 last_byte = xfs_file_last_byte(ip); 1375 last_byte = xfs_file_last_byte(ip);
1412 xfs_itrunc_trace(XFS_ITRUNC_START, ip, flags, new_size, toss_start, 1376 trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte);
1413 last_byte);
1414 if (last_byte > toss_start) { 1377 if (last_byte > toss_start) {
1415 if (flags & XFS_ITRUNC_DEFINITE) { 1378 if (flags & XFS_ITRUNC_DEFINITE) {
1416 xfs_tosspages(ip, toss_start, 1379 xfs_tosspages(ip, toss_start,
@@ -1514,7 +1477,8 @@ xfs_itruncate_finish(
1514 new_size = 0LL; 1477 new_size = 0LL;
1515 } 1478 }
1516 first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); 1479 first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
1517 xfs_itrunc_trace(XFS_ITRUNC_FINISH1, ip, 0, new_size, 0, 0); 1480 trace_xfs_itruncate_finish_start(ip, new_size);
1481
1518 /* 1482 /*
1519 * The first thing we do is set the size to new_size permanently 1483 * The first thing we do is set the size to new_size permanently
1520 * on disk. This way we don't have to worry about anyone ever 1484 * on disk. This way we don't have to worry about anyone ever
@@ -1731,7 +1695,7 @@ xfs_itruncate_finish(
1731 ASSERT((new_size != 0) || 1695 ASSERT((new_size != 0) ||
1732 (fork == XFS_ATTR_FORK) || 1696 (fork == XFS_ATTR_FORK) ||
1733 (ip->i_d.di_nextents == 0)); 1697 (ip->i_d.di_nextents == 0));
1734 xfs_itrunc_trace(XFS_ITRUNC_FINISH2, ip, 0, new_size, 0, 0); 1698 trace_xfs_itruncate_finish_end(ip, new_size);
1735 return 0; 1699 return 0;
1736} 1700}
1737 1701
@@ -1787,7 +1751,7 @@ xfs_iunlink(
1787 * Here we put the head pointer into our next pointer, 1751 * Here we put the head pointer into our next pointer,
1788 * and then we fall through to point the head at us. 1752 * and then we fall through to point the head at us.
1789 */ 1753 */
1790 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); 1754 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
1791 if (error) 1755 if (error)
1792 return error; 1756 return error;
1793 1757
@@ -1869,7 +1833,7 @@ xfs_iunlink_remove(
1869 * of dealing with the buffer when there is no need to 1833 * of dealing with the buffer when there is no need to
1870 * change it. 1834 * change it.
1871 */ 1835 */
1872 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); 1836 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
1873 if (error) { 1837 if (error) {
1874 cmn_err(CE_WARN, 1838 cmn_err(CE_WARN,
1875 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 1839 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.",
@@ -1931,7 +1895,7 @@ xfs_iunlink_remove(
1931 * Now last_ibp points to the buffer previous to us on 1895 * Now last_ibp points to the buffer previous to us on
1932 * the unlinked list. Pull us from the list. 1896 * the unlinked list. Pull us from the list.
1933 */ 1897 */
1934 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); 1898 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
1935 if (error) { 1899 if (error) {
1936 cmn_err(CE_WARN, 1900 cmn_err(CE_WARN,
1937 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 1901 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.",
@@ -1982,8 +1946,9 @@ xfs_ifree_cluster(
1982 xfs_inode_t *ip, **ip_found; 1946 xfs_inode_t *ip, **ip_found;
1983 xfs_inode_log_item_t *iip; 1947 xfs_inode_log_item_t *iip;
1984 xfs_log_item_t *lip; 1948 xfs_log_item_t *lip;
1985 xfs_perag_t *pag = xfs_get_perag(mp, inum); 1949 struct xfs_perag *pag;
1986 1950
1951 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
1987 if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { 1952 if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
1988 blks_per_cluster = 1; 1953 blks_per_cluster = 1;
1989 ninodes = mp->m_sb.sb_inopblock; 1954 ninodes = mp->m_sb.sb_inopblock;
@@ -2075,7 +2040,7 @@ xfs_ifree_cluster(
2075 2040
2076 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 2041 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
2077 mp->m_bsize * blks_per_cluster, 2042 mp->m_bsize * blks_per_cluster,
2078 XFS_BUF_LOCK); 2043 XBF_LOCK);
2079 2044
2080 pre_flushed = 0; 2045 pre_flushed = 0;
2081 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 2046 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
@@ -2124,7 +2089,7 @@ xfs_ifree_cluster(
2124 } 2089 }
2125 2090
2126 kmem_free(ip_found); 2091 kmem_free(ip_found);
2127 xfs_put_perag(mp, pag); 2092 xfs_perag_put(pag);
2128} 2093}
2129 2094
2130/* 2095/*
@@ -2186,7 +2151,7 @@ xfs_ifree(
2186 2151
2187 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2152 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2188 2153
2189 error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XFS_BUF_LOCK); 2154 error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XBF_LOCK);
2190 if (error) 2155 if (error)
2191 return error; 2156 return error;
2192 2157
@@ -2474,72 +2439,31 @@ xfs_idestroy_fork(
2474} 2439}
2475 2440
2476/* 2441/*
2477 * Increment the pin count of the given buffer. 2442 * This is called to unpin an inode. The caller must have the inode locked
2478 * This value is protected by ipinlock spinlock in the mount structure. 2443 * in at least shared mode so that the buffer cannot be subsequently pinned
2479 */ 2444 * once someone is waiting for it to be unpinned.
2480void
2481xfs_ipin(
2482 xfs_inode_t *ip)
2483{
2484 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2485
2486 atomic_inc(&ip->i_pincount);
2487}
2488
2489/*
2490 * Decrement the pin count of the given inode, and wake up
2491 * anyone in xfs_iwait_unpin() if the count goes to 0. The
2492 * inode must have been previously pinned with a call to xfs_ipin().
2493 */ 2445 */
2494void 2446static void
2495xfs_iunpin( 2447xfs_iunpin_nowait(
2496 xfs_inode_t *ip) 2448 struct xfs_inode *ip)
2497{
2498 ASSERT(atomic_read(&ip->i_pincount) > 0);
2499
2500 if (atomic_dec_and_test(&ip->i_pincount))
2501 wake_up(&ip->i_ipin_wait);
2502}
2503
2504/*
2505 * This is called to unpin an inode. It can be directed to wait or to return
2506 * immediately without waiting for the inode to be unpinned. The caller must
2507 * have the inode locked in at least shared mode so that the buffer cannot be
2508 * subsequently pinned once someone is waiting for it to be unpinned.
2509 */
2510STATIC void
2511__xfs_iunpin_wait(
2512 xfs_inode_t *ip,
2513 int wait)
2514{ 2449{
2515 xfs_inode_log_item_t *iip = ip->i_itemp;
2516
2517 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2450 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2518 if (atomic_read(&ip->i_pincount) == 0)
2519 return;
2520 2451
2521 /* Give the log a push to start the unpinning I/O */ 2452 /* Give the log a push to start the unpinning I/O */
2522 xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ? 2453 xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
2523 iip->ili_last_lsn : 0, XFS_LOG_FORCE);
2524 if (wait)
2525 wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
2526}
2527 2454
2528static inline void
2529xfs_iunpin_wait(
2530 xfs_inode_t *ip)
2531{
2532 __xfs_iunpin_wait(ip, 1);
2533} 2455}
2534 2456
2535static inline void 2457void
2536xfs_iunpin_nowait( 2458xfs_iunpin_wait(
2537 xfs_inode_t *ip) 2459 struct xfs_inode *ip)
2538{ 2460{
2539 __xfs_iunpin_wait(ip, 0); 2461 if (xfs_ipincount(ip)) {
2462 xfs_iunpin_nowait(ip);
2463 wait_event(ip->i_ipin_wait, (xfs_ipincount(ip) == 0));
2464 }
2540} 2465}
2541 2466
2542
2543/* 2467/*
2544 * xfs_iextents_copy() 2468 * xfs_iextents_copy()
2545 * 2469 *
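
Because the unpin rework above is spread across removed and added columns, here is its post-patch form, condensed from the hunk: the old wait/nowait pair built on __xfs_iunpin_wait() collapses into a nowait push of the log to the inode's last LSN, with the waiting variant layered on top:

	static void
	xfs_iunpin_nowait(
		struct xfs_inode	*ip)
	{
		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));

		/* Give the log a push to start the unpinning I/O */
		xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
	}

	void
	xfs_iunpin_wait(
		struct xfs_inode	*ip)
	{
		if (xfs_ipincount(ip)) {
			xfs_iunpin_nowait(ip);
			wait_event(ip->i_ipin_wait, (xfs_ipincount(ip) == 0));
		}
	}
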
@@ -2711,7 +2635,7 @@ xfs_iflush_cluster(
2711 xfs_buf_t *bp) 2635 xfs_buf_t *bp)
2712{ 2636{
2713 xfs_mount_t *mp = ip->i_mount; 2637 xfs_mount_t *mp = ip->i_mount;
2714 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); 2638 struct xfs_perag *pag;
2715 unsigned long first_index, mask; 2639 unsigned long first_index, mask;
2716 unsigned long inodes_per_cluster; 2640 unsigned long inodes_per_cluster;
2717 int ilist_size; 2641 int ilist_size;
@@ -2722,6 +2646,7 @@ xfs_iflush_cluster(
2722 int bufwasdelwri; 2646 int bufwasdelwri;
2723 int i; 2647 int i;
2724 2648
2649 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
2725 ASSERT(pag->pagi_inodeok); 2650 ASSERT(pag->pagi_inodeok);
2726 ASSERT(pag->pag_ici_init); 2651 ASSERT(pag->pag_ici_init);
2727 2652
@@ -2729,7 +2654,7 @@ xfs_iflush_cluster(
2729 ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); 2654 ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
2730 ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); 2655 ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
2731 if (!ilist) 2656 if (!ilist)
2732 return 0; 2657 goto out_put;
2733 2658
2734 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 2659 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
2735 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 2660 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
@@ -2798,6 +2723,8 @@ xfs_iflush_cluster(
2798out_free: 2723out_free:
2799 read_unlock(&pag->pag_ici_lock); 2724 read_unlock(&pag->pag_ici_lock);
2800 kmem_free(ilist); 2725 kmem_free(ilist);
2726out_put:
2727 xfs_perag_put(pag);
2801 return 0; 2728 return 0;
2802 2729
2803 2730
@@ -2841,6 +2768,7 @@ cluster_corrupt_out:
2841 */ 2768 */
2842 xfs_iflush_abort(iq); 2769 xfs_iflush_abort(iq);
2843 kmem_free(ilist); 2770 kmem_free(ilist);
2771 xfs_perag_put(pag);
2844 return XFS_ERROR(EFSCORRUPTED); 2772 return XFS_ERROR(EFSCORRUPTED);
2845} 2773}
2846 2774
@@ -2863,8 +2791,6 @@ xfs_iflush(
2863 xfs_dinode_t *dip; 2791 xfs_dinode_t *dip;
2864 xfs_mount_t *mp; 2792 xfs_mount_t *mp;
2865 int error; 2793 int error;
2866 int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK);
2867 enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
2868 2794
2869 XFS_STATS_INC(xs_iflush_count); 2795 XFS_STATS_INC(xs_iflush_count);
2870 2796
@@ -2877,15 +2803,6 @@ xfs_iflush(
2877 mp = ip->i_mount; 2803 mp = ip->i_mount;
2878 2804
2879 /* 2805 /*
2880 * If the inode isn't dirty, then just release the inode
2881 * flush lock and do nothing.
2882 */
2883 if (xfs_inode_clean(ip)) {
2884 xfs_ifunlock(ip);
2885 return 0;
2886 }
2887
2888 /*
2889 * We can't flush the inode until it is unpinned, so wait for it if we 2806 * We can't flush the inode until it is unpinned, so wait for it if we
2890 * are allowed to block. We know no one new can pin it, because we are 2807 * are allowed to block. We know no one new can pin it, because we are
2891 * holding the inode lock shared and you need to hold it exclusively to 2808 * holding the inode lock shared and you need to hold it exclusively to
@@ -2896,7 +2813,7 @@ xfs_iflush(
2896 * in the same cluster are dirty, they will probably write the inode 2813 * in the same cluster are dirty, they will probably write the inode
2897 * out for us if they occur after the log force completes. 2814 * out for us if they occur after the log force completes.
2898 */ 2815 */
2899 if (noblock && xfs_ipincount(ip)) { 2816 if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
2900 xfs_iunpin_nowait(ip); 2817 xfs_iunpin_nowait(ip);
2901 xfs_ifunlock(ip); 2818 xfs_ifunlock(ip);
2902 return EAGAIN; 2819 return EAGAIN;
@@ -2904,6 +2821,19 @@ xfs_iflush(
2904 xfs_iunpin_wait(ip); 2821 xfs_iunpin_wait(ip);
2905 2822
2906 /* 2823 /*
2824 * For stale inodes we cannot rely on the backing buffer remaining
2825 * stale in cache for the remaining life of the stale inode and so
2826 * xfs_itobp() below may give us a buffer that no longer contains
2827 * inodes below. We have to check this after ensuring the inode is
2828 * unpinned so that it is safe to reclaim the stale inode after the
2829 * flush call.
2830 */
2831 if (xfs_iflags_test(ip, XFS_ISTALE)) {
2832 xfs_ifunlock(ip);
2833 return 0;
2834 }
2835
2836 /*
2907 * This may have been unpinned because the filesystem is shutting 2837 * This may have been unpinned because the filesystem is shutting
2908 * down forcibly. If that's the case we must not write this inode 2838 * down forcibly. If that's the case we must not write this inode
2909 * to disk, because the log record didn't make it to disk! 2839 * to disk, because the log record didn't make it to disk!
@@ -2917,60 +2847,10 @@ xfs_iflush(
2917 } 2847 }
2918 2848
2919 /* 2849 /*
2920 * Decide how buffer will be flushed out. This is done before
2921 * the call to xfs_iflush_int because this field is zeroed by it.
2922 */
2923 if (iip != NULL && iip->ili_format.ilf_fields != 0) {
2924 /*
2925 * Flush out the inode buffer according to the directions
2926 * of the caller. In the cases where the caller has given
2927 * us a choice choose the non-delwri case. This is because
2928 * the inode is in the AIL and we need to get it out soon.
2929 */
2930 switch (flags) {
2931 case XFS_IFLUSH_SYNC:
2932 case XFS_IFLUSH_DELWRI_ELSE_SYNC:
2933 flags = 0;
2934 break;
2935 case XFS_IFLUSH_ASYNC_NOBLOCK:
2936 case XFS_IFLUSH_ASYNC:
2937 case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
2938 flags = INT_ASYNC;
2939 break;
2940 case XFS_IFLUSH_DELWRI:
2941 flags = INT_DELWRI;
2942 break;
2943 default:
2944 ASSERT(0);
2945 flags = 0;
2946 break;
2947 }
2948 } else {
2949 switch (flags) {
2950 case XFS_IFLUSH_DELWRI_ELSE_SYNC:
2951 case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
2952 case XFS_IFLUSH_DELWRI:
2953 flags = INT_DELWRI;
2954 break;
2955 case XFS_IFLUSH_ASYNC_NOBLOCK:
2956 case XFS_IFLUSH_ASYNC:
2957 flags = INT_ASYNC;
2958 break;
2959 case XFS_IFLUSH_SYNC:
2960 flags = 0;
2961 break;
2962 default:
2963 ASSERT(0);
2964 flags = 0;
2965 break;
2966 }
2967 }
2968
2969 /*
2970 * Get the buffer containing the on-disk inode. 2850 * Get the buffer containing the on-disk inode.
2971 */ 2851 */
2972 error = xfs_itobp(mp, NULL, ip, &dip, &bp, 2852 error = xfs_itobp(mp, NULL, ip, &dip, &bp,
2973 noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); 2853 (flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK);
2974 if (error || !bp) { 2854 if (error || !bp) {
2975 xfs_ifunlock(ip); 2855 xfs_ifunlock(ip);
2976 return error; 2856 return error;
@@ -2988,7 +2868,7 @@ xfs_iflush(
2988 * get stuck waiting in the write for too long. 2868 * get stuck waiting in the write for too long.
2989 */ 2869 */
2990 if (XFS_BUF_ISPINNED(bp)) 2870 if (XFS_BUF_ISPINNED(bp))
2991 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 2871 xfs_log_force(mp, 0);
2992 2872
2993 /* 2873 /*
2994 * inode clustering: 2874 * inode clustering:
@@ -2998,13 +2878,10 @@ xfs_iflush(
2998 if (error) 2878 if (error)
2999 goto cluster_corrupt_out; 2879 goto cluster_corrupt_out;
3000 2880
3001 if (flags & INT_DELWRI) { 2881 if (flags & SYNC_WAIT)
3002 xfs_bdwrite(mp, bp);
3003 } else if (flags & INT_ASYNC) {
3004 error = xfs_bawrite(mp, bp);
3005 } else {
3006 error = xfs_bwrite(mp, bp); 2882 error = xfs_bwrite(mp, bp);
3007 } 2883 else
2884 xfs_bdwrite(mp, bp);
3008 return error; 2885 return error;
3009 2886
3010corrupt_out: 2887corrupt_out:
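
The xfs_iflush() hunks replace the old five-way XFS_IFLUSH_* flag matrix with a single SYNC_WAIT test: a waiting flush takes the blocking buffer lock and writes synchronously, anything else trylocks the buffer and uses a delayed write. Caller-side sketch (the call sites are hypothetical; the flag values are the ones used in the diff):

	/* synchronous flush: XBF_LOCK itobp, then xfs_bwrite() */
	error = xfs_iflush(ip, SYNC_WAIT);

	/* non-blocking flush: XBF_TRYLOCK itobp, then xfs_bdwrite() */
	error = xfs_iflush(ip, 0);
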
@@ -3039,16 +2916,6 @@ xfs_iflush_int(
3039 iip = ip->i_itemp; 2916 iip = ip->i_itemp;
3040 mp = ip->i_mount; 2917 mp = ip->i_mount;
3041 2918
3042
3043 /*
3044 * If the inode isn't dirty, then just release the inode
3045 * flush lock and do nothing.
3046 */
3047 if (xfs_inode_clean(ip)) {
3048 xfs_ifunlock(ip);
3049 return 0;
3050 }
3051
3052 /* set *dip = inode's place in the buffer */ 2919 /* set *dip = inode's place in the buffer */
3053 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 2920 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
3054 2921
@@ -3252,23 +3119,6 @@ corrupt_out:
3252 return XFS_ERROR(EFSCORRUPTED); 3119 return XFS_ERROR(EFSCORRUPTED);
3253} 3120}
3254 3121
3255
3256
3257#ifdef XFS_ILOCK_TRACE
3258void
3259xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra)
3260{
3261 ktrace_enter(ip->i_lock_trace,
3262 (void *)ip,
3263 (void *)(unsigned long)lock, /* 1 = LOCK, 3=UNLOCK, etc */
3264 (void *)(unsigned long)lockflags, /* XFS_ILOCK_EXCL etc */
3265 (void *)ra, /* caller of ilock */
3266 (void *)(unsigned long)current_cpu(),
3267 (void *)(unsigned long)current_pid(),
3268 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);
3269}
3270#endif
3271
3272/* 3122/*
3273 * Return a pointer to the extent record at file index idx. 3123 * Return a pointer to the extent record at file index idx.
3274 */ 3124 */
@@ -3300,13 +3150,17 @@ xfs_iext_get_ext(
3300 */ 3150 */
3301void 3151void
3302xfs_iext_insert( 3152xfs_iext_insert(
3303 xfs_ifork_t *ifp, /* inode fork pointer */ 3153 xfs_inode_t *ip, /* incore inode pointer */
3304 xfs_extnum_t idx, /* starting index of new items */ 3154 xfs_extnum_t idx, /* starting index of new items */
3305 xfs_extnum_t count, /* number of inserted items */ 3155 xfs_extnum_t count, /* number of inserted items */
3306 xfs_bmbt_irec_t *new) /* items to insert */ 3156 xfs_bmbt_irec_t *new, /* items to insert */
3157 int state) /* type of extent conversion */
3307{ 3158{
3159 xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
3308 xfs_extnum_t i; /* extent record index */ 3160 xfs_extnum_t i; /* extent record index */
3309 3161
3162 trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
3163
3310 ASSERT(ifp->if_flags & XFS_IFEXTENTS); 3164 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
3311 xfs_iext_add(ifp, idx, count); 3165 xfs_iext_add(ifp, idx, count);
3312 for (i = idx; i < idx + count; i++, new++) 3166 for (i = idx; i < idx + count; i++, new++)
@@ -3549,13 +3403,17 @@ xfs_iext_add_indirect_multi(
3549 */ 3403 */
3550void 3404void
3551xfs_iext_remove( 3405xfs_iext_remove(
3552 xfs_ifork_t *ifp, /* inode fork pointer */ 3406 xfs_inode_t *ip, /* incore inode pointer */
3553 xfs_extnum_t idx, /* index to begin removing exts */ 3407 xfs_extnum_t idx, /* index to begin removing exts */
3554 int ext_diff) /* number of extents to remove */ 3408 int ext_diff, /* number of extents to remove */
3409 int state) /* type of extent conversion */
3555{ 3410{
3411 xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
3556 xfs_extnum_t nextents; /* number of extents in file */ 3412 xfs_extnum_t nextents; /* number of extents in file */
3557 int new_size; /* size of extents after removal */ 3413 int new_size; /* size of extents after removal */
3558 3414
3415 trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
3416
3559 ASSERT(ext_diff > 0); 3417 ASSERT(ext_diff > 0);
3560 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 3418 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3561 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); 3419 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 41555de1d1db..9965e40a4615 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -213,7 +213,6 @@ typedef struct xfs_icdinode {
213 213
214struct bhv_desc; 214struct bhv_desc;
215struct cred; 215struct cred;
216struct ktrace;
217struct xfs_buf; 216struct xfs_buf;
218struct xfs_bmap_free; 217struct xfs_bmap_free;
219struct xfs_bmbt_irec; 218struct xfs_bmbt_irec;
@@ -222,13 +221,6 @@ struct xfs_mount;
222struct xfs_trans; 221struct xfs_trans;
223struct xfs_dquot; 222struct xfs_dquot;
224 223
225#if defined(XFS_ILOCK_TRACE)
226#define XFS_ILOCK_KTRACE_SIZE 32
227extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *);
228#else
229#define xfs_ilock_trace(i,n,f,ra)
230#endif
231
232typedef struct dm_attrs_s { 224typedef struct dm_attrs_s {
233 __uint32_t da_dmevmask; /* DMIG event mask */ 225 __uint32_t da_dmevmask; /* DMIG event mask */
234 __uint16_t da_dmstate; /* DMIG state info */ 226 __uint16_t da_dmstate; /* DMIG state info */
@@ -271,26 +263,6 @@ typedef struct xfs_inode {
271 263
272 /* VFS inode */ 264 /* VFS inode */
273 struct inode i_vnode; /* embedded VFS inode */ 265 struct inode i_vnode; /* embedded VFS inode */
274
275 /* Trace buffers per inode. */
276#ifdef XFS_INODE_TRACE
277 struct ktrace *i_trace; /* general inode trace */
278#endif
279#ifdef XFS_BMAP_TRACE
280 struct ktrace *i_xtrace; /* inode extent list trace */
281#endif
282#ifdef XFS_BTREE_TRACE
283 struct ktrace *i_btrace; /* inode bmap btree trace */
284#endif
285#ifdef XFS_RW_TRACE
286 struct ktrace *i_rwtrace; /* inode read/write trace */
287#endif
288#ifdef XFS_ILOCK_TRACE
289 struct ktrace *i_lock_trace; /* inode lock/unlock trace */
290#endif
291#ifdef XFS_DIR2_TRACE
292 struct ktrace *i_dir_trace; /* inode directory trace */
293#endif
294} xfs_inode_t; 266} xfs_inode_t;
295 267
296#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \ 268#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \
@@ -406,6 +378,14 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
406#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ 378#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
407 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) 379 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
408 380
381#define XFS_LOCK_FLAGS \
382 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \
383 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \
384 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \
385 { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \
386 { XFS_IUNLOCK_NONOTIFY, "IUNLOCK_NONOTIFY" }
387
388
409/* 389/*
410 * Flags for lockdep annotations. 390 * Flags for lockdep annotations.
411 * 391 *
@@ -440,21 +420,15 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
440#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) 420#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
441 421
442/* 422/*
443 * Flags for xfs_iflush()
444 */
445#define XFS_IFLUSH_DELWRI_ELSE_SYNC 1
446#define XFS_IFLUSH_DELWRI_ELSE_ASYNC 2
447#define XFS_IFLUSH_SYNC 3
448#define XFS_IFLUSH_ASYNC 4
449#define XFS_IFLUSH_DELWRI 5
450#define XFS_IFLUSH_ASYNC_NOBLOCK 6
451
452/*
453 * Flags for xfs_itruncate_start(). 423 * Flags for xfs_itruncate_start().
454 */ 424 */
455#define XFS_ITRUNC_DEFINITE 0x1 425#define XFS_ITRUNC_DEFINITE 0x1
456#define XFS_ITRUNC_MAYBE 0x2 426#define XFS_ITRUNC_MAYBE 0x2
457 427
428#define XFS_ITRUNC_FLAGS \
429 { XFS_ITRUNC_DEFINITE, "DEFINITE" }, \
430 { XFS_ITRUNC_MAYBE, "MAYBE" }
431
458/* 432/*
459 * For multiple groups support: if S_ISGID bit is set in the parent 433 * For multiple groups support: if S_ISGID bit is set in the parent
460 * directory, group of new file is set to that of the parent, and 434 * directory, group of new file is set to that of the parent, and
@@ -497,58 +471,26 @@ int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
497int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); 471int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
498 472
499void xfs_iext_realloc(xfs_inode_t *, int, int); 473void xfs_iext_realloc(xfs_inode_t *, int, int);
500void xfs_ipin(xfs_inode_t *); 474void xfs_iunpin_wait(xfs_inode_t *);
501void xfs_iunpin(xfs_inode_t *);
502int xfs_iflush(xfs_inode_t *, uint); 475int xfs_iflush(xfs_inode_t *, uint);
503void xfs_ichgtime(xfs_inode_t *, int); 476void xfs_ichgtime(xfs_inode_t *, int);
504void xfs_lock_inodes(xfs_inode_t **, int, uint); 477void xfs_lock_inodes(xfs_inode_t **, int, uint);
505void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 478void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
506 479
507void xfs_synchronize_times(xfs_inode_t *); 480void xfs_synchronize_times(xfs_inode_t *);
481void xfs_mark_inode_dirty(xfs_inode_t *);
508void xfs_mark_inode_dirty_sync(xfs_inode_t *); 482void xfs_mark_inode_dirty_sync(xfs_inode_t *);
509 483
510#if defined(XFS_INODE_TRACE)
511
512#define INODE_TRACE_SIZE 16 /* number of trace entries */
513#define INODE_KTRACE_ENTRY 1
514#define INODE_KTRACE_EXIT 2
515#define INODE_KTRACE_HOLD 3
516#define INODE_KTRACE_REF 4
517#define INODE_KTRACE_RELE 5
518
519extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *);
520extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *);
521extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *);
522extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *);
523extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *);
524#define xfs_itrace_entry(ip) \
525 _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address)
526#define xfs_itrace_exit(ip) \
527 _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address)
528#define xfs_itrace_exit_tag(ip, tag) \
529 _xfs_itrace_exit(ip, tag, (inst_t *)__return_address)
530#define xfs_itrace_ref(ip) \
531 _xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address)
532
533#else
534#define xfs_itrace_entry(a)
535#define xfs_itrace_exit(a)
536#define xfs_itrace_exit_tag(a, b)
537#define xfs_itrace_hold(a, b, c, d)
538#define xfs_itrace_ref(a)
539#define xfs_itrace_rele(a, b, c, d)
540#endif
541
542#define IHOLD(ip) \ 484#define IHOLD(ip) \
543do { \ 485do { \
544 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ 486 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
545 atomic_inc(&(VFS_I(ip)->i_count)); \ 487 atomic_inc(&(VFS_I(ip)->i_count)); \
546 xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ 488 trace_xfs_ihold(ip, _THIS_IP_); \
547} while (0) 489} while (0)
548 490
549#define IRELE(ip) \ 491#define IRELE(ip) \
550do { \ 492do { \
551 xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ 493 trace_xfs_irele(ip, _THIS_IP_); \
552 iput(VFS_I(ip)); \ 494 iput(VFS_I(ip)); \
553} while (0) 495} while (0)
554 496
@@ -577,11 +519,11 @@ int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
577int xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int); 519int xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int);
578 520
579xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); 521xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t);
580void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, 522void xfs_iext_insert(xfs_inode_t *, xfs_extnum_t, xfs_extnum_t,
581 xfs_bmbt_irec_t *); 523 xfs_bmbt_irec_t *, int);
582void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int); 524void xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int);
583void xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int); 525void xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int);
584void xfs_iext_remove(xfs_ifork_t *, xfs_extnum_t, int); 526void xfs_iext_remove(xfs_inode_t *, xfs_extnum_t, int, int);
585void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int); 527void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int);
586void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); 528void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int);
587void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int); 529void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 9794b876d6ff..7bfea8540159 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -41,6 +41,7 @@
41#include "xfs_ialloc.h" 41#include "xfs_ialloc.h"
42#include "xfs_rw.h" 42#include "xfs_rw.h"
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_trace.h"
44 45
45 46
46kmem_zone_t *xfs_ili_zone; /* inode log item zone */ 47kmem_zone_t *xfs_ili_zone; /* inode log item zone */
@@ -227,7 +228,7 @@ xfs_inode_item_format(
227 228
228 vecp->i_addr = (xfs_caddr_t)&iip->ili_format; 229 vecp->i_addr = (xfs_caddr_t)&iip->ili_format;
229 vecp->i_len = sizeof(xfs_inode_log_format_t); 230 vecp->i_len = sizeof(xfs_inode_log_format_t);
230 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT); 231 vecp->i_type = XLOG_REG_TYPE_IFORMAT;
231 vecp++; 232 vecp++;
232 nvecs = 1; 233 nvecs = 1;
233 234
@@ -278,7 +279,7 @@ xfs_inode_item_format(
278 279
279 vecp->i_addr = (xfs_caddr_t)&ip->i_d; 280 vecp->i_addr = (xfs_caddr_t)&ip->i_d;
280 vecp->i_len = sizeof(struct xfs_icdinode); 281 vecp->i_len = sizeof(struct xfs_icdinode);
281 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); 282 vecp->i_type = XLOG_REG_TYPE_ICORE;
282 vecp++; 283 vecp++;
283 nvecs++; 284 nvecs++;
284 iip->ili_format.ilf_fields |= XFS_ILOG_CORE; 285 iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
@@ -335,7 +336,7 @@ xfs_inode_item_format(
335 vecp->i_addr = 336 vecp->i_addr =
336 (char *)(ip->i_df.if_u1.if_extents); 337 (char *)(ip->i_df.if_u1.if_extents);
337 vecp->i_len = ip->i_df.if_bytes; 338 vecp->i_len = ip->i_df.if_bytes;
338 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); 339 vecp->i_type = XLOG_REG_TYPE_IEXT;
339 } else 340 } else
340#endif 341#endif
341 { 342 {
@@ -354,7 +355,7 @@ xfs_inode_item_format(
354 vecp->i_addr = (xfs_caddr_t)ext_buffer; 355 vecp->i_addr = (xfs_caddr_t)ext_buffer;
355 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 356 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
356 XFS_DATA_FORK); 357 XFS_DATA_FORK);
357 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); 358 vecp->i_type = XLOG_REG_TYPE_IEXT;
358 } 359 }
359 ASSERT(vecp->i_len <= ip->i_df.if_bytes); 360 ASSERT(vecp->i_len <= ip->i_df.if_bytes);
360 iip->ili_format.ilf_dsize = vecp->i_len; 361 iip->ili_format.ilf_dsize = vecp->i_len;
@@ -372,7 +373,7 @@ xfs_inode_item_format(
372 ASSERT(ip->i_df.if_broot != NULL); 373 ASSERT(ip->i_df.if_broot != NULL);
373 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; 374 vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot;
374 vecp->i_len = ip->i_df.if_broot_bytes; 375 vecp->i_len = ip->i_df.if_broot_bytes;
375 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT); 376 vecp->i_type = XLOG_REG_TYPE_IBROOT;
376 vecp++; 377 vecp++;
377 nvecs++; 378 nvecs++;
378 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; 379 iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
@@ -398,7 +399,7 @@ xfs_inode_item_format(
398 ASSERT((ip->i_df.if_real_bytes == 0) || 399 ASSERT((ip->i_df.if_real_bytes == 0) ||
399 (ip->i_df.if_real_bytes == data_bytes)); 400 (ip->i_df.if_real_bytes == data_bytes));
400 vecp->i_len = (int)data_bytes; 401 vecp->i_len = (int)data_bytes;
401 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL); 402 vecp->i_type = XLOG_REG_TYPE_ILOCAL;
402 vecp++; 403 vecp++;
403 nvecs++; 404 nvecs++;
404 iip->ili_format.ilf_dsize = (unsigned)data_bytes; 405 iip->ili_format.ilf_dsize = (unsigned)data_bytes;
@@ -476,7 +477,7 @@ xfs_inode_item_format(
476 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 477 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
477 XFS_ATTR_FORK); 478 XFS_ATTR_FORK);
478#endif 479#endif
479 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT); 480 vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
480 iip->ili_format.ilf_asize = vecp->i_len; 481 iip->ili_format.ilf_asize = vecp->i_len;
481 vecp++; 482 vecp++;
482 nvecs++; 483 nvecs++;
@@ -491,7 +492,7 @@ xfs_inode_item_format(
491 ASSERT(ip->i_afp->if_broot != NULL); 492 ASSERT(ip->i_afp->if_broot != NULL);
492 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; 493 vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot;
493 vecp->i_len = ip->i_afp->if_broot_bytes; 494 vecp->i_len = ip->i_afp->if_broot_bytes;
494 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT); 495 vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT;
495 vecp++; 496 vecp++;
496 nvecs++; 497 nvecs++;
497 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; 498 iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
@@ -515,7 +516,7 @@ xfs_inode_item_format(
515 ASSERT((ip->i_afp->if_real_bytes == 0) || 516 ASSERT((ip->i_afp->if_real_bytes == 0) ||
516 (ip->i_afp->if_real_bytes == data_bytes)); 517 (ip->i_afp->if_real_bytes == data_bytes));
517 vecp->i_len = (int)data_bytes; 518 vecp->i_len = (int)data_bytes;
518 XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL); 519 vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL;
519 vecp++; 520 vecp++;
520 nvecs++; 521 nvecs++;
521 iip->ili_format.ilf_asize = (unsigned)data_bytes; 522 iip->ili_format.ilf_asize = (unsigned)data_bytes;
@@ -534,23 +535,23 @@ xfs_inode_item_format(
534 535
535/* 536/*
536 * This is called to pin the inode associated with the inode log 537 * This is called to pin the inode associated with the inode log
537 * item in memory so it cannot be written out. Do this by calling 538 * item in memory so it cannot be written out.
538 * xfs_ipin() to bump the pin count in the inode while holding the
539 * inode pin lock.
540 */ 539 */
541STATIC void 540STATIC void
542xfs_inode_item_pin( 541xfs_inode_item_pin(
543 xfs_inode_log_item_t *iip) 542 xfs_inode_log_item_t *iip)
544{ 543{
545 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 544 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
546 xfs_ipin(iip->ili_inode); 545
546 atomic_inc(&iip->ili_inode->i_pincount);
547} 547}
548 548
549 549
550/* 550/*
551 * This is called to unpin the inode associated with the inode log 551 * This is called to unpin the inode associated with the inode log
552 * item which was previously pinned with a call to xfs_inode_item_pin(). 552 * item which was previously pinned with a call to xfs_inode_item_pin().
553 * Just call xfs_iunpin() on the inode to do this. 553 *
554 * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
554 */ 555 */
555/* ARGSUSED */ 556/* ARGSUSED */
556STATIC void 557STATIC void
@@ -558,7 +559,11 @@ xfs_inode_item_unpin(
558 xfs_inode_log_item_t *iip, 559 xfs_inode_log_item_t *iip,
559 int stale) 560 int stale)
560{ 561{
561 xfs_iunpin(iip->ili_inode); 562 struct xfs_inode *ip = iip->ili_inode;
563
564 ASSERT(atomic_read(&ip->i_pincount) > 0);
565 if (atomic_dec_and_test(&ip->i_pincount))
566 wake_up(&ip->i_ipin_wait);
562} 567}
563 568
564/* ARGSUSED */ 569/* ARGSUSED */
@@ -567,7 +572,7 @@ xfs_inode_item_unpin_remove(
567 xfs_inode_log_item_t *iip, 572 xfs_inode_log_item_t *iip,
568 xfs_trans_t *tp) 573 xfs_trans_t *tp)
569{ 574{
570 xfs_iunpin(iip->ili_inode); 575 xfs_inode_item_unpin(iip, 0);
571} 576}
572 577
573/* 578/*
@@ -601,33 +606,20 @@ xfs_inode_item_trylock(
601 606
602 if (!xfs_iflock_nowait(ip)) { 607 if (!xfs_iflock_nowait(ip)) {
603 /* 608 /*
604 * If someone else isn't already trying to push the inode 609 * The inode has already been flushed to the backing buffer;
605 * buffer, we get to do it. 610 * leave it locked in shared mode and the pushbuf routine
611 * will unlock it.
606 */ 612 */
607 if (iip->ili_pushbuf_flag == 0) { 613 return XFS_ITEM_PUSHBUF;
608 iip->ili_pushbuf_flag = 1;
609#ifdef DEBUG
610 iip->ili_push_owner = current_pid();
611#endif
612 /*
613 * Inode is left locked in shared mode.
614 * Pushbuf routine gets to unlock it.
615 */
616 return XFS_ITEM_PUSHBUF;
617 } else {
618 /*
619 * We hold the AIL lock, so we must specify the
620 * NONOTIFY flag so that we won't double trip.
621 */
622 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
623 return XFS_ITEM_FLUSHING;
624 }
625 /* NOTREACHED */
626 } 614 }
627 615
628 /* Stale items should force out the iclog */ 616 /* Stale items should force out the iclog */
629 if (ip->i_flags & XFS_ISTALE) { 617 if (ip->i_flags & XFS_ISTALE) {
630 xfs_ifunlock(ip); 618 xfs_ifunlock(ip);
619 /*
620 * We hold the AIL lock - notify the unlock routine of this
621 * so it doesn't try to get the lock again.
622 */
631 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); 623 xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
632 return XFS_ITEM_PINNED; 624 return XFS_ITEM_PINNED;
633 } 625 }
@@ -745,11 +737,8 @@ xfs_inode_item_committed(
745 * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK 737 * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK
746 * failed to get the inode flush lock but did get the inode locked SHARED. 738 * failed to get the inode flush lock but did get the inode locked SHARED.
747 * Here we're trying to see if the inode buffer is incore, and if so whether it's 739 * Here we're trying to see if the inode buffer is incore, and if so whether it's
748 * marked delayed write. If that's the case, we'll initiate a bawrite on that 740 * marked delayed write. If that's the case, we'll promote it and that will
749 * buffer to expedite the process. 741 * allow the caller to write the buffer by triggering the xfsbufd to run.
750 *
751 * We aren't holding the AIL lock (or the flush lock) when this gets called,
752 * so it is inherently race-y.
753 */ 742 */
754STATIC void 743STATIC void
755xfs_inode_item_pushbuf( 744xfs_inode_item_pushbuf(
@@ -758,80 +747,30 @@ xfs_inode_item_pushbuf(
758 xfs_inode_t *ip; 747 xfs_inode_t *ip;
759 xfs_mount_t *mp; 748 xfs_mount_t *mp;
760 xfs_buf_t *bp; 749 xfs_buf_t *bp;
761 uint dopush;
762 750
763 ip = iip->ili_inode; 751 ip = iip->ili_inode;
764
765 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 752 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
766 753
767 /* 754 /*
768 * The ili_pushbuf_flag keeps others from
769 * trying to duplicate our effort.
770 */
771 ASSERT(iip->ili_pushbuf_flag != 0);
772 ASSERT(iip->ili_push_owner == current_pid());
773
774 /*
775 * If a flush is not in progress anymore, chances are that the 755 * If a flush is not in progress anymore, chances are that the
776 * inode was taken off the AIL. So, just get out. 756 * inode was taken off the AIL. So, just get out.
777 */ 757 */
778 if (completion_done(&ip->i_flush) || 758 if (completion_done(&ip->i_flush) ||
779 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { 759 ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
780 iip->ili_pushbuf_flag = 0;
781 xfs_iunlock(ip, XFS_ILOCK_SHARED); 760 xfs_iunlock(ip, XFS_ILOCK_SHARED);
782 return; 761 return;
783 } 762 }
784 763
785 mp = ip->i_mount; 764 mp = ip->i_mount;
786 bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, 765 bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno,
787 iip->ili_format.ilf_len, XFS_INCORE_TRYLOCK); 766 iip->ili_format.ilf_len, XBF_TRYLOCK);
788 767
789 if (bp != NULL) {
790 if (XFS_BUF_ISDELAYWRITE(bp)) {
791 /*
792 * We were racing with iflush because we don't hold
793 * the AIL lock or the flush lock. However, at this point,
794 * we have the buffer, and we know that it's dirty.
795 * So, it's possible that iflush raced with us, and
796 * this item is already taken off the AIL.
797 * If not, we can flush it async.
798 */
799 dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
800 !completion_done(&ip->i_flush));
801 iip->ili_pushbuf_flag = 0;
802 xfs_iunlock(ip, XFS_ILOCK_SHARED);
803 xfs_buftrace("INODE ITEM PUSH", bp);
804 if (XFS_BUF_ISPINNED(bp)) {
805 xfs_log_force(mp, (xfs_lsn_t)0,
806 XFS_LOG_FORCE);
807 }
808 if (dopush) {
809 int error;
810 error = xfs_bawrite(mp, bp);
811 if (error)
812 xfs_fs_cmn_err(CE_WARN, mp,
813 "xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p",
814 error, iip, bp);
815 } else {
816 xfs_buf_relse(bp);
817 }
818 } else {
819 iip->ili_pushbuf_flag = 0;
820 xfs_iunlock(ip, XFS_ILOCK_SHARED);
821 xfs_buf_relse(bp);
822 }
823 return;
824 }
825 /*
826 * We have to be careful about resetting pushbuf flag too early (above).
827 * Even though in theory we can do it as soon as we have the buflock,
828 * we don't want others to be doing work needlessly. They'll come to
829 * this function thinking that pushing the buffer is their
830 * responsibility only to find that the buffer is still locked by
831 * another doing the same thing
832 */
833 iip->ili_pushbuf_flag = 0;
834 xfs_iunlock(ip, XFS_ILOCK_SHARED); 768 xfs_iunlock(ip, XFS_ILOCK_SHARED);
769 if (!bp)
770 return;
771 if (XFS_BUF_ISDELAYWRITE(bp))
772 xfs_buf_delwri_promote(bp);
773 xfs_buf_relse(bp);
835 return; 774 return;
836} 775}
837 776
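
[Editor's note] The rewrite above drops the racy ili_pushbuf_flag handoff and the conditional xfs_bawrite(): if the buffer is found in core and is still marked delayed-write, xfs_buf_delwri_promote() simply moves it forward so the xfsbufd flusher writes it next. A toy single-threaded model of that promotion (the real delwri queue is locked and age-ordered; this only shows the requeue-at-head idea):

    #include <stddef.h>

    struct buf {
            int             id;
            struct buf      *next;
    };

    static struct buf *delwri_head;                 /* singly linked delwri queue */

    static void delwri_queue(struct buf *bp)        /* normal aging: append */
    {
            struct buf **p = &delwri_head;

            while (*p)
                    p = &(*p)->next;
            bp->next = NULL;
            *p = bp;
    }

    static void delwri_promote(struct buf *bp)      /* jump the queue */
    {
            struct buf **p = &delwri_head;

            while (*p && *p != bp)
                    p = &(*p)->next;
            if (!*p)
                    return;                         /* not on the queue */
            *p = bp->next;                          /* unlink ... */
            bp->next = delwri_head;                 /* ... and requeue at the head */
            delwri_head = bp;
    }
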
@@ -864,10 +803,14 @@ xfs_inode_item_push(
864 iip->ili_format.ilf_fields != 0); 803 iip->ili_format.ilf_fields != 0);
865 804
866 /* 805 /*
867 * Write out the inode. The completion routine ('iflush_done') will 806 * Push the inode to its backing buffer. This will not remove the
868 * pull it from the AIL, mark it clean, unlock the flush lock. 807 * inode from the AIL - a further push will be required to trigger a
808 * buffer push. However, this allows all the dirty inodes to be pushed
809 * to the buffer before it is pushed to disk. The buffer IO completion
810 * will pull the inode from the AIL, mark it clean and unlock the flush
811 * lock.
869 */ 812 */
870 (void) xfs_iflush(ip, XFS_IFLUSH_ASYNC); 813 (void) xfs_iflush(ip, 0);
871 xfs_iunlock(ip, XFS_ILOCK_SHARED); 814 xfs_iunlock(ip, XFS_ILOCK_SHARED);
872 815
873 return; 816 return;
@@ -931,7 +874,6 @@ xfs_inode_item_init(
931 /* 874 /*
932 We have zeroed memory. No need ... 875 We have zeroed memory. No need ...
933 iip->ili_extents_buf = NULL; 876 iip->ili_extents_buf = NULL;
934 iip->ili_pushbuf_flag = 0;
935 */ 877 */
936 878
937 iip->ili_format.ilf_type = XFS_LI_INODE; 879 iip->ili_format.ilf_type = XFS_LI_INODE;
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 65bae4c9b8bf..9a467958ecdd 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -127,7 +127,7 @@ static inline int xfs_ilog_fdata(int w)
127#ifdef __KERNEL__ 127#ifdef __KERNEL__
128 128
129struct xfs_buf; 129struct xfs_buf;
130struct xfs_bmbt_rec_64; 130struct xfs_bmbt_rec;
131struct xfs_inode; 131struct xfs_inode;
132struct xfs_mount; 132struct xfs_mount;
133 133
@@ -140,16 +140,10 @@ typedef struct xfs_inode_log_item {
140 unsigned short ili_flags; /* misc flags */ 140 unsigned short ili_flags; /* misc flags */
141 unsigned short ili_logged; /* flushed logged data */ 141 unsigned short ili_logged; /* flushed logged data */
142 unsigned int ili_last_fields; /* fields when flushed */ 142 unsigned int ili_last_fields; /* fields when flushed */
143 struct xfs_bmbt_rec_64 *ili_extents_buf; /* array of logged 143 struct xfs_bmbt_rec *ili_extents_buf; /* array of logged
144 data exts */ 144 data exts */
145 struct xfs_bmbt_rec_64 *ili_aextents_buf; /* array of logged 145 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
146 attr exts */ 146 attr exts */
147 unsigned int ili_pushbuf_flag; /* one bit used in push_ail */
148
149#ifdef DEBUG
150 uint64_t ili_push_owner; /* one who sets pushbuf_flag
151 above gets to push the buf */
152#endif
153#ifdef XFS_TRANS_DEBUG 147#ifdef XFS_TRANS_DEBUG
154 int ili_root_size; 148 int ili_root_size;
155 char *ili_orig_root; 149 char *ili_orig_root;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 67ae5555a30a..0b65039951a0 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -47,72 +47,8 @@
47#include "xfs_trans_space.h" 47#include "xfs_trans_space.h"
48#include "xfs_utils.h" 48#include "xfs_utils.h"
49#include "xfs_iomap.h" 49#include "xfs_iomap.h"
50#include "xfs_trace.h"
50 51
51#if defined(XFS_RW_TRACE)
52void
53xfs_iomap_enter_trace(
54 int tag,
55 xfs_inode_t *ip,
56 xfs_off_t offset,
57 ssize_t count)
58{
59 if (!ip->i_rwtrace)
60 return;
61
62 ktrace_enter(ip->i_rwtrace,
63 (void *)((unsigned long)tag),
64 (void *)ip,
65 (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
66 (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
67 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
68 (void *)((unsigned long)(offset & 0xffffffff)),
69 (void *)((unsigned long)count),
70 (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)),
71 (void *)((unsigned long)(ip->i_new_size & 0xffffffff)),
72 (void *)((unsigned long)current_pid()),
73 (void *)NULL,
74 (void *)NULL,
75 (void *)NULL,
76 (void *)NULL,
77 (void *)NULL,
78 (void *)NULL);
79}
80
81void
82xfs_iomap_map_trace(
83 int tag,
84 xfs_inode_t *ip,
85 xfs_off_t offset,
86 ssize_t count,
87 xfs_iomap_t *iomapp,
88 xfs_bmbt_irec_t *imapp,
89 int flags)
90{
91 if (!ip->i_rwtrace)
92 return;
93
94 ktrace_enter(ip->i_rwtrace,
95 (void *)((unsigned long)tag),
96 (void *)ip,
97 (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
98 (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
99 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
100 (void *)((unsigned long)(offset & 0xffffffff)),
101 (void *)((unsigned long)count),
102 (void *)((unsigned long)flags),
103 (void *)((unsigned long)((iomapp->iomap_offset >> 32) & 0xffffffff)),
104 (void *)((unsigned long)(iomapp->iomap_offset & 0xffffffff)),
105 (void *)((unsigned long)(iomapp->iomap_delta)),
106 (void *)((unsigned long)(iomapp->iomap_bsize)),
107 (void *)((unsigned long)(iomapp->iomap_bn)),
108 (void *)(__psint_t)(imapp->br_startoff),
109 (void *)((unsigned long)(imapp->br_blockcount)),
110 (void *)(__psint_t)(imapp->br_startblock));
111}
112#else
113#define xfs_iomap_enter_trace(tag, io, offset, count)
114#define xfs_iomap_map_trace(tag, io, offset, count, iomapp, imapp, flags)
115#endif
116 52
117#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ 53#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \
118 << mp->m_writeio_log) 54 << mp->m_writeio_log)
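
[Editor's note] The block above deletes the hand-rolled XFS_RW_TRACE ktrace ring buffers; the replacements are standard tracepoints (trace_xfs_iomap_enter() and friends) defined in the new xfs_trace.h. For readers unfamiliar with the mechanism, a minimal TRACE_EVENT definition in the same spirit looks like this - event and field names here are illustrative, not the actual xfs_trace.h entries:

    #undef TRACE_SYSTEM
    #define TRACE_SYSTEM xfs_demo

    #if !defined(_TRACE_XFS_DEMO_H) || defined(TRACE_HEADER_MULTI_READ)
    #define _TRACE_XFS_DEMO_H

    #include <linux/tracepoint.h>

    TRACE_EVENT(xfs_demo_iomap_enter,
            TP_PROTO(unsigned long ino, long long offset, long count),
            TP_ARGS(ino, offset, count),
            TP_STRUCT__entry(
                    __field(unsigned long, ino)
                    __field(long long, offset)
                    __field(long, count)
            ),
            TP_fast_assign(
                    __entry->ino = ino;
                    __entry->offset = offset;
                    __entry->count = count;
            ),
            TP_printk("ino 0x%lx offset 0x%llx count %ld",
                      __entry->ino, __entry->offset, __entry->count)
    );

    #endif /* _TRACE_XFS_DEMO_H */

    /* This part must be outside the include guard. */
    #include <trace/define_trace.h>
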
@@ -187,21 +123,20 @@ xfs_iomap(
187 if (XFS_FORCED_SHUTDOWN(mp)) 123 if (XFS_FORCED_SHUTDOWN(mp))
188 return XFS_ERROR(EIO); 124 return XFS_ERROR(EIO);
189 125
126 trace_xfs_iomap_enter(ip, offset, count, flags, NULL);
127
190 switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) { 128 switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) {
191 case BMAPI_READ: 129 case BMAPI_READ:
192 xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, ip, offset, count);
193 lockmode = xfs_ilock_map_shared(ip); 130 lockmode = xfs_ilock_map_shared(ip);
194 bmapi_flags = XFS_BMAPI_ENTIRE; 131 bmapi_flags = XFS_BMAPI_ENTIRE;
195 break; 132 break;
196 case BMAPI_WRITE: 133 case BMAPI_WRITE:
197 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count);
198 lockmode = XFS_ILOCK_EXCL; 134 lockmode = XFS_ILOCK_EXCL;
199 if (flags & BMAPI_IGNSTATE) 135 if (flags & BMAPI_IGNSTATE)
200 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; 136 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
201 xfs_ilock(ip, lockmode); 137 xfs_ilock(ip, lockmode);
202 break; 138 break;
203 case BMAPI_ALLOCATE: 139 case BMAPI_ALLOCATE:
204 xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count);
205 lockmode = XFS_ILOCK_SHARED; 140 lockmode = XFS_ILOCK_SHARED;
206 bmapi_flags = XFS_BMAPI_ENTIRE; 141 bmapi_flags = XFS_BMAPI_ENTIRE;
207 142
@@ -237,8 +172,7 @@ xfs_iomap(
237 if (nimaps && 172 if (nimaps &&
238 (imap.br_startblock != HOLESTARTBLOCK) && 173 (imap.br_startblock != HOLESTARTBLOCK) &&
239 (imap.br_startblock != DELAYSTARTBLOCK)) { 174 (imap.br_startblock != DELAYSTARTBLOCK)) {
240 xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip, 175 trace_xfs_iomap_found(ip, offset, count, flags, &imap);
241 offset, count, iomapp, &imap, flags);
242 break; 176 break;
243 } 177 }
244 178
@@ -250,8 +184,7 @@ xfs_iomap(
250 &imap, &nimaps); 184 &imap, &nimaps);
251 } 185 }
252 if (!error) { 186 if (!error) {
253 xfs_iomap_map_trace(XFS_IOMAP_ALLOC_MAP, ip, 187 trace_xfs_iomap_alloc(ip, offset, count, flags, &imap);
254 offset, count, iomapp, &imap, flags);
255 } 188 }
256 iomap_flags = IOMAP_NEW; 189 iomap_flags = IOMAP_NEW;
257 break; 190 break;
@@ -261,8 +194,7 @@ xfs_iomap(
261 lockmode = 0; 194 lockmode = 0;
262 195
263 if (nimaps && !isnullstartblock(imap.br_startblock)) { 196 if (nimaps && !isnullstartblock(imap.br_startblock)) {
264 xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip, 197 trace_xfs_iomap_found(ip, offset, count, flags, &imap);
265 offset, count, iomapp, &imap, flags);
266 break; 198 break;
267 } 199 }
268 200
@@ -623,8 +555,7 @@ retry:
623 * delalloc blocks and retry without EOF preallocation. 555 * delalloc blocks and retry without EOF preallocation.
624 */ 556 */
625 if (nimaps == 0) { 557 if (nimaps == 0) {
626 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, 558 trace_xfs_delalloc_enospc(ip, offset, count);
627 ip, offset, count);
628 if (flushed) 559 if (flushed)
629 return XFS_ERROR(ENOSPC); 560 return XFS_ERROR(ENOSPC);
630 561
@@ -837,7 +768,7 @@ xfs_iomap_write_unwritten(
837 int committed; 768 int committed;
838 int error; 769 int error;
839 770
840 xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, ip, offset, count); 771 trace_xfs_unwritten_convert(ip, offset, count);
841 772
842 offset_fsb = XFS_B_TO_FSBT(mp, offset); 773 offset_fsb = XFS_B_TO_FSBT(mp, offset);
843 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); 774 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
@@ -860,8 +791,15 @@ xfs_iomap_write_unwritten(
860 * set up a transaction to convert the range of extents 791 * set up a transaction to convert the range of extents
861 * from unwritten to real. Do allocations in a loop until 792 * from unwritten to real. Do allocations in a loop until
862 * we have covered the range passed in. 793 * we have covered the range passed in.
794 *
795 * Note that we open code the transaction allocation here
796 * to pass KM_NOFS--we can't risk recursing back into
797 * the filesystem here as we might be asked to write out
798 * the same inode that we complete here and might deadlock
799 * on the iolock.
863 */ 800 */
864 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); 801 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
802 tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
865 tp->t_flags |= XFS_TRANS_RESERVE; 803 tp->t_flags |= XFS_TRANS_RESERVE;
866 error = xfs_trans_reserve(tp, resblks, 804 error = xfs_trans_reserve(tp, resblks,
867 XFS_WRITE_LOG_RES(mp), 0, 805 XFS_WRITE_LOG_RES(mp), 0,
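
[Editor's note] On the KM_NOFS point above: the flag matters because it strips __GFP_FS from the allocation mask, so memory reclaim cannot re-enter the filesystem while we hold the iolock. A simplified version of the flag conversion that fs/xfs/linux-2.6/kmem.h performs (constants redefined locally for the sketch; the real helper handles more flags):

    #include <linux/gfp.h>

    #define KM_SLEEP        0x0001u
    #define KM_NOSLEEP      0x0002u
    #define KM_NOFS         0x0004u

    static gfp_t kmem_flags_to_gfp(unsigned int km_flags)
    {
            gfp_t gfp = (km_flags & KM_NOSLEEP) ? GFP_ATOMIC : GFP_KERNEL;

            if (km_flags & KM_NOFS)
                    gfp &= ~__GFP_FS;       /* no filesystem re-entry from reclaim */
            return gfp;
    }
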
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index fdcf7b82747f..174f29990991 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -43,6 +43,14 @@ typedef enum {
43 BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */ 43 BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */
44} bmapi_flags_t; 44} bmapi_flags_t;
45 45
46#define BMAPI_FLAGS \
47 { BMAPI_READ, "READ" }, \
48 { BMAPI_WRITE, "WRITE" }, \
49 { BMAPI_ALLOCATE, "ALLOCATE" }, \
50 { BMAPI_IGNSTATE, "IGNSTATE" }, \
51 { BMAPI_DIRECT, "DIRECT" }, \
52 { BMAPI_MMAP, "MMAP" }, \
53 { BMAPI_TRYLOCK, "TRYLOCK" }
46 54
47/* 55/*
48 * xfs_iomap_t: File system I/O map 56 * xfs_iomap_t: File system I/O map
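
[Editor's note] Tables like BMAPI_FLAGS above (and XFS_LOCK_FLAGS / XFS_ITRUNC_FLAGS earlier in this diff) give the new tracepoints a value-to-name map so a flags word can be printed symbolically, in the style of the kernel's __print_flags(). A userspace sketch of the same decoding (flag values assumed for illustration; the real ones come from bmapi_flags_t):

    #include <stdio.h>

    struct flag_name {
            unsigned int    value;
            const char      *name;
    };

    /* Values assumed for illustration only. */
    static const struct flag_name bmapi_flags[] = {
            { 1U << 0, "READ" },
            { 1U << 1, "WRITE" },
            { 1U << 2, "ALLOCATE" },
            { 1U << 3, "IGNSTATE" },
            { 0, NULL }
    };

    static void print_flags(unsigned int flags, const struct flag_name *tbl)
    {
            const char *sep = "";

            for (; tbl->name; tbl++) {
                    if (flags & tbl->value) {
                            printf("%s%s", sep, tbl->name);
                            sep = "|";
                    }
            }
            putchar('\n');
    }

    int main(void)
    {
            print_flags((1U << 1) | (1U << 3), bmapi_flags);  /* WRITE|IGNSTATE */
            return 0;
    }
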
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 62efab2f3839..b1b801e4a28e 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -106,6 +106,7 @@ xfs_bulkstat_one_iget(
106 buf->bs_dmevmask = dic->di_dmevmask; 106 buf->bs_dmevmask = dic->di_dmevmask;
107 buf->bs_dmstate = dic->di_dmstate; 107 buf->bs_dmstate = dic->di_dmstate;
108 buf->bs_aextents = dic->di_anextents; 108 buf->bs_aextents = dic->di_anextents;
109 buf->bs_forkoff = XFS_IFORK_BOFF(ip);
109 110
110 switch (dic->di_format) { 111 switch (dic->di_format) {
111 case XFS_DINODE_FMT_DEV: 112 case XFS_DINODE_FMT_DEV:
@@ -176,6 +177,7 @@ xfs_bulkstat_one_dinode(
176 buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask); 177 buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask);
177 buf->bs_dmstate = be16_to_cpu(dic->di_dmstate); 178 buf->bs_dmstate = be16_to_cpu(dic->di_dmstate);
178 buf->bs_aextents = be16_to_cpu(dic->di_anextents); 179 buf->bs_aextents = be16_to_cpu(dic->di_anextents);
180 buf->bs_forkoff = XFS_DFORK_BOFF(dic);
179 181
180 switch (dic->di_format) { 182 switch (dic->di_format) {
181 case XFS_DINODE_FMT_DEV: 183 case XFS_DINODE_FMT_DEV:
@@ -408,8 +410,10 @@ xfs_bulkstat(
408 (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); 410 (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
409 nimask = ~(nicluster - 1); 411 nimask = ~(nicluster - 1);
410 nbcluster = nicluster >> mp->m_sb.sb_inopblog; 412 nbcluster = nicluster >> mp->m_sb.sb_inopblog;
411 irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4, 413 irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
412 KM_SLEEP | KM_MAYFAIL | KM_LARGE); 414 if (!irbuf)
415 return ENOMEM;
416
413 nirbuf = irbsize / sizeof(*irbuf); 417 nirbuf = irbsize / sizeof(*irbuf);
414 418
415 /* 419 /*
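
[Editor's note] The kmem_zalloc_greedy() call above loses its flags argument and can now return NULL, which the caller maps to ENOMEM; the buffer is later released with kmem_free_large(). The "greedy" idea - take as much as you can get, backing off toward a minimum and reporting the size actually obtained - can be sketched in a few lines (simplified; the real helper uses vmalloc-backed allocations):

    #include <stdlib.h>
    #include <string.h>

    static void *zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
    {
            size_t sz;

            for (sz = maxsize; sz >= minsize; sz >>= 1) {
                    void *p = malloc(sz);

                    if (p) {
                            memset(p, 0, sz);
                            *size = sz;     /* tell the caller what we got */
                            return p;
                    }
            }
            return NULL;                    /* caller returns ENOMEM */
    }
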
@@ -420,9 +424,7 @@ xfs_bulkstat(
420 while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { 424 while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
421 cond_resched(); 425 cond_resched();
422 bp = NULL; 426 bp = NULL;
423 down_read(&mp->m_peraglock);
424 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 427 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
425 up_read(&mp->m_peraglock);
426 if (error) { 428 if (error) {
427 /* 429 /*
428 * Skip this allocation group and go to the next one. 430 * Skip this allocation group and go to the next one.
@@ -729,7 +731,7 @@ xfs_bulkstat(
729 /* 731 /*
730 * Done, we're either out of filesystem or space to put the data. 732 * Done, we're either out of filesystem or space to put the data.
731 */ 733 */
732 kmem_free(irbuf); 734 kmem_free_large(irbuf);
733 *ubcountp = ubelem; 735 *ubcountp = ubelem;
734 /* 736 /*
735 * Found some inodes, return them now and return the error next time. 737 * Found some inodes, return them now and return the error next time.
@@ -849,9 +851,7 @@ xfs_inumbers(
849 agbp = NULL; 851 agbp = NULL;
850 while (left > 0 && agno < mp->m_sb.sb_agcount) { 852 while (left > 0 && agno < mp->m_sb.sb_agcount) {
851 if (agbp == NULL) { 853 if (agbp == NULL) {
852 down_read(&mp->m_peraglock);
853 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 854 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
854 up_read(&mp->m_peraglock);
855 if (error) { 855 if (error) {
856 /* 856 /*
857 * If we can't read the AGI of this ag, 857 * If we can't read the AGI of this ag,
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 9dbdff3ea484..2be019136287 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -40,6 +40,7 @@
40#include "xfs_dinode.h" 40#include "xfs_dinode.h"
41#include "xfs_inode.h" 41#include "xfs_inode.h"
42#include "xfs_rw.h" 42#include "xfs_rw.h"
43#include "xfs_trace.h"
43 44
44kmem_zone_t *xfs_log_ticket_zone; 45kmem_zone_t *xfs_log_ticket_zone;
45 46
@@ -49,7 +50,6 @@ kmem_zone_t *xfs_log_ticket_zone;
49 (off) += (bytes);} 50 (off) += (bytes);}
50 51
51/* Local miscellaneous function prototypes */ 52/* Local miscellaneous function prototypes */
52STATIC int xlog_bdstrat_cb(struct xfs_buf *);
53STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket, 53STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket,
54 xlog_in_core_t **, xfs_lsn_t *); 54 xlog_in_core_t **, xfs_lsn_t *);
55STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, 55STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
@@ -60,7 +60,7 @@ STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
60STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); 60STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
61STATIC void xlog_dealloc_log(xlog_t *log); 61STATIC void xlog_dealloc_log(xlog_t *log);
62STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[], 62STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[],
63 int nentries, xfs_log_ticket_t tic, 63 int nentries, struct xlog_ticket *tic,
64 xfs_lsn_t *start_lsn, 64 xfs_lsn_t *start_lsn,
65 xlog_in_core_t **commit_iclog, 65 xlog_in_core_t **commit_iclog,
66 uint flags); 66 uint flags);
@@ -79,11 +79,6 @@ STATIC int xlog_state_release_iclog(xlog_t *log,
79STATIC void xlog_state_switch_iclogs(xlog_t *log, 79STATIC void xlog_state_switch_iclogs(xlog_t *log,
80 xlog_in_core_t *iclog, 80 xlog_in_core_t *iclog,
81 int eventual_size); 81 int eventual_size);
82STATIC int xlog_state_sync(xlog_t *log,
83 xfs_lsn_t lsn,
84 uint flags,
85 int *log_flushed);
86STATIC int xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed);
87STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); 82STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
88 83
89/* local functions to manipulate grant head */ 84/* local functions to manipulate grant head */
@@ -122,85 +117,6 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
122 117
123STATIC int xlog_iclogs_empty(xlog_t *log); 118STATIC int xlog_iclogs_empty(xlog_t *log);
124 119
125#if defined(XFS_LOG_TRACE)
126
127#define XLOG_TRACE_LOGGRANT_SIZE 2048
128#define XLOG_TRACE_ICLOG_SIZE 256
129
130void
131xlog_trace_loggrant_alloc(xlog_t *log)
132{
133 log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS);
134}
135
136void
137xlog_trace_loggrant_dealloc(xlog_t *log)
138{
139 ktrace_free(log->l_grant_trace);
140}
141
142void
143xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
144{
145 unsigned long cnts;
146
147 /* ticket counts are 1 byte each */
148 cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8;
149
150 ktrace_enter(log->l_grant_trace,
151 (void *)tic,
152 (void *)log->l_reserve_headq,
153 (void *)log->l_write_headq,
154 (void *)((unsigned long)log->l_grant_reserve_cycle),
155 (void *)((unsigned long)log->l_grant_reserve_bytes),
156 (void *)((unsigned long)log->l_grant_write_cycle),
157 (void *)((unsigned long)log->l_grant_write_bytes),
158 (void *)((unsigned long)log->l_curr_cycle),
159 (void *)((unsigned long)log->l_curr_block),
160 (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)),
161 (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)),
162 (void *)string,
163 (void *)((unsigned long)tic->t_trans_type),
164 (void *)cnts,
165 (void *)((unsigned long)tic->t_curr_res),
166 (void *)((unsigned long)tic->t_unit_res));
167}
168
169void
170xlog_trace_iclog_alloc(xlog_in_core_t *iclog)
171{
172 iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS);
173}
174
175void
176xlog_trace_iclog_dealloc(xlog_in_core_t *iclog)
177{
178 ktrace_free(iclog->ic_trace);
179}
180
181void
182xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
183{
184 ktrace_enter(iclog->ic_trace,
185 (void *)((unsigned long)state),
186 (void *)((unsigned long)current_pid()),
187 (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
188 (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
189 (void *)NULL, (void *)NULL, (void *)NULL, (void *)NULL,
190 (void *)NULL, (void *)NULL);
191}
192#else
193
194#define xlog_trace_loggrant_alloc(log)
195#define xlog_trace_loggrant_dealloc(log)
196#define xlog_trace_loggrant(log,tic,string)
197
198#define xlog_trace_iclog_alloc(iclog)
199#define xlog_trace_iclog_dealloc(iclog)
200#define xlog_trace_iclog(iclog,state)
201
202#endif /* XFS_LOG_TRACE */
203
204 120
205static void 121static void
206xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic) 122xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
@@ -327,14 +243,14 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
327 * out when the next write occurs. 243 * out when the next write occurs.
328 */ 244 */
329xfs_lsn_t 245xfs_lsn_t
330xfs_log_done(xfs_mount_t *mp, 246xfs_log_done(
331 xfs_log_ticket_t xtic, 247 struct xfs_mount *mp,
332 void **iclog, 248 struct xlog_ticket *ticket,
333 uint flags) 249 struct xlog_in_core **iclog,
250 uint flags)
334{ 251{
335 xlog_t *log = mp->m_log; 252 struct log *log = mp->m_log;
336 xlog_ticket_t *ticket = (xfs_log_ticket_t) xtic; 253 xfs_lsn_t lsn = 0;
337 xfs_lsn_t lsn = 0;
338 254
339 if (XLOG_FORCED_SHUTDOWN(log) || 255 if (XLOG_FORCED_SHUTDOWN(log) ||
340 /* 256 /*
@@ -342,8 +258,7 @@ xfs_log_done(xfs_mount_t *mp,
342 * If we get an error, just continue and give back the log ticket. 258 * If we get an error, just continue and give back the log ticket.
343 */ 259 */
344 (((ticket->t_flags & XLOG_TIC_INITED) == 0) && 260 (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
345 (xlog_commit_record(mp, ticket, 261 (xlog_commit_record(mp, ticket, iclog, &lsn)))) {
346 (xlog_in_core_t **)iclog, &lsn)))) {
347 lsn = (xfs_lsn_t) -1; 262 lsn = (xfs_lsn_t) -1;
348 if (ticket->t_flags & XLOG_TIC_PERM_RESERV) { 263 if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {
349 flags |= XFS_LOG_REL_PERM_RESERV; 264 flags |= XFS_LOG_REL_PERM_RESERV;
@@ -353,15 +268,17 @@ xfs_log_done(xfs_mount_t *mp,
353 268
354 if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 || 269 if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 ||
355 (flags & XFS_LOG_REL_PERM_RESERV)) { 270 (flags & XFS_LOG_REL_PERM_RESERV)) {
271 trace_xfs_log_done_nonperm(log, ticket);
272
356 /* 273 /*
357 * Release ticket if not permanent reservation or a specific 274 * Release ticket if not permanent reservation or a specific
358 * request has been made to release a permanent reservation. 275 * request has been made to release a permanent reservation.
359 */ 276 */
360 xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
361 xlog_ungrant_log_space(log, ticket); 277 xlog_ungrant_log_space(log, ticket);
362 xfs_log_ticket_put(ticket); 278 xfs_log_ticket_put(ticket);
363 } else { 279 } else {
364 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); 280 trace_xfs_log_done_perm(log, ticket);
281
365 xlog_regrant_reserve_log_space(log, ticket); 282 xlog_regrant_reserve_log_space(log, ticket);
366 /* If this ticket was a permanent reservation and we aren't 283 /* If this ticket was a permanent reservation and we aren't
367 * trying to release it, reset the inited flags; so next time 284 * trying to release it, reset the inited flags; so next time
@@ -371,67 +288,8 @@ xfs_log_done(xfs_mount_t *mp,
371 } 288 }
372 289
373 return lsn; 290 return lsn;
374} /* xfs_log_done */
375
376
377/*
378 * Force the in-core log to disk. If flags == XFS_LOG_SYNC,
379 * the force is done synchronously.
380 *
381 * Asynchronous forces are implemented by setting the WANT_SYNC
382 * bit in the appropriate in-core log and then returning.
383 *
384 * Synchronous forces are implemented with a signal variable. All callers
385 * to force a given lsn to disk will wait on a the sv attached to the
386 * specific in-core log. When given in-core log finally completes its
387 * write to disk, that thread will wake up all threads waiting on the
388 * sv.
389 */
390int
391_xfs_log_force(
392 xfs_mount_t *mp,
393 xfs_lsn_t lsn,
394 uint flags,
395 int *log_flushed)
396{
397 xlog_t *log = mp->m_log;
398 int dummy;
399
400 if (!log_flushed)
401 log_flushed = &dummy;
402
403 ASSERT(flags & XFS_LOG_FORCE);
404
405 XFS_STATS_INC(xs_log_force);
406
407 if (log->l_flags & XLOG_IO_ERROR)
408 return XFS_ERROR(EIO);
409 if (lsn == 0)
410 return xlog_state_sync_all(log, flags, log_flushed);
411 else
412 return xlog_state_sync(log, lsn, flags, log_flushed);
413} /* _xfs_log_force */
414
415/*
416 * Wrapper for _xfs_log_force(), to be used when caller doesn't care
417 * about errors or whether the log was flushed or not. This is the normal
418 * interface to use when trying to unpin items or move the log forward.
419 */
420void
421xfs_log_force(
422 xfs_mount_t *mp,
423 xfs_lsn_t lsn,
424 uint flags)
425{
426 int error;
427 error = _xfs_log_force(mp, lsn, flags, NULL);
428 if (error) {
429 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
430 "error %d returned.", error);
431 }
432} 291}
433 292
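
[Editor's note] The block removed above is the old lsn/flags force API; its replacements - visible later in this diff as _xfs_log_force(mp, XFS_LOG_SYNC, NULL) and xfs_log_force(mp, 0) - keep the same two-level shape: a worker that reports errors, and a fire-and-forget wrapper for callers that cannot act on failure anyway. A sketch of that pattern (names illustrative):

    #include <stdio.h>

    #define LOG_SYNC        0x1     /* wait for the flush to complete */

    static int _log_force(unsigned int flags, int *log_flushed)
    {
            /* ... push dirty in-core log buffers to disk, honouring LOG_SYNC ... */
            if (log_flushed)
                    *log_flushed = 1;
            return 0;
    }

    static void log_force(unsigned int flags)
    {
            int error = _log_force(flags, NULL);

            if (error)
                    fprintf(stderr, "log_force: error %d returned.\n", error);
    }
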
434
435/* 293/*
436 * Attaches a new iclog I/O completion callback routine during 294 * Attaches a new iclog I/O completion callback routine during
437 * transaction commit. If the log is in error state, a non-zero 295 * transaction commit. If the log is in error state, a non-zero
@@ -439,11 +297,11 @@ xfs_log_force(
439 * executing the callback at an appropriate time. 297 * executing the callback at an appropriate time.
440 */ 298 */
441int 299int
442xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ 300xfs_log_notify(
443 void *iclog_hndl, /* iclog to hang callback off */ 301 struct xfs_mount *mp,
444 xfs_log_callback_t *cb) 302 struct xlog_in_core *iclog,
303 xfs_log_callback_t *cb)
445{ 304{
446 xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
447 int abortflg; 305 int abortflg;
448 306
449 spin_lock(&iclog->ic_callback_lock); 307 spin_lock(&iclog->ic_callback_lock);
@@ -457,16 +315,14 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
457 } 315 }
458 spin_unlock(&iclog->ic_callback_lock); 316 spin_unlock(&iclog->ic_callback_lock);
459 return abortflg; 317 return abortflg;
460} /* xfs_log_notify */ 318}
461 319
462int 320int
463xfs_log_release_iclog(xfs_mount_t *mp, 321xfs_log_release_iclog(
464 void *iclog_hndl) 322 struct xfs_mount *mp,
323 struct xlog_in_core *iclog)
465{ 324{
466 xlog_t *log = mp->m_log; 325 if (xlog_state_release_iclog(mp->m_log, iclog)) {
467 xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
468
469 if (xlog_state_release_iclog(log, iclog)) {
470 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 326 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
471 return EIO; 327 return EIO;
472 } 328 }
@@ -485,17 +341,18 @@ xfs_log_release_iclog(xfs_mount_t *mp,
485 * reservation, we prevent over allocation problems. 341 * reservation, we prevent over allocation problems.
486 */ 342 */
487int 343int
488xfs_log_reserve(xfs_mount_t *mp, 344xfs_log_reserve(
489 int unit_bytes, 345 struct xfs_mount *mp,
490 int cnt, 346 int unit_bytes,
491 xfs_log_ticket_t *ticket, 347 int cnt,
492 __uint8_t client, 348 struct xlog_ticket **ticket,
493 uint flags, 349 __uint8_t client,
494 uint t_type) 350 uint flags,
351 uint t_type)
495{ 352{
496 xlog_t *log = mp->m_log; 353 struct log *log = mp->m_log;
497 xlog_ticket_t *internal_ticket; 354 struct xlog_ticket *internal_ticket;
498 int retval = 0; 355 int retval = 0;
499 356
500 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); 357 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
501 ASSERT((flags & XFS_LOG_NOSLEEP) == 0); 358 ASSERT((flags & XFS_LOG_NOSLEEP) == 0);
@@ -505,10 +362,13 @@ xfs_log_reserve(xfs_mount_t *mp,
505 362
506 XFS_STATS_INC(xs_try_logspace); 363 XFS_STATS_INC(xs_try_logspace);
507 364
365
508 if (*ticket != NULL) { 366 if (*ticket != NULL) {
509 ASSERT(flags & XFS_LOG_PERM_RESERV); 367 ASSERT(flags & XFS_LOG_PERM_RESERV);
510 internal_ticket = (xlog_ticket_t *)*ticket; 368 internal_ticket = *ticket;
511 xlog_trace_loggrant(log, internal_ticket, "xfs_log_reserve: existing ticket (permanent trans)"); 369
370 trace_xfs_log_reserve(log, internal_ticket);
371
512 xlog_grant_push_ail(mp, internal_ticket->t_unit_res); 372 xlog_grant_push_ail(mp, internal_ticket->t_unit_res);
513 retval = xlog_regrant_write_log_space(log, internal_ticket); 373 retval = xlog_regrant_write_log_space(log, internal_ticket);
514 } else { 374 } else {
@@ -519,10 +379,9 @@ xfs_log_reserve(xfs_mount_t *mp,
519 return XFS_ERROR(ENOMEM); 379 return XFS_ERROR(ENOMEM);
520 internal_ticket->t_trans_type = t_type; 380 internal_ticket->t_trans_type = t_type;
521 *ticket = internal_ticket; 381 *ticket = internal_ticket;
522 xlog_trace_loggrant(log, internal_ticket, 382
523 (internal_ticket->t_flags & XLOG_TIC_PERM_RESERV) ? 383 trace_xfs_log_reserve(log, internal_ticket);
524 "xfs_log_reserve: create new ticket (permanent trans)" : 384
525 "xfs_log_reserve: create new ticket");
526 xlog_grant_push_ail(mp, 385 xlog_grant_push_ail(mp,
527 (internal_ticket->t_unit_res * 386 (internal_ticket->t_unit_res *
528 internal_ticket->t_cnt)); 387 internal_ticket->t_cnt));
@@ -658,7 +517,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
658 xlog_in_core_t *first_iclog; 517 xlog_in_core_t *first_iclog;
659#endif 518#endif
660 xfs_log_iovec_t reg[1]; 519 xfs_log_iovec_t reg[1];
661 xfs_log_ticket_t tic = NULL; 520 xlog_ticket_t *tic = NULL;
662 xfs_lsn_t lsn; 521 xfs_lsn_t lsn;
663 int error; 522 int error;
664 523
@@ -676,7 +535,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
676 if (mp->m_flags & XFS_MOUNT_RDONLY) 535 if (mp->m_flags & XFS_MOUNT_RDONLY)
677 return 0; 536 return 0;
678 537
679 error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL); 538 error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
680 ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); 539 ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
681 540
682#ifdef DEBUG 541#ifdef DEBUG
@@ -692,7 +551,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
692 if (! (XLOG_FORCED_SHUTDOWN(log))) { 551 if (! (XLOG_FORCED_SHUTDOWN(log))) {
693 reg[0].i_addr = (void*)&magic; 552 reg[0].i_addr = (void*)&magic;
694 reg[0].i_len = sizeof(magic); 553 reg[0].i_len = sizeof(magic);
695 XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT); 554 reg[0].i_type = XLOG_REG_TYPE_UNMOUNT;
696 555
697 error = xfs_log_reserve(mp, 600, 1, &tic, 556 error = xfs_log_reserve(mp, 600, 1, &tic,
698 XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE); 557 XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
@@ -734,7 +593,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
734 spin_unlock(&log->l_icloglock); 593 spin_unlock(&log->l_icloglock);
735 } 594 }
736 if (tic) { 595 if (tic) {
737 xlog_trace_loggrant(log, tic, "unmount rec"); 596 trace_xfs_log_umount_write(log, tic);
738 xlog_ungrant_log_space(log, tic); 597 xlog_ungrant_log_space(log, tic);
739 xfs_log_ticket_put(tic); 598 xfs_log_ticket_put(tic);
740 } 599 }
@@ -795,24 +654,24 @@ xfs_log_unmount(xfs_mount_t *mp)
795 * transaction occur with one call to xfs_log_write(). 654 * transaction occur with one call to xfs_log_write().
796 */ 655 */
797int 656int
798xfs_log_write(xfs_mount_t * mp, 657xfs_log_write(
799 xfs_log_iovec_t reg[], 658 struct xfs_mount *mp,
800 int nentries, 659 struct xfs_log_iovec reg[],
801 xfs_log_ticket_t tic, 660 int nentries,
802 xfs_lsn_t *start_lsn) 661 struct xlog_ticket *tic,
662 xfs_lsn_t *start_lsn)
803{ 663{
804 int error; 664 struct log *log = mp->m_log;
805 xlog_t *log = mp->m_log; 665 int error;
806 666
807 if (XLOG_FORCED_SHUTDOWN(log)) 667 if (XLOG_FORCED_SHUTDOWN(log))
808 return XFS_ERROR(EIO); 668 return XFS_ERROR(EIO);
809 669
810 if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) { 670 error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0);
671 if (error)
811 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 672 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
812 }
813 return error; 673 return error;
814} /* xfs_log_write */ 674}
815
816 675
817void 676void
818xfs_log_move_tail(xfs_mount_t *mp, 677xfs_log_move_tail(xfs_mount_t *mp,
@@ -886,9 +745,16 @@ xfs_log_move_tail(xfs_mount_t *mp,
886 745
887/* 746/*
888 * Determine if we have a transaction that has gone to disk 747 * Determine if we have a transaction that has gone to disk
889 * that needs to be covered. Log activity needs to be idle (no AIL and 748 * that needs to be covered. To begin the transition to the idle state,
890 * nothing in the iclogs). And, we need to be in the right state indicating 749 * the log first needs to be idle (no AIL and nothing in the iclogs).
891 * something has gone out. 750 * If we are then in a state where covering is needed, the caller is informed
751 * that dummy transactions are required to move the log into the idle state.
752 *
753 * Because this is called as part of the sync process, we should also indicate
754 * that dummy transactions should be issued in anything but the covered or
755 * idle states. This ensures that the log tail is accurately reflected in
756 * the log at the end of the sync, so that if a crash occurs we avoid
757 * replaying transactions whose metadata is already on disk.
892 */ 758 */
893int 759int
894xfs_log_need_covered(xfs_mount_t *mp) 760xfs_log_need_covered(xfs_mount_t *mp)
@@ -900,17 +766,24 @@ xfs_log_need_covered(xfs_mount_t *mp)
900 return 0; 766 return 0;
901 767
902 spin_lock(&log->l_icloglock); 768 spin_lock(&log->l_icloglock);
903 if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || 769 switch (log->l_covered_state) {
904 (log->l_covered_state == XLOG_STATE_COVER_NEED2)) 770 case XLOG_STATE_COVER_DONE:
905 && !xfs_trans_ail_tail(log->l_ailp) 771 case XLOG_STATE_COVER_DONE2:
906 && xlog_iclogs_empty(log)) { 772 case XLOG_STATE_COVER_IDLE:
907 if (log->l_covered_state == XLOG_STATE_COVER_NEED) 773 break;
908 log->l_covered_state = XLOG_STATE_COVER_DONE; 774 case XLOG_STATE_COVER_NEED:
909 else { 775 case XLOG_STATE_COVER_NEED2:
910 ASSERT(log->l_covered_state == XLOG_STATE_COVER_NEED2); 776 if (!xfs_trans_ail_tail(log->l_ailp) &&
911 log->l_covered_state = XLOG_STATE_COVER_DONE2; 777 xlog_iclogs_empty(log)) {
778 if (log->l_covered_state == XLOG_STATE_COVER_NEED)
779 log->l_covered_state = XLOG_STATE_COVER_DONE;
780 else
781 log->l_covered_state = XLOG_STATE_COVER_DONE2;
912 } 782 }
783 /* FALLTHRU */
784 default:
913 needed = 1; 785 needed = 1;
786 break;
914 } 787 }
915 spin_unlock(&log->l_icloglock); 788 spin_unlock(&log->l_icloglock);
916 return needed; 789 return needed;
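
[Editor's note] The rewritten xfs_log_need_covered() above is easier to read as a five-state machine: only the two NEED states can advance toward DONE, and anything not yet covered or idle still asks for a dummy transaction. A compilable model of that switch (state names shortened; the fallthrough is deliberate, as in the patch):

    enum cover_state {
            COVER_IDLE,
            COVER_NEED,
            COVER_DONE,
            COVER_NEED2,
            COVER_DONE2,
    };

    static int log_need_covered(enum cover_state *state, int ail_empty,
                                int iclogs_empty)
    {
            int needed = 0;

            switch (*state) {
            case COVER_DONE:
            case COVER_DONE2:
            case COVER_IDLE:
                    break;                  /* already covered, nothing to do */
            case COVER_NEED:
            case COVER_NEED2:
                    if (ail_empty && iclogs_empty)
                            *state = (*state == COVER_NEED) ?
                                            COVER_DONE : COVER_DONE2;
                    /* FALLTHRU: a dummy transaction is still required */
            default:
                    needed = 1;
                    break;
            }
            return needed;
    }
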
@@ -1030,7 +903,6 @@ xlog_iodone(xfs_buf_t *bp)
1030 xfs_fs_cmn_err(CE_WARN, l->l_mp, 903 xfs_fs_cmn_err(CE_WARN, l->l_mp,
1031 "xlog_iodone: Barriers are no longer supported" 904 "xlog_iodone: Barriers are no longer supported"
1032 " by device. Disabling barriers\n"); 905 " by device. Disabling barriers\n");
1033 xfs_buftrace("XLOG_IODONE BARRIERS OFF", bp);
1034 } 906 }
1035 907
1036 /* 908 /*
@@ -1063,38 +935,6 @@ xlog_iodone(xfs_buf_t *bp)
1063} /* xlog_iodone */ 935} /* xlog_iodone */
1064 936
1065/* 937/*
1066 * The bdstrat callback function for log bufs. This gives us a central
1067 * place to trap bufs in case we get hit by a log I/O error and need to
1068 * shutdown. Actually, in practice, even when we didn't get a log error,
1069 * we transition the iclogs to IOERROR state *after* flushing all existing
1070 * iclogs to disk. This is because we don't want anymore new transactions to be
1071 * started or completed afterwards.
1072 */
1073STATIC int
1074xlog_bdstrat_cb(struct xfs_buf *bp)
1075{
1076 xlog_in_core_t *iclog;
1077
1078 iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
1079
1080 if ((iclog->ic_state & XLOG_STATE_IOERROR) == 0) {
1081 /* note for irix bstrat will need struct bdevsw passed
1082 * Fix the following macro if the code ever is merged
1083 */
1084 XFS_bdstrat(bp);
1085 return 0;
1086 }
1087
1088 xfs_buftrace("XLOG__BDSTRAT IOERROR", bp);
1089 XFS_BUF_ERROR(bp, EIO);
1090 XFS_BUF_STALE(bp);
1091 xfs_biodone(bp);
1092 return XFS_ERROR(EIO);
1093
1094
1095}
1096
1097/*
1098 * Return size of each in-core log record buffer. 938 * Return size of each in-core log record buffer.
1099 * 939 *
1100 * All machines get 8 x 32kB buffers by default, unless tuned otherwise. 940 * All machines get 8 x 32kB buffers by default, unless tuned otherwise.
@@ -1236,7 +1076,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1236 if (!bp) 1076 if (!bp)
1237 goto out_free_log; 1077 goto out_free_log;
1238 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); 1078 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1239 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1240 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); 1079 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1241 ASSERT(XFS_BUF_ISBUSY(bp)); 1080 ASSERT(XFS_BUF_ISBUSY(bp));
1242 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 1081 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
@@ -1246,7 +1085,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1246 spin_lock_init(&log->l_grant_lock); 1085 spin_lock_init(&log->l_grant_lock);
1247 sv_init(&log->l_flush_wait, 0, "flush_wait"); 1086 sv_init(&log->l_flush_wait, 0, "flush_wait");
1248 1087
1249 xlog_trace_loggrant_alloc(log);
1250 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ 1088 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
1251 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); 1089 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
1252 1090
@@ -1275,7 +1113,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1275 if (!XFS_BUF_CPSEMA(bp)) 1113 if (!XFS_BUF_CPSEMA(bp))
1276 ASSERT(0); 1114 ASSERT(0);
1277 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); 1115 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1278 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1279 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); 1116 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1280 iclog->ic_bp = bp; 1117 iclog->ic_bp = bp;
1281 iclog->ic_data = bp->b_addr; 1118 iclog->ic_data = bp->b_addr;
@@ -1305,8 +1142,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1305 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); 1142 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
1306 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); 1143 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
1307 1144
1308 xlog_trace_iclog_alloc(iclog);
1309
1310 iclogp = &iclog->ic_next; 1145 iclogp = &iclog->ic_next;
1311 } 1146 }
1312 *iclogp = log->l_iclog; /* complete ring */ 1147 *iclogp = log->l_iclog; /* complete ring */
@@ -1321,13 +1156,11 @@ out_free_iclog:
1321 sv_destroy(&iclog->ic_force_wait); 1156 sv_destroy(&iclog->ic_force_wait);
1322 sv_destroy(&iclog->ic_write_wait); 1157 sv_destroy(&iclog->ic_write_wait);
1323 xfs_buf_free(iclog->ic_bp); 1158 xfs_buf_free(iclog->ic_bp);
1324 xlog_trace_iclog_dealloc(iclog);
1325 } 1159 }
1326 kmem_free(iclog); 1160 kmem_free(iclog);
1327 } 1161 }
1328 spinlock_destroy(&log->l_icloglock); 1162 spinlock_destroy(&log->l_icloglock);
1329 spinlock_destroy(&log->l_grant_lock); 1163 spinlock_destroy(&log->l_grant_lock);
1330 xlog_trace_loggrant_dealloc(log);
1331 xfs_buf_free(log->l_xbuf); 1164 xfs_buf_free(log->l_xbuf);
1332out_free_log: 1165out_free_log:
1333 kmem_free(log); 1166 kmem_free(log);
@@ -1351,7 +1184,7 @@ xlog_commit_record(xfs_mount_t *mp,
1351 1184
1352 reg[0].i_addr = NULL; 1185 reg[0].i_addr = NULL;
1353 reg[0].i_len = 0; 1186 reg[0].i_len = 0;
1354 XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT); 1187 reg[0].i_type = XLOG_REG_TYPE_COMMIT;
1355 1188
1356 ASSERT_ALWAYS(iclog); 1189 ASSERT_ALWAYS(iclog);
1357 if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, 1190 if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
@@ -1426,6 +1259,37 @@ xlog_grant_push_ail(xfs_mount_t *mp,
1426 xfs_trans_ail_push(log->l_ailp, threshold_lsn); 1259 xfs_trans_ail_push(log->l_ailp, threshold_lsn);
1427} /* xlog_grant_push_ail */ 1260} /* xlog_grant_push_ail */
1428 1261
1262/*
1263 * The bdstrat callback function for log bufs. This gives us a central
1264 * place to trap bufs in case we get hit by a log I/O error and need to
1265 * shutdown. Actually, in practice, even when we didn't get a log error,
1266 * we transition the iclogs to IOERROR state *after* flushing all existing
1267 * iclogs to disk. This is because we don't want any new transactions to be
1268 * started or completed afterwards.
1269 */
1270STATIC int
1271xlog_bdstrat(
1272 struct xfs_buf *bp)
1273{
1274 struct xlog_in_core *iclog;
1275
1276 iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
1277 if (iclog->ic_state & XLOG_STATE_IOERROR) {
1278 XFS_BUF_ERROR(bp, EIO);
1279 XFS_BUF_STALE(bp);
1280 xfs_biodone(bp);
1281 /*
1282 * It would seem logical to return EIO here, but we rely on
1283 * the log state machine to propagate I/O errors instead of
1284 * doing it here.
1285 */
1286 return 0;
1287 }
1288
1289 bp->b_flags |= _XBF_RUN_QUEUES;
1290 xfs_buf_iorequest(bp);
1291 return 0;
1292}
1429 1293
1430/* 1294/*
1431 * Flush out the in-core log (iclog) to the on-disk log in an asynchronous 1295 * Flush out the in-core log (iclog) to the on-disk log in an asynchronous
@@ -1524,6 +1388,7 @@ xlog_sync(xlog_t *log,
1524 XFS_BUF_ZEROFLAGS(bp); 1388 XFS_BUF_ZEROFLAGS(bp);
1525 XFS_BUF_BUSY(bp); 1389 XFS_BUF_BUSY(bp);
1526 XFS_BUF_ASYNC(bp); 1390 XFS_BUF_ASYNC(bp);
1391 bp->b_flags |= XBF_LOG_BUFFER;
1527 /* 1392 /*
1528 * Do an ordered write for the log block. 1393 * Do an ordered write for the log block.
1529 * It's unnecessary to flush the first split block in the log wrap case. 1394
@@ -1544,7 +1409,7 @@ xlog_sync(xlog_t *log,
1544 */ 1409 */
1545 XFS_BUF_WRITE(bp); 1410 XFS_BUF_WRITE(bp);
1546 1411
1547 if ((error = XFS_bwrite(bp))) { 1412 if ((error = xlog_bdstrat(bp))) {
1548 xfs_ioerror_alert("xlog_sync", log->l_mp, bp, 1413 xfs_ioerror_alert("xlog_sync", log->l_mp, bp,
1549 XFS_BUF_ADDR(bp)); 1414 XFS_BUF_ADDR(bp));
1550 return error; 1415 return error;
@@ -1561,6 +1426,7 @@ xlog_sync(xlog_t *log,
1561 XFS_BUF_ZEROFLAGS(bp); 1426 XFS_BUF_ZEROFLAGS(bp);
1562 XFS_BUF_BUSY(bp); 1427 XFS_BUF_BUSY(bp);
1563 XFS_BUF_ASYNC(bp); 1428 XFS_BUF_ASYNC(bp);
1429 bp->b_flags |= XBF_LOG_BUFFER;
1564 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) 1430 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1565 XFS_BUF_ORDERED(bp); 1431 XFS_BUF_ORDERED(bp);
1566 dptr = XFS_BUF_PTR(bp); 1432 dptr = XFS_BUF_PTR(bp);
@@ -1583,7 +1449,7 @@ xlog_sync(xlog_t *log,
1583 /* account for internal log which doesn't start at block #0 */ 1449 /* account for internal log which doesn't start at block #0 */
1584 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); 1450 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
1585 XFS_BUF_WRITE(bp); 1451 XFS_BUF_WRITE(bp);
1586 if ((error = XFS_bwrite(bp))) { 1452 if ((error = xlog_bdstrat(bp))) {
1587 xfs_ioerror_alert("xlog_sync (split)", log->l_mp, 1453 xfs_ioerror_alert("xlog_sync (split)", log->l_mp,
1588 bp, XFS_BUF_ADDR(bp)); 1454 bp, XFS_BUF_ADDR(bp));
1589 return error; 1455 return error;
@@ -1607,7 +1473,6 @@ xlog_dealloc_log(xlog_t *log)
1607 sv_destroy(&iclog->ic_force_wait); 1473 sv_destroy(&iclog->ic_force_wait);
1608 sv_destroy(&iclog->ic_write_wait); 1474 sv_destroy(&iclog->ic_write_wait);
1609 xfs_buf_free(iclog->ic_bp); 1475 xfs_buf_free(iclog->ic_bp);
1610 xlog_trace_iclog_dealloc(iclog);
1611 next_iclog = iclog->ic_next; 1476 next_iclog = iclog->ic_next;
1612 kmem_free(iclog); 1477 kmem_free(iclog);
1613 iclog = next_iclog; 1478 iclog = next_iclog;
@@ -1616,7 +1481,6 @@ xlog_dealloc_log(xlog_t *log)
1616 spinlock_destroy(&log->l_grant_lock); 1481 spinlock_destroy(&log->l_grant_lock);
1617 1482
1618 xfs_buf_free(log->l_xbuf); 1483 xfs_buf_free(log->l_xbuf);
1619 xlog_trace_loggrant_dealloc(log);
1620 log->l_mp->m_log = NULL; 1484 log->l_mp->m_log = NULL;
1621 kmem_free(log); 1485 kmem_free(log);
1622} /* xlog_dealloc_log */ 1486} /* xlog_dealloc_log */
@@ -1790,16 +1654,16 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
1790 * bytes have been written out. 1654 * bytes have been written out.
1791 */ 1655 */
1792STATIC int 1656STATIC int
1793xlog_write(xfs_mount_t * mp, 1657xlog_write(
1794 xfs_log_iovec_t reg[], 1658 struct xfs_mount *mp,
1795 int nentries, 1659 struct xfs_log_iovec reg[],
1796 xfs_log_ticket_t tic, 1660 int nentries,
1797 xfs_lsn_t *start_lsn, 1661 struct xlog_ticket *ticket,
1798 xlog_in_core_t **commit_iclog, 1662 xfs_lsn_t *start_lsn,
1799 uint flags) 1663 struct xlog_in_core **commit_iclog,
1664 uint flags)
1800{ 1665{
1801 xlog_t *log = mp->m_log; 1666 xlog_t *log = mp->m_log;
1802 xlog_ticket_t *ticket = (xlog_ticket_t *)tic;
1803 xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */ 1667 xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */
1804 xlog_op_header_t *logop_head; /* ptr to log operation header */ 1668 xlog_op_header_t *logop_head; /* ptr to log operation header */
1805 __psint_t ptr; /* copy address into data region */ 1669 __psint_t ptr; /* copy address into data region */
@@ -1913,7 +1777,7 @@ xlog_write(xfs_mount_t * mp,
1913 default: 1777 default:
1914 xfs_fs_cmn_err(CE_WARN, mp, 1778 xfs_fs_cmn_err(CE_WARN, mp,
1915 "Bad XFS transaction clientid 0x%x in ticket 0x%p", 1779 "Bad XFS transaction clientid 0x%x in ticket 0x%p",
1916 logop_head->oh_clientid, tic); 1780 logop_head->oh_clientid, ticket);
1917 return XFS_ERROR(EIO); 1781 return XFS_ERROR(EIO);
1918 } 1782 }
1919 1783
@@ -2414,7 +2278,6 @@ restart:
2414 2278
2415 iclog = log->l_iclog; 2279 iclog = log->l_iclog;
2416 if (iclog->ic_state != XLOG_STATE_ACTIVE) { 2280 if (iclog->ic_state != XLOG_STATE_ACTIVE) {
2417 xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH);
2418 XFS_STATS_INC(xs_log_noiclogs); 2281 XFS_STATS_INC(xs_log_noiclogs);
2419 2282
2420 /* Wait for log writes to have flushed */ 2283 /* Wait for log writes to have flushed */
@@ -2520,13 +2383,15 @@ xlog_grant_log_space(xlog_t *log,
2520 2383
2521 /* Is there space or do we need to sleep? */ 2384 /* Is there space or do we need to sleep? */
2522 spin_lock(&log->l_grant_lock); 2385 spin_lock(&log->l_grant_lock);
2523 xlog_trace_loggrant(log, tic, "xlog_grant_log_space: enter"); 2386
2387 trace_xfs_log_grant_enter(log, tic);
2524 2388
2525 /* something is already sleeping; insert new transaction at end */ 2389 /* something is already sleeping; insert new transaction at end */
2526 if (log->l_reserve_headq) { 2390 if (log->l_reserve_headq) {
2527 xlog_ins_ticketq(&log->l_reserve_headq, tic); 2391 xlog_ins_ticketq(&log->l_reserve_headq, tic);
2528 xlog_trace_loggrant(log, tic, 2392
2529 "xlog_grant_log_space: sleep 1"); 2393 trace_xfs_log_grant_sleep1(log, tic);
2394
2530 /* 2395 /*
2531 * Gotta check this before going to sleep, while we're 2396 * Gotta check this before going to sleep, while we're
2532 * holding the grant lock. 2397 * holding the grant lock.
@@ -2540,8 +2405,7 @@ xlog_grant_log_space(xlog_t *log,
2540 * If we got an error, and the filesystem is shutting down, 2405 * If we got an error, and the filesystem is shutting down,
2541 * we'll catch it down below. So just continue... 2406 * we'll catch it down below. So just continue...
2542 */ 2407 */
2543 xlog_trace_loggrant(log, tic, 2408 trace_xfs_log_grant_wake1(log, tic);
2544 "xlog_grant_log_space: wake 1");
2545 spin_lock(&log->l_grant_lock); 2409 spin_lock(&log->l_grant_lock);
2546 } 2410 }
2547 if (tic->t_flags & XFS_LOG_PERM_RESERV) 2411 if (tic->t_flags & XFS_LOG_PERM_RESERV)
@@ -2558,8 +2422,9 @@ redo:
2558 if (free_bytes < need_bytes) { 2422 if (free_bytes < need_bytes) {
2559 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2423 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2560 xlog_ins_ticketq(&log->l_reserve_headq, tic); 2424 xlog_ins_ticketq(&log->l_reserve_headq, tic);
2561 xlog_trace_loggrant(log, tic, 2425
2562 "xlog_grant_log_space: sleep 2"); 2426 trace_xfs_log_grant_sleep2(log, tic);
2427
2563 spin_unlock(&log->l_grant_lock); 2428 spin_unlock(&log->l_grant_lock);
2564 xlog_grant_push_ail(log->l_mp, need_bytes); 2429 xlog_grant_push_ail(log->l_mp, need_bytes);
2565 spin_lock(&log->l_grant_lock); 2430 spin_lock(&log->l_grant_lock);
@@ -2571,8 +2436,8 @@ redo:
2571 if (XLOG_FORCED_SHUTDOWN(log)) 2436 if (XLOG_FORCED_SHUTDOWN(log))
2572 goto error_return; 2437 goto error_return;
2573 2438
2574 xlog_trace_loggrant(log, tic, 2439 trace_xfs_log_grant_wake2(log, tic);
2575 "xlog_grant_log_space: wake 2"); 2440
2576 goto redo; 2441 goto redo;
2577 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2442 } else if (tic->t_flags & XLOG_TIC_IN_Q)
2578 xlog_del_ticketq(&log->l_reserve_headq, tic); 2443 xlog_del_ticketq(&log->l_reserve_headq, tic);
@@ -2592,7 +2457,7 @@ redo:
2592 ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn))); 2457 ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
2593 } 2458 }
2594#endif 2459#endif
2595 xlog_trace_loggrant(log, tic, "xlog_grant_log_space: exit"); 2460 trace_xfs_log_grant_exit(log, tic);
2596 xlog_verify_grant_head(log, 1); 2461 xlog_verify_grant_head(log, 1);
2597 spin_unlock(&log->l_grant_lock); 2462 spin_unlock(&log->l_grant_lock);
2598 return 0; 2463 return 0;
@@ -2600,7 +2465,9 @@ redo:
2600 error_return: 2465 error_return:
2601 if (tic->t_flags & XLOG_TIC_IN_Q) 2466 if (tic->t_flags & XLOG_TIC_IN_Q)
2602 xlog_del_ticketq(&log->l_reserve_headq, tic); 2467 xlog_del_ticketq(&log->l_reserve_headq, tic);
2603 xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret"); 2468
2469 trace_xfs_log_grant_error(log, tic);
2470
2604 /* 2471 /*
2605 * If we are failing, make sure the ticket doesn't have any 2472 * If we are failing, make sure the ticket doesn't have any
2606 * current reservations. We don't want to add this back when 2473 * current reservations. We don't want to add this back when
@@ -2640,7 +2507,8 @@ xlog_regrant_write_log_space(xlog_t *log,
2640#endif 2507#endif
2641 2508
2642 spin_lock(&log->l_grant_lock); 2509 spin_lock(&log->l_grant_lock);
2643 xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: enter"); 2510
2511 trace_xfs_log_regrant_write_enter(log, tic);
2644 2512
2645 if (XLOG_FORCED_SHUTDOWN(log)) 2513 if (XLOG_FORCED_SHUTDOWN(log))
2646 goto error_return; 2514 goto error_return;
@@ -2669,8 +2537,8 @@ xlog_regrant_write_log_space(xlog_t *log,
2669 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2537 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2670 xlog_ins_ticketq(&log->l_write_headq, tic); 2538 xlog_ins_ticketq(&log->l_write_headq, tic);
2671 2539
2672 xlog_trace_loggrant(log, tic, 2540 trace_xfs_log_regrant_write_sleep1(log, tic);
2673 "xlog_regrant_write_log_space: sleep 1"); 2541
2674 spin_unlock(&log->l_grant_lock); 2542 spin_unlock(&log->l_grant_lock);
2675 xlog_grant_push_ail(log->l_mp, need_bytes); 2543 xlog_grant_push_ail(log->l_mp, need_bytes);
2676 spin_lock(&log->l_grant_lock); 2544 spin_lock(&log->l_grant_lock);
@@ -2685,8 +2553,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2685 if (XLOG_FORCED_SHUTDOWN(log)) 2553 if (XLOG_FORCED_SHUTDOWN(log))
2686 goto error_return; 2554 goto error_return;
2687 2555
2688 xlog_trace_loggrant(log, tic, 2556 trace_xfs_log_regrant_write_wake1(log, tic);
2689 "xlog_regrant_write_log_space: wake 1");
2690 } 2557 }
2691 } 2558 }
2692 2559
@@ -2704,6 +2571,8 @@ redo:
2704 spin_lock(&log->l_grant_lock); 2571 spin_lock(&log->l_grant_lock);
2705 2572
2706 XFS_STATS_INC(xs_sleep_logspace); 2573 XFS_STATS_INC(xs_sleep_logspace);
2574 trace_xfs_log_regrant_write_sleep2(log, tic);
2575
2707 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); 2576 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2708 2577
2709 /* If we're shutting down, this tic is already off the queue */ 2578 /* If we're shutting down, this tic is already off the queue */
@@ -2711,8 +2580,7 @@ redo:
2711 if (XLOG_FORCED_SHUTDOWN(log)) 2580 if (XLOG_FORCED_SHUTDOWN(log))
2712 goto error_return; 2581 goto error_return;
2713 2582
2714 xlog_trace_loggrant(log, tic, 2583 trace_xfs_log_regrant_write_wake2(log, tic);
2715 "xlog_regrant_write_log_space: wake 2");
2716 goto redo; 2584 goto redo;
2717 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2585 } else if (tic->t_flags & XLOG_TIC_IN_Q)
2718 xlog_del_ticketq(&log->l_write_headq, tic); 2586 xlog_del_ticketq(&log->l_write_headq, tic);
@@ -2727,7 +2595,8 @@ redo:
2727 } 2595 }
2728#endif 2596#endif
2729 2597
2730 xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: exit"); 2598 trace_xfs_log_regrant_write_exit(log, tic);
2599
2731 xlog_verify_grant_head(log, 1); 2600 xlog_verify_grant_head(log, 1);
2732 spin_unlock(&log->l_grant_lock); 2601 spin_unlock(&log->l_grant_lock);
2733 return 0; 2602 return 0;
@@ -2736,7 +2605,9 @@ redo:
2736 error_return: 2605 error_return:
2737 if (tic->t_flags & XLOG_TIC_IN_Q) 2606 if (tic->t_flags & XLOG_TIC_IN_Q)
2738 xlog_del_ticketq(&log->l_reserve_headq, tic); 2607 xlog_del_ticketq(&log->l_reserve_headq, tic);
2739 xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret"); 2608
2609 trace_xfs_log_regrant_write_error(log, tic);
2610
2740 /* 2611 /*
2741 * If we are failing, make sure the ticket doesn't have any 2612 * If we are failing, make sure the ticket doesn't have any
2742 * current reservations. We don't want to add this back when 2613 * current reservations. We don't want to add this back when
@@ -2760,8 +2631,8 @@ STATIC void
2760xlog_regrant_reserve_log_space(xlog_t *log, 2631xlog_regrant_reserve_log_space(xlog_t *log,
2761 xlog_ticket_t *ticket) 2632 xlog_ticket_t *ticket)
2762{ 2633{
2763 xlog_trace_loggrant(log, ticket, 2634 trace_xfs_log_regrant_reserve_enter(log, ticket);
2764 "xlog_regrant_reserve_log_space: enter"); 2635
2765 if (ticket->t_cnt > 0) 2636 if (ticket->t_cnt > 0)
2766 ticket->t_cnt--; 2637 ticket->t_cnt--;
2767 2638
@@ -2769,8 +2640,9 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2769 xlog_grant_sub_space(log, ticket->t_curr_res); 2640 xlog_grant_sub_space(log, ticket->t_curr_res);
2770 ticket->t_curr_res = ticket->t_unit_res; 2641 ticket->t_curr_res = ticket->t_unit_res;
2771 xlog_tic_reset_res(ticket); 2642 xlog_tic_reset_res(ticket);
2772 xlog_trace_loggrant(log, ticket, 2643
2773 "xlog_regrant_reserve_log_space: sub current res"); 2644 trace_xfs_log_regrant_reserve_sub(log, ticket);
2645
2774 xlog_verify_grant_head(log, 1); 2646 xlog_verify_grant_head(log, 1);
2775 2647
2776 /* just return if we still have some of the pre-reserved space */ 2648 /* just return if we still have some of the pre-reserved space */
@@ -2780,8 +2652,9 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2780 } 2652 }
2781 2653
2782 xlog_grant_add_space_reserve(log, ticket->t_unit_res); 2654 xlog_grant_add_space_reserve(log, ticket->t_unit_res);
2783 xlog_trace_loggrant(log, ticket, 2655
2784 "xlog_regrant_reserve_log_space: exit"); 2656 trace_xfs_log_regrant_reserve_exit(log, ticket);
2657
2785 xlog_verify_grant_head(log, 0); 2658 xlog_verify_grant_head(log, 0);
2786 spin_unlock(&log->l_grant_lock); 2659 spin_unlock(&log->l_grant_lock);
2787 ticket->t_curr_res = ticket->t_unit_res; 2660 ticket->t_curr_res = ticket->t_unit_res;
@@ -2811,11 +2684,11 @@ xlog_ungrant_log_space(xlog_t *log,
2811 ticket->t_cnt--; 2684 ticket->t_cnt--;
2812 2685
2813 spin_lock(&log->l_grant_lock); 2686 spin_lock(&log->l_grant_lock);
2814 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter"); 2687 trace_xfs_log_ungrant_enter(log, ticket);
2815 2688
2816 xlog_grant_sub_space(log, ticket->t_curr_res); 2689 xlog_grant_sub_space(log, ticket->t_curr_res);
2817 2690
2818 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: sub current"); 2691 trace_xfs_log_ungrant_sub(log, ticket);
2819 2692
2820 /* If this is a permanent reservation ticket, we may be able to free 2693 /* If this is a permanent reservation ticket, we may be able to free
2821 * up more space based on the remaining count. 2694 * up more space based on the remaining count.
@@ -2825,7 +2698,8 @@ xlog_ungrant_log_space(xlog_t *log,
2825 xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt); 2698 xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
2826 } 2699 }
2827 2700
2828 xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit"); 2701 trace_xfs_log_ungrant_exit(log, ticket);
2702
2829 xlog_verify_grant_head(log, 1); 2703 xlog_verify_grant_head(log, 1);
2830 spin_unlock(&log->l_grant_lock); 2704 spin_unlock(&log->l_grant_lock);
2831 xfs_log_move_tail(log->l_mp, 1); 2705 xfs_log_move_tail(log->l_mp, 1);
@@ -2927,7 +2801,6 @@ xlog_state_switch_iclogs(xlog_t *log,
2927 log->l_iclog = iclog->ic_next; 2801 log->l_iclog = iclog->ic_next;
2928} /* xlog_state_switch_iclogs */ 2802} /* xlog_state_switch_iclogs */
2929 2803
2930
2931/* 2804/*
2932 * Write out all data in the in-core log as of this exact moment in time. 2805 * Write out all data in the in-core log as of this exact moment in time.
2933 * 2806 *
@@ -2955,11 +2828,17 @@ xlog_state_switch_iclogs(xlog_t *log,
2955 * b) when we return from flushing out this iclog, it is still 2828 * b) when we return from flushing out this iclog, it is still
2956 * not in the active nor dirty state. 2829 * not in the active nor dirty state.
2957 */ 2830 */
2958STATIC int 2831int
2959xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) 2832_xfs_log_force(
2833 struct xfs_mount *mp,
2834 uint flags,
2835 int *log_flushed)
2960{ 2836{
2961 xlog_in_core_t *iclog; 2837 struct log *log = mp->m_log;
2962 xfs_lsn_t lsn; 2838 struct xlog_in_core *iclog;
2839 xfs_lsn_t lsn;
2840
2841 XFS_STATS_INC(xs_log_force);
2963 2842
2964 spin_lock(&log->l_icloglock); 2843 spin_lock(&log->l_icloglock);
2965 2844
@@ -3005,7 +2884,9 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
3005 2884
3006 if (xlog_state_release_iclog(log, iclog)) 2885 if (xlog_state_release_iclog(log, iclog))
3007 return XFS_ERROR(EIO); 2886 return XFS_ERROR(EIO);
3008 *log_flushed = 1; 2887
2888 if (log_flushed)
2889 *log_flushed = 1;
3009 spin_lock(&log->l_icloglock); 2890 spin_lock(&log->l_icloglock);
3010 if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn && 2891 if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn &&
3011 iclog->ic_state != XLOG_STATE_DIRTY) 2892 iclog->ic_state != XLOG_STATE_DIRTY)
@@ -3049,19 +2930,37 @@ maybe_sleep:
3049 */ 2930 */
3050 if (iclog->ic_state & XLOG_STATE_IOERROR) 2931 if (iclog->ic_state & XLOG_STATE_IOERROR)
3051 return XFS_ERROR(EIO); 2932 return XFS_ERROR(EIO);
3052 *log_flushed = 1; 2933 if (log_flushed)
3053 2934 *log_flushed = 1;
3054 } else { 2935 } else {
3055 2936
3056no_sleep: 2937no_sleep:
3057 spin_unlock(&log->l_icloglock); 2938 spin_unlock(&log->l_icloglock);
3058 } 2939 }
3059 return 0; 2940 return 0;
3060} /* xlog_state_sync_all */ 2941}
2942
2943/*
2944 * Wrapper for _xfs_log_force(), to be used when caller doesn't care
2945 * about errors or whether the log was flushed or not. This is the normal
2946 * interface to use when trying to unpin items or move the log forward.
2947 */
2948void
2949xfs_log_force(
2950 xfs_mount_t *mp,
2951 uint flags)
2952{
2953 int error;
3061 2954
2955 error = _xfs_log_force(mp, flags, NULL);
2956 if (error) {
2957 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
2958 "error %d returned.", error);
2959 }
2960}
3062 2961
3063/* 2962/*
3064 * Used by code which implements synchronous log forces. 2963 * Force the in-core log to disk for a specific LSN.
3065 * 2964 *
3066 * Find in-core log with lsn. 2965 * Find in-core log with lsn.
3067 * If it is in the DIRTY state, just return. 2966 * If it is in the DIRTY state, just return.
@@ -3069,109 +2968,142 @@ no_sleep:
3069 * state and go to sleep or return. 2968 * state and go to sleep or return.
3070 * If it is in any other state, go to sleep or return. 2969 * If it is in any other state, go to sleep or return.
3071 * 2970 *
3072 * If filesystem activity goes to zero, the iclog will get flushed only by 2971 * Synchronous forces are implemented with a signal variable. All callers
3073 * bdflush(). 2972 * to force a given lsn to disk will wait on the sv attached to the
2973 * specific in-core log. When the given in-core log finally completes its
2974 * write to disk, that thread will wake up all threads waiting on the
2975 * sv.
3074 */ 2976 */
3075STATIC int 2977int
3076xlog_state_sync(xlog_t *log, 2978_xfs_log_force_lsn(
3077 xfs_lsn_t lsn, 2979 struct xfs_mount *mp,
3078 uint flags, 2980 xfs_lsn_t lsn,
3079 int *log_flushed) 2981 uint flags,
2982 int *log_flushed)
3080{ 2983{
3081 xlog_in_core_t *iclog; 2984 struct log *log = mp->m_log;
3082 int already_slept = 0; 2985 struct xlog_in_core *iclog;
2986 int already_slept = 0;
3083 2987
3084try_again: 2988 ASSERT(lsn != 0);
3085 spin_lock(&log->l_icloglock);
3086 iclog = log->l_iclog;
3087 2989
3088 if (iclog->ic_state & XLOG_STATE_IOERROR) { 2990 XFS_STATS_INC(xs_log_force);
3089 spin_unlock(&log->l_icloglock);
3090 return XFS_ERROR(EIO);
3091 }
3092
3093 do {
3094 if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
3095 iclog = iclog->ic_next;
3096 continue;
3097 }
3098 2991
3099 if (iclog->ic_state == XLOG_STATE_DIRTY) { 2992try_again:
2993 spin_lock(&log->l_icloglock);
2994 iclog = log->l_iclog;
2995 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3100 spin_unlock(&log->l_icloglock); 2996 spin_unlock(&log->l_icloglock);
3101 return 0; 2997 return XFS_ERROR(EIO);
3102 } 2998 }
3103 2999
3104 if (iclog->ic_state == XLOG_STATE_ACTIVE) { 3000 do {
3105 /* 3001 if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
3106 * We sleep here if we haven't already slept (e.g. 3002 iclog = iclog->ic_next;
3107 * this is the first time we've looked at the correct 3003 continue;
3108 * iclog buf) and the buffer before us is going to 3004 }
3109 * be sync'ed. The reason for this is that if we 3005
3110 * are doing sync transactions here, by waiting for 3006 if (iclog->ic_state == XLOG_STATE_DIRTY) {
3111 * the previous I/O to complete, we can allow a few 3007 spin_unlock(&log->l_icloglock);
3112 * more transactions into this iclog before we close 3008 return 0;
3113 * it down. 3009 }
3114 * 3010
3115 * Otherwise, we mark the buffer WANT_SYNC, and bump 3011 if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3116 * up the refcnt so we can release the log (which drops 3012 /*
3117 * the ref count). The state switch keeps new transaction 3013 * We sleep here if we haven't already slept (e.g.
3118 * commits from using this buffer. When the current commits 3014 * this is the first time we've looked at the correct
3119 * finish writing into the buffer, the refcount will drop to 3015 * iclog buf) and the buffer before us is going to
3120 * zero and the buffer will go out then. 3016 * be sync'ed. The reason for this is that if we
3121 */ 3017 * are doing sync transactions here, by waiting for
3122 if (!already_slept && 3018 * the previous I/O to complete, we can allow a few
3123 (iclog->ic_prev->ic_state & (XLOG_STATE_WANT_SYNC | 3019 * more transactions into this iclog before we close
3124 XLOG_STATE_SYNCING))) { 3020 * it down.
3125 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); 3021 *
3126 XFS_STATS_INC(xs_log_force_sleep); 3022 * Otherwise, we mark the buffer WANT_SYNC, and bump
3127 sv_wait(&iclog->ic_prev->ic_write_wait, PSWP, 3023 * up the refcnt so we can release the log (which
3128 &log->l_icloglock, s); 3024 * drops the ref count). The state switch keeps new
3129 *log_flushed = 1; 3025 * transaction commits from using this buffer. When
3130 already_slept = 1; 3026 * the current commits finish writing into the buffer,
3131 goto try_again; 3027 * the refcount will drop to zero and the buffer will
3132 } else { 3028 * go out then.
3029 */
3030 if (!already_slept &&
3031 (iclog->ic_prev->ic_state &
3032 (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
3033 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
3034
3035 XFS_STATS_INC(xs_log_force_sleep);
3036
3037 sv_wait(&iclog->ic_prev->ic_write_wait,
3038 PSWP, &log->l_icloglock, s);
3039 if (log_flushed)
3040 *log_flushed = 1;
3041 already_slept = 1;
3042 goto try_again;
3043 }
3133 atomic_inc(&iclog->ic_refcnt); 3044 atomic_inc(&iclog->ic_refcnt);
3134 xlog_state_switch_iclogs(log, iclog, 0); 3045 xlog_state_switch_iclogs(log, iclog, 0);
3135 spin_unlock(&log->l_icloglock); 3046 spin_unlock(&log->l_icloglock);
3136 if (xlog_state_release_iclog(log, iclog)) 3047 if (xlog_state_release_iclog(log, iclog))
3137 return XFS_ERROR(EIO); 3048 return XFS_ERROR(EIO);
3138 *log_flushed = 1; 3049 if (log_flushed)
3050 *log_flushed = 1;
3139 spin_lock(&log->l_icloglock); 3051 spin_lock(&log->l_icloglock);
3140 } 3052 }
3141 }
3142 3053
3143 if ((flags & XFS_LOG_SYNC) && /* sleep */ 3054 if ((flags & XFS_LOG_SYNC) && /* sleep */
3144 !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { 3055 !(iclog->ic_state &
3056 (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
3057 /*
3058 * Don't wait on completion if we know that we've
3059 * gotten a log write error.
3060 */
3061 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3062 spin_unlock(&log->l_icloglock);
3063 return XFS_ERROR(EIO);
3064 }
3065 XFS_STATS_INC(xs_log_force_sleep);
3066 sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
3067 /*
3068 * No need to grab the log lock here since we're
3069 * only deciding whether or not to return EIO
3070 * and the memory read should be atomic.
3071 */
3072 if (iclog->ic_state & XLOG_STATE_IOERROR)
3073 return XFS_ERROR(EIO);
3145 3074
3146 /* 3075 if (log_flushed)
3147 * Don't wait on completion if we know that we've 3076 *log_flushed = 1;
3148 * gotten a log write error. 3077 } else { /* just return */
3149 */
3150 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3151 spin_unlock(&log->l_icloglock); 3078 spin_unlock(&log->l_icloglock);
3152 return XFS_ERROR(EIO);
3153 } 3079 }
3154 XFS_STATS_INC(xs_log_force_sleep);
3155 sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
3156 /*
3157 * No need to grab the log lock here since we're
3158 * only deciding whether or not to return EIO
3159 * and the memory read should be atomic.
3160 */
3161 if (iclog->ic_state & XLOG_STATE_IOERROR)
3162 return XFS_ERROR(EIO);
3163 *log_flushed = 1;
3164 } else { /* just return */
3165 spin_unlock(&log->l_icloglock);
3166 }
3167 return 0;
3168 3080
3169 } while (iclog != log->l_iclog); 3081 return 0;
3082 } while (iclog != log->l_iclog);
3083
3084 spin_unlock(&log->l_icloglock);
3085 return 0;
3086}
3170 3087
3171 spin_unlock(&log->l_icloglock); 3088/*
3172 return 0; 3089 * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care
3173} /* xlog_state_sync */ 3090 * about errors or whether the log was flushed or not. This is the normal
3091 * interface to use when trying to unpin items or move the log forward.
3092 */
3093void
3094xfs_log_force_lsn(
3095 xfs_mount_t *mp,
3096 xfs_lsn_t lsn,
3097 uint flags)
3098{
3099 int error;
3174 3100
3101 error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
3102 if (error) {
3103 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force_lsn: "
3104 "error %d returned.", error);
3105 }
3106}
3175 3107
3176/* 3108/*
3177 * Called when we want to mark the current iclog as being ready to sync to 3109 * Called when we want to mark the current iclog as being ready to sync to
@@ -3536,7 +3468,6 @@ xfs_log_force_umount(
3536 xlog_ticket_t *tic; 3468 xlog_ticket_t *tic;
3537 xlog_t *log; 3469 xlog_t *log;
3538 int retval; 3470 int retval;
3539 int dummy;
3540 3471
3541 log = mp->m_log; 3472 log = mp->m_log;
3542 3473
@@ -3610,13 +3541,14 @@ xfs_log_force_umount(
3610 } 3541 }
3611 spin_unlock(&log->l_grant_lock); 3542 spin_unlock(&log->l_grant_lock);
3612 3543
3613 if (! (log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { 3544 if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
3614 ASSERT(!logerror); 3545 ASSERT(!logerror);
3615 /* 3546 /*
3616 * Force the incore logs to disk before shutting the 3547 * Force the incore logs to disk before shutting the
3617 * log down completely. 3548 * log down completely.
3618 */ 3549 */
3619 xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy); 3550 _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
3551
3620 spin_lock(&log->l_icloglock); 3552 spin_lock(&log->l_icloglock);
3621 retval = xlog_state_ioerror(log); 3553 retval = xlog_state_ioerror(log);
3622 spin_unlock(&log->l_icloglock); 3554 spin_unlock(&log->l_icloglock);
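
The xfs_log.c hunks above split the old three-argument xfs_log_force() into an LSN-less pair (_xfs_log_force()/xfs_log_force()) and an LSN-taking pair (_xfs_log_force_lsn()/xfs_log_force_lsn()), retire the XFS_LOG_FORCE flag, and make the log_flushed out-parameter optional. A hedged sketch of a caller conversion, modelled on the xfs_log_force_umount() hunk above rather than on any other specific call site:

	/* Before: flags had to include XFS_LOG_FORCE, and a dummy
	 * variable was needed even when the caller ignored the
	 * "did anything flush?" answer.
	 */
	int	dummy;
	xlog_state_sync_all(log, XFS_LOG_FORCE | XFS_LOG_SYNC, &dummy);

	/* After: pass NULL when the answer is uninteresting, or use the
	 * void wrappers, which only log any error.
	 */
	error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
	xfs_log_force(mp, XFS_LOG_SYNC);
	xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC);	/* force up to an LSN */
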
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index d0c9baa50b1a..97a24c7795a4 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -70,14 +70,8 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
70 * Flags to xfs_log_force() 70 * Flags to xfs_log_force()
71 * 71 *
72 * XFS_LOG_SYNC: Synchronous force in-core log to disk 72 * XFS_LOG_SYNC: Synchronous force in-core log to disk
73 * XFS_LOG_FORCE: Start in-core log write now.
74 * XFS_LOG_URGE: Start write within some window of time.
75 *
76 * Note: Either XFS_LOG_FORCE or XFS_LOG_URGE must be set.
77 */ 73 */
78#define XFS_LOG_SYNC 0x1 74#define XFS_LOG_SYNC 0x1
79#define XFS_LOG_FORCE 0x2
80#define XFS_LOG_URGE 0x4
81 75
82#endif /* __KERNEL__ */ 76#endif /* __KERNEL__ */
83 77
@@ -110,16 +104,12 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
110#define XLOG_REG_TYPE_TRANSHDR 19 104#define XLOG_REG_TYPE_TRANSHDR 19
111#define XLOG_REG_TYPE_MAX 19 105#define XLOG_REG_TYPE_MAX 19
112 106
113#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t))
114
115typedef struct xfs_log_iovec { 107typedef struct xfs_log_iovec {
116 xfs_caddr_t i_addr; /* beginning address of region */ 108 xfs_caddr_t i_addr; /* beginning address of region */
117 int i_len; /* length in bytes of region */ 109 int i_len; /* length in bytes of region */
118 uint i_type; /* type of region */ 110 uint i_type; /* type of region */
119} xfs_log_iovec_t; 111} xfs_log_iovec_t;
120 112
121typedef void* xfs_log_ticket_t;
122
123/* 113/*
124 * Structure used to pass callback function and the function's argument 114 * Structure used to pass callback function and the function's argument
125 * to the log manager. 115 * to the log manager.
@@ -134,18 +124,25 @@ typedef struct xfs_log_callback {
134#ifdef __KERNEL__ 124#ifdef __KERNEL__
135/* Log manager interfaces */ 125/* Log manager interfaces */
136struct xfs_mount; 126struct xfs_mount;
127struct xlog_in_core;
137struct xlog_ticket; 128struct xlog_ticket;
129
138xfs_lsn_t xfs_log_done(struct xfs_mount *mp, 130xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
139 xfs_log_ticket_t ticket, 131 struct xlog_ticket *ticket,
140 void **iclog, 132 struct xlog_in_core **iclog,
141 uint flags); 133 uint flags);
142int _xfs_log_force(struct xfs_mount *mp, 134int _xfs_log_force(struct xfs_mount *mp,
143 xfs_lsn_t lsn,
144 uint flags, 135 uint flags,
145 int *log_forced); 136 int *log_forced);
146void xfs_log_force(struct xfs_mount *mp, 137void xfs_log_force(struct xfs_mount *mp,
147 xfs_lsn_t lsn,
148 uint flags); 138 uint flags);
139int _xfs_log_force_lsn(struct xfs_mount *mp,
140 xfs_lsn_t lsn,
141 uint flags,
142 int *log_forced);
143void xfs_log_force_lsn(struct xfs_mount *mp,
144 xfs_lsn_t lsn,
145 uint flags);
149int xfs_log_mount(struct xfs_mount *mp, 146int xfs_log_mount(struct xfs_mount *mp,
150 struct xfs_buftarg *log_target, 147 struct xfs_buftarg *log_target,
151 xfs_daddr_t start_block, 148 xfs_daddr_t start_block,
@@ -154,21 +151,21 @@ int xfs_log_mount_finish(struct xfs_mount *mp);
154void xfs_log_move_tail(struct xfs_mount *mp, 151void xfs_log_move_tail(struct xfs_mount *mp,
155 xfs_lsn_t tail_lsn); 152 xfs_lsn_t tail_lsn);
156int xfs_log_notify(struct xfs_mount *mp, 153int xfs_log_notify(struct xfs_mount *mp,
157 void *iclog, 154 struct xlog_in_core *iclog,
158 xfs_log_callback_t *callback_entry); 155 xfs_log_callback_t *callback_entry);
159int xfs_log_release_iclog(struct xfs_mount *mp, 156int xfs_log_release_iclog(struct xfs_mount *mp,
160 void *iclog_hndl); 157 struct xlog_in_core *iclog);
161int xfs_log_reserve(struct xfs_mount *mp, 158int xfs_log_reserve(struct xfs_mount *mp,
162 int length, 159 int length,
163 int count, 160 int count,
164 xfs_log_ticket_t *ticket, 161 struct xlog_ticket **ticket,
165 __uint8_t clientid, 162 __uint8_t clientid,
166 uint flags, 163 uint flags,
167 uint t_type); 164 uint t_type);
168int xfs_log_write(struct xfs_mount *mp, 165int xfs_log_write(struct xfs_mount *mp,
169 xfs_log_iovec_t region[], 166 xfs_log_iovec_t region[],
170 int nentries, 167 int nentries,
171 xfs_log_ticket_t ticket, 168 struct xlog_ticket *ticket,
172 xfs_lsn_t *start_lsn); 169 xfs_lsn_t *start_lsn);
173int xfs_log_unmount_write(struct xfs_mount *mp); 170int xfs_log_unmount_write(struct xfs_mount *mp);
174void xfs_log_unmount(struct xfs_mount *mp); 171void xfs_log_unmount(struct xfs_mount *mp);
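
The xfs_log.h changes above replace the untyped xfs_log_ticket_t (a void *) and the void *iclog handles with forward-declared struct pointers. The structures stay opaque to callers, but passing the wrong handle now fails to compile. A short illustration of the pattern (the mistaken call is hypothetical):

	struct xlog_ticket;		/* bodies stay private to	*/
	struct xlog_in_core;		/* xfs_log_priv.h		*/

	int xfs_log_release_iclog(struct xfs_mount *mp,
				  struct xlog_in_core *iclog);

	/* With void * handles this would compile silently; with typed
	 * pointers it is a compile-time error:
	 *
	 *	xfs_log_release_iclog(mp, ticket);  // ticket is a
	 *					    // struct xlog_ticket *
	 */
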
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 679c7c4926a2..fd02a18facd5 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -19,7 +19,6 @@
19#define __XFS_LOG_PRIV_H__ 19#define __XFS_LOG_PRIV_H__
20 20
21struct xfs_buf; 21struct xfs_buf;
22struct ktrace;
23struct log; 22struct log;
24struct xlog_ticket; 23struct xlog_ticket;
25struct xfs_buf_cancel; 24struct xfs_buf_cancel;
@@ -135,6 +134,12 @@ static inline uint xlog_get_client_id(__be32 i)
135#define XLOG_TIC_INITED 0x1 /* has been initialized */ 134#define XLOG_TIC_INITED 0x1 /* has been initialized */
136#define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */ 135#define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */
137#define XLOG_TIC_IN_Q 0x4 136#define XLOG_TIC_IN_Q 0x4
137
138#define XLOG_TIC_FLAGS \
139 { XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \
140 { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }, \
141 { XLOG_TIC_IN_Q, "XLOG_TIC_IN_Q" }
142
138#endif /* __KERNEL__ */ 143#endif /* __KERNEL__ */
139 144
140#define XLOG_UNMOUNT_TYPE 0x556e /* Un for Unmount */ 145#define XLOG_UNMOUNT_TYPE 0x556e /* Un for Unmount */
@@ -361,9 +366,6 @@ typedef struct xlog_in_core {
361 int ic_bwritecnt; 366 int ic_bwritecnt;
362 unsigned short ic_state; 367 unsigned short ic_state;
363 char *ic_datap; /* pointer to iclog data */ 368 char *ic_datap; /* pointer to iclog data */
364#ifdef XFS_LOG_TRACE
365 struct ktrace *ic_trace;
366#endif
367 369
368 /* Callback structures need their own cacheline */ 370 /* Callback structures need their own cacheline */
369 spinlock_t ic_callback_lock ____cacheline_aligned_in_smp; 371 spinlock_t ic_callback_lock ____cacheline_aligned_in_smp;
@@ -429,10 +431,6 @@ typedef struct log {
429 int l_grant_write_cycle; 431 int l_grant_write_cycle;
430 int l_grant_write_bytes; 432 int l_grant_write_bytes;
431 433
432#ifdef XFS_LOG_TRACE
433 struct ktrace *l_grant_trace;
434#endif
435
436 /* The following field are used for debugging; need to hold icloglock */ 434 /* The following field are used for debugging; need to hold icloglock */
437#ifdef DEBUG 435#ifdef DEBUG
438 char *l_iclog_bak[XLOG_MAX_ICLOGS]; 436 char *l_iclog_bak[XLOG_MAX_ICLOGS];
@@ -445,23 +443,12 @@ typedef struct log {
445 443
446/* common routines */ 444/* common routines */
447extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); 445extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
448extern int xlog_find_tail(xlog_t *log,
449 xfs_daddr_t *head_blk,
450 xfs_daddr_t *tail_blk);
451extern int xlog_recover(xlog_t *log); 446extern int xlog_recover(xlog_t *log);
452extern int xlog_recover_finish(xlog_t *log); 447extern int xlog_recover_finish(xlog_t *log);
453extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); 448extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
454extern struct xfs_buf *xlog_get_bp(xlog_t *, int);
455extern void xlog_put_bp(struct xfs_buf *);
456 449
457extern kmem_zone_t *xfs_log_ticket_zone; 450extern kmem_zone_t *xfs_log_ticket_zone;
458 451
459/* iclog tracing */
460#define XLOG_TRACE_GRAB_FLUSH 1
461#define XLOG_TRACE_REL_FLUSH 2
462#define XLOG_TRACE_SLEEP_FLUSH 3
463#define XLOG_TRACE_WAKE_FLUSH 4
464
465/* 452/*
466 * Unmount record type is used as a pseudo transaction type for the ticket. 453 * Unmount record type is used as a pseudo transaction type for the ticket.
467 * Its value must be outside the range of XFS_TRANS_* values. 454
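
The XLOG_TIC_FLAGS table added above pairs each ticket flag with a printable name. Tables of { value, "name" } pairs are the form consumed by __print_flags() in tracepoint format strings, so this is presumably how the trace_xfs_log_grant_*() events introduced in xfs_log.c decode t_flags; the event fields in this fragment are illustrative assumptions, not taken from this patch:

	/* Illustrative TP_printk() fragment: __print_flags() renders the
	 * flags word as e.g. "XLOG_TIC_PERM_RESERV|XLOG_TIC_IN_Q".
	 */
	TP_printk("tic 0x%p flags %s",
		  __entry->tic,
		  __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS))
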
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index fb17f8226b09..22e6efdc17ea 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -46,11 +46,10 @@
46#include "xfs_quota.h" 46#include "xfs_quota.h"
47#include "xfs_rw.h" 47#include "xfs_rw.h"
48#include "xfs_utils.h" 48#include "xfs_utils.h"
49#include "xfs_trace.h"
49 50
50STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); 51STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *);
51STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); 52STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t);
52STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q,
53 xlog_recover_item_t *item);
54#if defined(DEBUG) 53#if defined(DEBUG)
55STATIC void xlog_recover_check_summary(xlog_t *); 54STATIC void xlog_recover_check_summary(xlog_t *);
56#else 55#else
@@ -67,7 +66,7 @@ STATIC void xlog_recover_check_summary(xlog_t *);
67 ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) 66 ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) )
68#define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) 67#define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask)
69 68
70xfs_buf_t * 69STATIC xfs_buf_t *
71xlog_get_bp( 70xlog_get_bp(
72 xlog_t *log, 71 xlog_t *log,
73 int nbblks) 72 int nbblks)
@@ -87,7 +86,7 @@ xlog_get_bp(
87 return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); 86 return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp);
88} 87}
89 88
90void 89STATIC void
91xlog_put_bp( 90xlog_put_bp(
92 xfs_buf_t *bp) 91 xfs_buf_t *bp)
93{ 92{
@@ -225,16 +224,10 @@ xlog_header_check_dump(
225 xfs_mount_t *mp, 224 xfs_mount_t *mp,
226 xlog_rec_header_t *head) 225 xlog_rec_header_t *head)
227{ 226{
228 int b; 227 cmn_err(CE_DEBUG, "%s: SB : uuid = %pU, fmt = %d\n",
229 228 __func__, &mp->m_sb.sb_uuid, XLOG_FMT);
230 cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__); 229 cmn_err(CE_DEBUG, " log : uuid = %pU, fmt = %d\n",
231 for (b = 0; b < 16; b++) 230 &head->h_fs_uuid, be32_to_cpu(head->h_fmt));
232 cmn_err(CE_DEBUG, "%02x", ((__uint8_t *)&mp->m_sb.sb_uuid)[b]);
233 cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT);
234 cmn_err(CE_DEBUG, " log : uuid = ");
235 for (b = 0; b < 16; b++)
236 cmn_err(CE_DEBUG, "%02x", ((__uint8_t *)&head->h_fs_uuid)[b]);
237 cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt));
238} 231}
239#else 232#else
240#define xlog_header_check_dump(mp, head) 233#define xlog_header_check_dump(mp, head)
@@ -810,7 +803,7 @@ xlog_find_head(
810 * We could speed up search by using current head_blk buffer, but it is not 803 * We could speed up search by using current head_blk buffer, but it is not
811 * available. 804 * available.
812 */ 805 */
813int 806STATIC int
814xlog_find_tail( 807xlog_find_tail(
815 xlog_t *log, 808 xlog_t *log,
816 xfs_daddr_t *head_blk, 809 xfs_daddr_t *head_blk,
@@ -1372,36 +1365,45 @@ xlog_clear_stale_blocks(
1372 1365
1373STATIC xlog_recover_t * 1366STATIC xlog_recover_t *
1374xlog_recover_find_tid( 1367xlog_recover_find_tid(
1375 xlog_recover_t *q, 1368 struct hlist_head *head,
1376 xlog_tid_t tid) 1369 xlog_tid_t tid)
1377{ 1370{
1378 xlog_recover_t *p = q; 1371 xlog_recover_t *trans;
1372 struct hlist_node *n;
1379 1373
1380 while (p != NULL) { 1374 hlist_for_each_entry(trans, n, head, r_list) {
1381 if (p->r_log_tid == tid) 1375 if (trans->r_log_tid == tid)
1382 break; 1376 return trans;
1383 p = p->r_next;
1384 } 1377 }
1385 return p; 1378 return NULL;
1386} 1379}
1387 1380
1388STATIC void 1381STATIC void
1389xlog_recover_put_hashq( 1382xlog_recover_new_tid(
1390 xlog_recover_t **q, 1383 struct hlist_head *head,
1391 xlog_recover_t *trans) 1384 xlog_tid_t tid,
1385 xfs_lsn_t lsn)
1392{ 1386{
1393 trans->r_next = *q; 1387 xlog_recover_t *trans;
1394 *q = trans; 1388
1389 trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
1390 trans->r_log_tid = tid;
1391 trans->r_lsn = lsn;
1392 INIT_LIST_HEAD(&trans->r_itemq);
1393
1394 INIT_HLIST_NODE(&trans->r_list);
1395 hlist_add_head(&trans->r_list, head);
1395} 1396}
1396 1397
1397STATIC void 1398STATIC void
1398xlog_recover_add_item( 1399xlog_recover_add_item(
1399 xlog_recover_item_t **itemq) 1400 struct list_head *head)
1400{ 1401{
1401 xlog_recover_item_t *item; 1402 xlog_recover_item_t *item;
1402 1403
1403 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); 1404 item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
1404 xlog_recover_insert_item_backq(itemq, item); 1405 INIT_LIST_HEAD(&item->ri_list);
1406 list_add_tail(&item->ri_list, head);
1405} 1407}
1406 1408
1407STATIC int 1409STATIC int
@@ -1414,8 +1416,7 @@ xlog_recover_add_to_cont_trans(
1414 xfs_caddr_t ptr, old_ptr; 1416 xfs_caddr_t ptr, old_ptr;
1415 int old_len; 1417 int old_len;
1416 1418
1417 item = trans->r_itemq; 1419 if (list_empty(&trans->r_itemq)) {
1418 if (item == NULL) {
1419 /* finish copying rest of trans header */ 1420 /* finish copying rest of trans header */
1420 xlog_recover_add_item(&trans->r_itemq); 1421 xlog_recover_add_item(&trans->r_itemq);
1421 ptr = (xfs_caddr_t) &trans->r_theader + 1422 ptr = (xfs_caddr_t) &trans->r_theader +
@@ -1423,7 +1424,8 @@ xlog_recover_add_to_cont_trans(
1423 memcpy(ptr, dp, len); /* d, s, l */ 1424 memcpy(ptr, dp, len); /* d, s, l */
1424 return 0; 1425 return 0;
1425 } 1426 }
1426 item = item->ri_prev; 1427 /* take the tail entry */
1428 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
1427 1429
1428 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; 1430 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
1429 old_len = item->ri_buf[item->ri_cnt-1].i_len; 1431 old_len = item->ri_buf[item->ri_cnt-1].i_len;
@@ -1460,8 +1462,7 @@ xlog_recover_add_to_trans(
1460 1462
1461 if (!len) 1463 if (!len)
1462 return 0; 1464 return 0;
1463 item = trans->r_itemq; 1465 if (list_empty(&trans->r_itemq)) {
1464 if (item == NULL) {
1465 /* we need to catch log corruptions here */ 1466 /* we need to catch log corruptions here */
1466 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { 1467 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
1467 xlog_warn("XFS: xlog_recover_add_to_trans: " 1468 xlog_warn("XFS: xlog_recover_add_to_trans: "
@@ -1479,12 +1480,15 @@ xlog_recover_add_to_trans(
1479 memcpy(ptr, dp, len); 1480 memcpy(ptr, dp, len);
1480 in_f = (xfs_inode_log_format_t *)ptr; 1481 in_f = (xfs_inode_log_format_t *)ptr;
1481 1482
1482 if (item->ri_prev->ri_total != 0 && 1483 /* take the tail entry */
1483 item->ri_prev->ri_total == item->ri_prev->ri_cnt) { 1484 item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
1485 if (item->ri_total != 0 &&
1486 item->ri_total == item->ri_cnt) {
1487 /* tail item is in use, get a new one */
1484 xlog_recover_add_item(&trans->r_itemq); 1488 xlog_recover_add_item(&trans->r_itemq);
1489 item = list_entry(trans->r_itemq.prev,
1490 xlog_recover_item_t, ri_list);
1485 } 1491 }
1486 item = trans->r_itemq;
1487 item = item->ri_prev;
1488 1492
1489 if (item->ri_total == 0) { /* first region to be added */ 1493 if (item->ri_total == 0) { /* first region to be added */
1490 if (in_f->ilf_size == 0 || 1494 if (in_f->ilf_size == 0 ||
@@ -1509,96 +1513,29 @@ xlog_recover_add_to_trans(
1509 return 0; 1513 return 0;
1510} 1514}
1511 1515
1512STATIC void 1516/*
1513xlog_recover_new_tid( 1517 * Sort the log items in the transaction. Cancelled buffers need
1514 xlog_recover_t **q, 1518 * to be put first so they are processed before any items that might
1515 xlog_tid_t tid, 1519 * modify the buffers. If they are cancelled, then the modifications
1516 xfs_lsn_t lsn) 1520 * don't need to be replayed.
1517{ 1521 */
1518 xlog_recover_t *trans;
1519
1520 trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
1521 trans->r_log_tid = tid;
1522 trans->r_lsn = lsn;
1523 xlog_recover_put_hashq(q, trans);
1524}
1525
1526STATIC int
1527xlog_recover_unlink_tid(
1528 xlog_recover_t **q,
1529 xlog_recover_t *trans)
1530{
1531 xlog_recover_t *tp;
1532 int found = 0;
1533
1534 ASSERT(trans != NULL);
1535 if (trans == *q) {
1536 *q = (*q)->r_next;
1537 } else {
1538 tp = *q;
1539 while (tp) {
1540 if (tp->r_next == trans) {
1541 found = 1;
1542 break;
1543 }
1544 tp = tp->r_next;
1545 }
1546 if (!found) {
1547 xlog_warn(
1548 "XFS: xlog_recover_unlink_tid: trans not found");
1549 ASSERT(0);
1550 return XFS_ERROR(EIO);
1551 }
1552 tp->r_next = tp->r_next->r_next;
1553 }
1554 return 0;
1555}
1556
1557STATIC void
1558xlog_recover_insert_item_backq(
1559 xlog_recover_item_t **q,
1560 xlog_recover_item_t *item)
1561{
1562 if (*q == NULL) {
1563 item->ri_prev = item->ri_next = item;
1564 *q = item;
1565 } else {
1566 item->ri_next = *q;
1567 item->ri_prev = (*q)->ri_prev;
1568 (*q)->ri_prev = item;
1569 item->ri_prev->ri_next = item;
1570 }
1571}
1572
1573STATIC void
1574xlog_recover_insert_item_frontq(
1575 xlog_recover_item_t **q,
1576 xlog_recover_item_t *item)
1577{
1578 xlog_recover_insert_item_backq(q, item);
1579 *q = item;
1580}
1581
1582STATIC int 1522STATIC int
1583xlog_recover_reorder_trans( 1523xlog_recover_reorder_trans(
1584 xlog_recover_t *trans) 1524 xlog_recover_t *trans)
1585{ 1525{
1586 xlog_recover_item_t *first_item, *itemq, *itemq_next; 1526 xlog_recover_item_t *item, *n;
1587 xfs_buf_log_format_t *buf_f; 1527 LIST_HEAD(sort_list);
1588 ushort flags = 0;
1589 1528
1590 first_item = itemq = trans->r_itemq; 1529 list_splice_init(&trans->r_itemq, &sort_list);
1591 trans->r_itemq = NULL; 1530 list_for_each_entry_safe(item, n, &sort_list, ri_list) {
1592 do { 1531 xfs_buf_log_format_t *buf_f;
1593 itemq_next = itemq->ri_next;
1594 buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr;
1595 1532
1596 switch (ITEM_TYPE(itemq)) { 1533 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
1534
1535 switch (ITEM_TYPE(item)) {
1597 case XFS_LI_BUF: 1536 case XFS_LI_BUF:
1598 flags = buf_f->blf_flags; 1537 if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) {
1599 if (!(flags & XFS_BLI_CANCEL)) { 1538 list_move(&item->ri_list, &trans->r_itemq);
1600 xlog_recover_insert_item_frontq(&trans->r_itemq,
1601 itemq);
1602 break; 1539 break;
1603 } 1540 }
1604 case XFS_LI_INODE: 1541 case XFS_LI_INODE:
@@ -1606,7 +1543,7 @@ xlog_recover_reorder_trans(
1606 case XFS_LI_QUOTAOFF: 1543 case XFS_LI_QUOTAOFF:
1607 case XFS_LI_EFD: 1544 case XFS_LI_EFD:
1608 case XFS_LI_EFI: 1545 case XFS_LI_EFI:
1609 xlog_recover_insert_item_backq(&trans->r_itemq, itemq); 1546 list_move_tail(&item->ri_list, &trans->r_itemq);
1610 break; 1547 break;
1611 default: 1548 default:
1612 xlog_warn( 1549 xlog_warn(
@@ -1614,8 +1551,8 @@ xlog_recover_reorder_trans(
1614 ASSERT(0); 1551 ASSERT(0);
1615 return XFS_ERROR(EIO); 1552 return XFS_ERROR(EIO);
1616 } 1553 }
1617 itemq = itemq_next; 1554 }
1618 } while (first_item != itemq); 1555 ASSERT(list_empty(&sort_list));
1619 return 0; 1556 return 0;
1620} 1557}
1621 1558
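
The xfs_log_recover.c hunks replace the hand-rolled ri_next/ri_prev ring and r_next hash chains with the standard list_head/hlist_head primitives. The reordering pass above is the common splice-and-repartition idiom; distilled below as a sketch, with the hypothetical goes_first() standing in for the patch's "XFS_LI_BUF without XFS_BLI_CANCEL" test:

	LIST_HEAD(sort_list);

	/* Detach every item, then re-queue it: head-movers end up ahead
	 * of tail-movers, and the _safe iterator tolerates the moves.
	 */
	list_splice_init(&trans->r_itemq, &sort_list);
	list_for_each_entry_safe(item, n, &sort_list, ri_list) {
		if (goes_first(item))
			list_move(&item->ri_list, &trans->r_itemq);
		else
			list_move_tail(&item->ri_list, &trans->r_itemq);
	}
	ASSERT(list_empty(&sort_list));		/* everything re-queued */
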
@@ -2206,6 +2143,7 @@ xlog_recover_do_buffer_trans(
2206 xfs_daddr_t blkno; 2143 xfs_daddr_t blkno;
2207 int len; 2144 int len;
2208 ushort flags; 2145 ushort flags;
2146 uint buf_flags;
2209 2147
2210 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr; 2148 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
2211 2149
@@ -2246,12 +2184,11 @@ xlog_recover_do_buffer_trans(
2246 } 2184 }
2247 2185
2248 mp = log->l_mp; 2186 mp = log->l_mp;
2249 if (flags & XFS_BLI_INODE_BUF) { 2187 buf_flags = XBF_LOCK;
2250 bp = xfs_buf_read_flags(mp->m_ddev_targp, blkno, len, 2188 if (!(flags & XFS_BLI_INODE_BUF))
2251 XFS_BUF_LOCK); 2189 buf_flags |= XBF_MAPPED;
2252 } else { 2190
2253 bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, 0); 2191 bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags);
2254 }
2255 if (XFS_BUF_ISERROR(bp)) { 2192 if (XFS_BUF_ISERROR(bp)) {
2256 xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp, 2193 xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp,
2257 bp, blkno); 2194 bp, blkno);
@@ -2350,8 +2287,8 @@ xlog_recover_do_inode_trans(
2350 goto error; 2287 goto error;
2351 } 2288 }
2352 2289
2353 bp = xfs_buf_read_flags(mp->m_ddev_targp, in_f->ilf_blkno, 2290 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
2354 in_f->ilf_len, XFS_BUF_LOCK); 2291 XBF_LOCK);
2355 if (XFS_BUF_ISERROR(bp)) { 2292 if (XFS_BUF_ISERROR(bp)) {
2356 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, 2293 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
2357 bp, in_f->ilf_blkno); 2294 bp, in_f->ilf_blkno);
@@ -2819,14 +2756,13 @@ xlog_recover_do_trans(
2819 int pass) 2756 int pass)
2820{ 2757{
2821 int error = 0; 2758 int error = 0;
2822 xlog_recover_item_t *item, *first_item; 2759 xlog_recover_item_t *item;
2823 2760
2824 error = xlog_recover_reorder_trans(trans); 2761 error = xlog_recover_reorder_trans(trans);
2825 if (error) 2762 if (error)
2826 return error; 2763 return error;
2827 2764
2828 first_item = item = trans->r_itemq; 2765 list_for_each_entry(item, &trans->r_itemq, ri_list) {
2829 do {
2830 switch (ITEM_TYPE(item)) { 2766 switch (ITEM_TYPE(item)) {
2831 case XFS_LI_BUF: 2767 case XFS_LI_BUF:
2832 error = xlog_recover_do_buffer_trans(log, item, pass); 2768 error = xlog_recover_do_buffer_trans(log, item, pass);
@@ -2859,8 +2795,7 @@ xlog_recover_do_trans(
2859 2795
2860 if (error) 2796 if (error)
2861 return error; 2797 return error;
2862 item = item->ri_next; 2798 }
2863 } while (first_item != item);
2864 2799
2865 return 0; 2800 return 0;
2866} 2801}
@@ -2874,21 +2809,18 @@ STATIC void
2874xlog_recover_free_trans( 2809xlog_recover_free_trans(
2875 xlog_recover_t *trans) 2810 xlog_recover_t *trans)
2876{ 2811{
2877 xlog_recover_item_t *first_item, *item, *free_item; 2812 xlog_recover_item_t *item, *n;
2878 int i; 2813 int i;
2879 2814
2880 item = first_item = trans->r_itemq; 2815 list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
2881 do { 2816 /* Free the regions in the item. */
2882 free_item = item; 2817 list_del(&item->ri_list);
2883 item = item->ri_next; 2818 for (i = 0; i < item->ri_cnt; i++)
2884 /* Free the regions in the item. */ 2819 kmem_free(item->ri_buf[i].i_addr);
2885 for (i = 0; i < free_item->ri_cnt; i++) {
2886 kmem_free(free_item->ri_buf[i].i_addr);
2887 }
2888 /* Free the item itself */ 2820 /* Free the item itself */
2889 kmem_free(free_item->ri_buf); 2821 kmem_free(item->ri_buf);
2890 kmem_free(free_item); 2822 kmem_free(item);
2891 } while (first_item != item); 2823 }
2892 /* Free the transaction recover structure */ 2824 /* Free the transaction recover structure */
2893 kmem_free(trans); 2825 kmem_free(trans);
2894} 2826}
@@ -2896,14 +2828,12 @@ xlog_recover_free_trans(
2896STATIC int 2828STATIC int
2897xlog_recover_commit_trans( 2829xlog_recover_commit_trans(
2898 xlog_t *log, 2830 xlog_t *log,
2899 xlog_recover_t **q,
2900 xlog_recover_t *trans, 2831 xlog_recover_t *trans,
2901 int pass) 2832 int pass)
2902{ 2833{
2903 int error; 2834 int error;
2904 2835
2905 if ((error = xlog_recover_unlink_tid(q, trans))) 2836 hlist_del(&trans->r_list);
2906 return error;
2907 if ((error = xlog_recover_do_trans(log, trans, pass))) 2837 if ((error = xlog_recover_do_trans(log, trans, pass)))
2908 return error; 2838 return error;
2909 xlog_recover_free_trans(trans); /* no error */ 2839 xlog_recover_free_trans(trans); /* no error */
@@ -2931,7 +2861,7 @@ xlog_recover_unmount_trans(
2931STATIC int 2861STATIC int
2932xlog_recover_process_data( 2862xlog_recover_process_data(
2933 xlog_t *log, 2863 xlog_t *log,
2934 xlog_recover_t *rhash[], 2864 struct hlist_head rhash[],
2935 xlog_rec_header_t *rhead, 2865 xlog_rec_header_t *rhead,
2936 xfs_caddr_t dp, 2866 xfs_caddr_t dp,
2937 int pass) 2867 int pass)
@@ -2965,7 +2895,7 @@ xlog_recover_process_data(
2965 } 2895 }
2966 tid = be32_to_cpu(ohead->oh_tid); 2896 tid = be32_to_cpu(ohead->oh_tid);
2967 hash = XLOG_RHASH(tid); 2897 hash = XLOG_RHASH(tid);
2968 trans = xlog_recover_find_tid(rhash[hash], tid); 2898 trans = xlog_recover_find_tid(&rhash[hash], tid);
2969 if (trans == NULL) { /* not found; add new tid */ 2899 if (trans == NULL) { /* not found; add new tid */
2970 if (ohead->oh_flags & XLOG_START_TRANS) 2900 if (ohead->oh_flags & XLOG_START_TRANS)
2971 xlog_recover_new_tid(&rhash[hash], tid, 2901 xlog_recover_new_tid(&rhash[hash], tid,
@@ -2983,7 +2913,7 @@ xlog_recover_process_data(
2983 switch (flags) { 2913 switch (flags) {
2984 case XLOG_COMMIT_TRANS: 2914 case XLOG_COMMIT_TRANS:
2985 error = xlog_recover_commit_trans(log, 2915 error = xlog_recover_commit_trans(log,
2986 &rhash[hash], trans, pass); 2916 trans, pass);
2987 break; 2917 break;
2988 case XLOG_UNMOUNT_TRANS: 2918 case XLOG_UNMOUNT_TRANS:
2989 error = xlog_recover_unmount_trans(trans); 2919 error = xlog_recover_unmount_trans(trans);
@@ -3216,7 +3146,7 @@ xlog_recover_process_one_iunlink(
3216 /* 3146 /*
3217 * Get the on disk inode to find the next inode in the bucket. 3147 * Get the on disk inode to find the next inode in the bucket.
3218 */ 3148 */
3219 error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK); 3149 error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XBF_LOCK);
3220 if (error) 3150 if (error)
3221 goto fail_iput; 3151 goto fail_iput;
3222 3152
@@ -3517,12 +3447,12 @@ xlog_do_recovery_pass(
3517{ 3447{
3518 xlog_rec_header_t *rhead; 3448 xlog_rec_header_t *rhead;
3519 xfs_daddr_t blk_no; 3449 xfs_daddr_t blk_no;
3520 xfs_caddr_t bufaddr, offset; 3450 xfs_caddr_t offset;
3521 xfs_buf_t *hbp, *dbp; 3451 xfs_buf_t *hbp, *dbp;
3522 int error = 0, h_size; 3452 int error = 0, h_size;
3523 int bblks, split_bblks; 3453 int bblks, split_bblks;
3524 int hblks, split_hblks, wrapped_hblks; 3454 int hblks, split_hblks, wrapped_hblks;
3525 xlog_recover_t *rhash[XLOG_RHASH_SIZE]; 3455 struct hlist_head rhash[XLOG_RHASH_SIZE];
3526 3456
3527 ASSERT(head_blk != tail_blk); 3457 ASSERT(head_blk != tail_blk);
3528 3458
@@ -3610,7 +3540,7 @@ xlog_do_recovery_pass(
3610 /* 3540 /*
3611 * Check for header wrapping around physical end-of-log 3541 * Check for header wrapping around physical end-of-log
3612 */ 3542 */
3613 offset = NULL; 3543 offset = XFS_BUF_PTR(hbp);
3614 split_hblks = 0; 3544 split_hblks = 0;
3615 wrapped_hblks = 0; 3545 wrapped_hblks = 0;
3616 if (blk_no + hblks <= log->l_logBBsize) { 3546 if (blk_no + hblks <= log->l_logBBsize) {
@@ -3646,9 +3576,8 @@ xlog_do_recovery_pass(
3646 * - order is important. 3576 * - order is important.
3647 */ 3577 */
3648 wrapped_hblks = hblks - split_hblks; 3578 wrapped_hblks = hblks - split_hblks;
3649 bufaddr = XFS_BUF_PTR(hbp);
3650 error = XFS_BUF_SET_PTR(hbp, 3579 error = XFS_BUF_SET_PTR(hbp,
3651 bufaddr + BBTOB(split_hblks), 3580 offset + BBTOB(split_hblks),
3652 BBTOB(hblks - split_hblks)); 3581 BBTOB(hblks - split_hblks));
3653 if (error) 3582 if (error)
3654 goto bread_err2; 3583 goto bread_err2;
@@ -3658,14 +3587,10 @@ xlog_do_recovery_pass(
3658 if (error) 3587 if (error)
3659 goto bread_err2; 3588 goto bread_err2;
3660 3589
3661 error = XFS_BUF_SET_PTR(hbp, bufaddr, 3590 error = XFS_BUF_SET_PTR(hbp, offset,
3662 BBTOB(hblks)); 3591 BBTOB(hblks));
3663 if (error) 3592 if (error)
3664 goto bread_err2; 3593 goto bread_err2;
3665
3666 if (!offset)
3667 offset = xlog_align(log, 0,
3668 wrapped_hblks, hbp);
3669 } 3594 }
3670 rhead = (xlog_rec_header_t *)offset; 3595 rhead = (xlog_rec_header_t *)offset;
3671 error = xlog_valid_rec_header(log, rhead, 3596 error = xlog_valid_rec_header(log, rhead,
@@ -3685,7 +3610,7 @@ xlog_do_recovery_pass(
3685 } else { 3610 } else {
3686 /* This log record is split across the 3611 /* This log record is split across the
3687 * physical end of log */ 3612 * physical end of log */
3688 offset = NULL; 3613 offset = XFS_BUF_PTR(dbp);
3689 split_bblks = 0; 3614 split_bblks = 0;
3690 if (blk_no != log->l_logBBsize) { 3615 if (blk_no != log->l_logBBsize) {
3691 /* some data is before the physical 3616 /* some data is before the physical
@@ -3714,9 +3639,8 @@ xlog_do_recovery_pass(
3714 * _first_, then the log start (LR header end) 3639 * _first_, then the log start (LR header end)
3715 * - order is important. 3640 * - order is important.
3716 */ 3641 */
3717 bufaddr = XFS_BUF_PTR(dbp);
3718 error = XFS_BUF_SET_PTR(dbp, 3642 error = XFS_BUF_SET_PTR(dbp,
3719 bufaddr + BBTOB(split_bblks), 3643 offset + BBTOB(split_bblks),
3720 BBTOB(bblks - split_bblks)); 3644 BBTOB(bblks - split_bblks));
3721 if (error) 3645 if (error)
3722 goto bread_err2; 3646 goto bread_err2;
@@ -3727,13 +3651,9 @@ xlog_do_recovery_pass(
3727 if (error) 3651 if (error)
3728 goto bread_err2; 3652 goto bread_err2;
3729 3653
3730 error = XFS_BUF_SET_PTR(dbp, bufaddr, h_size); 3654 error = XFS_BUF_SET_PTR(dbp, offset, h_size);
3731 if (error) 3655 if (error)
3732 goto bread_err2; 3656 goto bread_err2;
3733
3734 if (!offset)
3735 offset = xlog_align(log, wrapped_hblks,
3736 bblks - split_bblks, dbp);
3737 } 3657 }
3738 xlog_unpack_data(rhead, offset, log); 3658 xlog_unpack_data(rhead, offset, log);
3739 if ((error = xlog_recover_process_data(log, rhash, 3659 if ((error = xlog_recover_process_data(log, rhash,
@@ -3993,8 +3913,7 @@ xlog_recover_finish(
3993 * case the unlink transactions would have problems 3913 * case the unlink transactions would have problems
3994 * pushing the EFIs out of the way. 3914 * pushing the EFIs out of the way.
3995 */ 3915 */
3996 xfs_log_force(log->l_mp, (xfs_lsn_t)0, 3916 xfs_log_force(log->l_mp, XFS_LOG_SYNC);
3997 (XFS_LOG_FORCE | XFS_LOG_SYNC));
3998 3917
3999 xlog_recover_process_iunlinks(log); 3918 xlog_recover_process_iunlinks(log);
4000 3919
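The log force calls in this file move to the reworked API visible throughout this diff: xfs_log_force(mp, flags) forces everything out (the old XFS_LOG_FORCE bit is implied and gone), while _xfs_log_force_lsn(mp, lsn, flags, log_flushed) forces the log out to a specific commit LSN (see the xfs_trans.c hunk below). A hedged sketch of how the old three-argument convention maps onto the new entry points; demo_force() is illustrative and the exact return conventions are assumptions:

/* Old style was: xfs_log_force(mp, lsn, XFS_LOG_FORCE | XFS_LOG_SYNC). */
static int demo_force(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags,
		      int *log_flushed)
{
	if (lsn == 0) {
		/* no target lsn: force out everything */
		xfs_log_force(mp, flags & XFS_LOG_SYNC);
		return 0;
	}
	/* force the log out to (at least) this commit lsn */
	return _xfs_log_force_lsn(mp, lsn, flags & XFS_LOG_SYNC, log_flushed);
}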
diff --git a/fs/xfs/xfs_log_recover.h b/fs/xfs/xfs_log_recover.h
index b22545555301..75d749207258 100644
--- a/fs/xfs/xfs_log_recover.h
+++ b/fs/xfs/xfs_log_recover.h
@@ -35,22 +35,21 @@
35 * item headers are in ri_buf[0]. Additional buffers follow. 35 * item headers are in ri_buf[0]. Additional buffers follow.
36 */ 36 */
37typedef struct xlog_recover_item { 37typedef struct xlog_recover_item {
38 struct xlog_recover_item *ri_next; 38 struct list_head ri_list;
39 struct xlog_recover_item *ri_prev; 39 int ri_type;
40 int ri_type; 40 int ri_cnt; /* count of regions found */
41 int ri_cnt; /* count of regions found */ 41 int ri_total; /* total regions */
42 int ri_total; /* total regions */ 42 xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */
43 xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */
44} xlog_recover_item_t; 43} xlog_recover_item_t;
45 44
46struct xlog_tid; 45struct xlog_tid;
47typedef struct xlog_recover { 46typedef struct xlog_recover {
48 struct xlog_recover *r_next; 47 struct hlist_node r_list;
49 xlog_tid_t r_log_tid; /* log's transaction id */ 48 xlog_tid_t r_log_tid; /* log's transaction id */
50 xfs_trans_header_t r_theader; /* trans header for partial */ 49 xfs_trans_header_t r_theader; /* trans header for partial */
51 int r_state; /* not needed */ 50 int r_state; /* not needed */
52 xfs_lsn_t r_lsn; /* xact lsn */ 51 xfs_lsn_t r_lsn; /* xact lsn */
53 xlog_recover_item_t *r_itemq; /* q for items */ 52 struct list_head r_itemq; /* q for items */
54} xlog_recover_t; 53} xlog_recover_t;
55 54
56#define ITEM_TYPE(i) (*(ushort *)(i)->ri_buf[0].i_addr) 55#define ITEM_TYPE(i) (*(ushort *)(i)->ri_buf[0].i_addr)
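With r_itemq and ri_list now standard struct list_head lists, the hand-rolled circular walk that xlog_recover_free_trans() used to do (see the top of this diff) reduces to list_for_each_entry_safe(), which caches the next node so the current one can be freed mid-walk. A self-contained sketch of that freeing pattern, with a simplified stand-in for xlog_recover_item_t:

#include <linux/list.h>
#include <linux/slab.h>

struct demo_item {
	struct list_head	ri_list;
	int			ri_cnt;
	void			**ri_buf;	/* ri_cnt region pointers */
};

static void demo_free_items(struct list_head *itemq)
{
	struct demo_item	*item, *n;
	int			i;

	/* _safe variant: 'n' holds the next node before 'item' is freed */
	list_for_each_entry_safe(item, n, itemq, ri_list) {
		list_del(&item->ri_list);
		for (i = 0; i < item->ri_cnt; i++)
			kfree(item->ri_buf[i]);
		kfree(item->ri_buf);
		kfree(item);
	}
}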
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 8b6c9e807efb..e79b56b4bca6 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -44,6 +44,8 @@
44#include "xfs_quota.h" 44#include "xfs_quota.h"
45#include "xfs_fsops.h" 45#include "xfs_fsops.h"
46#include "xfs_utils.h" 46#include "xfs_utils.h"
47#include "xfs_trace.h"
48
47 49
48STATIC void xfs_unmountfs_wait(xfs_mount_t *); 50STATIC void xfs_unmountfs_wait(xfs_mount_t *);
49 51
@@ -199,6 +201,38 @@ xfs_uuid_unmount(
199 201
200 202
201/* 203/*
204 * Reference counting access wrappers to the perag structures.
205 */
206struct xfs_perag *
207xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
208{
209 struct xfs_perag *pag;
210 int ref = 0;
211
212 spin_lock(&mp->m_perag_lock);
213 pag = radix_tree_lookup(&mp->m_perag_tree, agno);
214 if (pag) {
215 ASSERT(atomic_read(&pag->pag_ref) >= 0);
216 /* catch leaks in the positive direction during testing */
217 ASSERT(atomic_read(&pag->pag_ref) < 1000);
218 ref = atomic_inc_return(&pag->pag_ref);
219 }
220 spin_unlock(&mp->m_perag_lock);
221 trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
222 return pag;
223}
224
225void
226xfs_perag_put(struct xfs_perag *pag)
227{
228 int ref;
229
230 ASSERT(atomic_read(&pag->pag_ref) > 0);
231 ref = atomic_dec_return(&pag->pag_ref);
232 trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
233}
234
235/*
202 * Free up the resources associated with a mount structure. Assume that 236 * Free up the resources associated with a mount structure. Assume that
203 * the structure was initially zeroed, so we can tell which fields got 237 * the structure was initially zeroed, so we can tell which fields got
204 * initialized. 238 * initialized.
@@ -207,13 +241,16 @@ STATIC void
207xfs_free_perag( 241xfs_free_perag(
208 xfs_mount_t *mp) 242 xfs_mount_t *mp)
209{ 243{
210 if (mp->m_perag) { 244 xfs_agnumber_t agno;
211 int agno; 245 struct xfs_perag *pag;
212 246
213 for (agno = 0; agno < mp->m_maxagi; agno++) 247 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
214 if (mp->m_perag[agno].pagb_list) 248 spin_lock(&mp->m_perag_lock);
215 kmem_free(mp->m_perag[agno].pagb_list); 249 pag = radix_tree_delete(&mp->m_perag_tree, agno);
216 kmem_free(mp->m_perag); 250 ASSERT(pag);
251 ASSERT(atomic_read(&pag->pag_ref) == 0);
252 spin_unlock(&mp->m_perag_lock);
253 kmem_free(pag);
217 } 254 }
218} 255}
219 256
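xfs_perag_get()/xfs_perag_put() (added in the hunk above) pair a radix-tree lookup with a per-object reference count, so a per-AG structure handed to a caller cannot be torn down underneath it; note that xfs_free_perag() asserts pag_ref is zero before freeing. The core of the pattern in reduced form; the demo_* names are illustrative:

#include <linux/radix-tree.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/bug.h>

struct demo_ag {
	atomic_t	ref;
	unsigned long	agno;
};

static DEFINE_SPINLOCK(demo_lock);
static RADIX_TREE(demo_tree, GFP_ATOMIC);

static struct demo_ag *demo_ag_get(unsigned long agno)
{
	struct demo_ag *ag;

	spin_lock(&demo_lock);
	ag = radix_tree_lookup(&demo_tree, agno);
	if (ag)
		atomic_inc(&ag->ref);	/* pin: holder may now use *ag safely */
	spin_unlock(&demo_lock);
	return ag;
}

static void demo_ag_put(struct demo_ag *ag)
{
	BUG_ON(atomic_read(&ag->ref) <= 0);
	atomic_dec(&ag->ref);		/* teardown checks for ref == 0 */
}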
@@ -387,22 +424,57 @@ xfs_initialize_perag_icache(
387 } 424 }
388} 425}
389 426
390xfs_agnumber_t 427int
391xfs_initialize_perag( 428xfs_initialize_perag(
392 xfs_mount_t *mp, 429 xfs_mount_t *mp,
393 xfs_agnumber_t agcount) 430 xfs_agnumber_t agcount,
431 xfs_agnumber_t *maxagi)
394{ 432{
395 xfs_agnumber_t index, max_metadata; 433 xfs_agnumber_t index, max_metadata;
434 xfs_agnumber_t first_initialised = 0;
396 xfs_perag_t *pag; 435 xfs_perag_t *pag;
397 xfs_agino_t agino; 436 xfs_agino_t agino;
398 xfs_ino_t ino; 437 xfs_ino_t ino;
399 xfs_sb_t *sbp = &mp->m_sb; 438 xfs_sb_t *sbp = &mp->m_sb;
400 xfs_ino_t max_inum = XFS_MAXINUMBER_32; 439 xfs_ino_t max_inum = XFS_MAXINUMBER_32;
440 int error = -ENOMEM;
401 441
402 /* Check to see if the filesystem can overflow 32 bit inodes */ 442 /* Check to see if the filesystem can overflow 32 bit inodes */
403 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); 443 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
404 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); 444 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
405 445
446 /*
447 * Walk the current per-ag tree so we don't try to initialise AGs
448 * that already exist (growfs case). Allocate and insert all the
 449 * AGs we don't find, ready for initialisation.
450 */
451 for (index = 0; index < agcount; index++) {
452 pag = xfs_perag_get(mp, index);
453 if (pag) {
454 xfs_perag_put(pag);
455 continue;
456 }
457 if (!first_initialised)
458 first_initialised = index;
459 pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
460 if (!pag)
461 goto out_unwind;
462 if (radix_tree_preload(GFP_NOFS))
463 goto out_unwind;
464 spin_lock(&mp->m_perag_lock);
465 if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
466 BUG();
467 spin_unlock(&mp->m_perag_lock);
468 radix_tree_preload_end();
469 error = -EEXIST;
470 goto out_unwind;
471 }
472 pag->pag_agno = index;
473 pag->pag_mount = mp;
474 spin_unlock(&mp->m_perag_lock);
475 radix_tree_preload_end();
476 }
477
406 /* Clear the mount flag if no inode can overflow 32 bits 478 /* Clear the mount flag if no inode can overflow 32 bits
407 * on this filesystem, or if specifically requested.. 479 * on this filesystem, or if specifically requested..
408 */ 480 */
@@ -436,21 +508,33 @@ xfs_initialize_perag(
436 } 508 }
437 509
438 /* This ag is preferred for inodes */ 510 /* This ag is preferred for inodes */
439 pag = &mp->m_perag[index]; 511 pag = xfs_perag_get(mp, index);
440 pag->pagi_inodeok = 1; 512 pag->pagi_inodeok = 1;
441 if (index < max_metadata) 513 if (index < max_metadata)
442 pag->pagf_metadata = 1; 514 pag->pagf_metadata = 1;
443 xfs_initialize_perag_icache(pag); 515 xfs_initialize_perag_icache(pag);
516 xfs_perag_put(pag);
444 } 517 }
445 } else { 518 } else {
446 /* Setup default behavior for smaller filesystems */ 519 /* Setup default behavior for smaller filesystems */
447 for (index = 0; index < agcount; index++) { 520 for (index = 0; index < agcount; index++) {
448 pag = &mp->m_perag[index]; 521 pag = xfs_perag_get(mp, index);
449 pag->pagi_inodeok = 1; 522 pag->pagi_inodeok = 1;
450 xfs_initialize_perag_icache(pag); 523 xfs_initialize_perag_icache(pag);
524 xfs_perag_put(pag);
451 } 525 }
452 } 526 }
453 return index; 527 if (maxagi)
528 *maxagi = index;
529 return 0;
530
531out_unwind:
532 kmem_free(pag);
533 for (; index > first_initialised; index--) {
534 pag = radix_tree_delete(&mp->m_perag_tree, index);
535 kmem_free(pag);
536 }
537 return error;
454} 538}
455 539
456void 540void
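The insertion loop above follows the standard radix-tree dance: radix_tree_preload() outside the spinlock so the insert itself cannot fail on allocation, the insert under the lock, then radix_tree_preload_end(). A reduced sketch of one insertion with its unwind; demo_* names are illustrative:

#include <linux/radix-tree.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/errno.h>

struct demo_ag {
	unsigned long	agno;
};

static int demo_insert_ag(struct radix_tree_root *tree, spinlock_t *lock,
			  unsigned long agno)
{
	struct demo_ag	*ag;
	int		error;

	ag = kzalloc(sizeof(*ag), GFP_KERNEL);
	if (!ag)
		return -ENOMEM;
	ag->agno = agno;

	/* Preallocate tree nodes so the insert cannot fail under the lock. */
	error = radix_tree_preload(GFP_NOFS);
	if (error) {
		kfree(ag);
		return error;
	}

	spin_lock(lock);
	error = radix_tree_insert(tree, agno, ag);
	spin_unlock(lock);
	radix_tree_preload_end();

	if (error)		/* -EEXIST: already initialised (growfs) */
		kfree(ag);
	return error;
}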
@@ -581,10 +665,10 @@ xfs_readsb(xfs_mount_t *mp, int flags)
581 * access to the superblock. 665 * access to the superblock.
582 */ 666 */
583 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); 667 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
584 extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED; 668 extra_flags = XBF_LOCK | XBF_FS_MANAGED | XBF_MAPPED;
585 669
586 bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, 670 bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size),
587 BTOBB(sector_size), extra_flags); 671 extra_flags);
588 if (!bp || XFS_BUF_ISERROR(bp)) { 672 if (!bp || XFS_BUF_ISERROR(bp)) {
589 xfs_fs_mount_cmn_err(flags, "SB read failed"); 673 xfs_fs_mount_cmn_err(flags, "SB read failed");
590 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; 674 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
@@ -624,8 +708,8 @@ xfs_readsb(xfs_mount_t *mp, int flags)
624 XFS_BUF_UNMANAGE(bp); 708 XFS_BUF_UNMANAGE(bp);
625 xfs_buf_relse(bp); 709 xfs_buf_relse(bp);
626 sector_size = mp->m_sb.sb_sectsize; 710 sector_size = mp->m_sb.sb_sectsize;
627 bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, 711 bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR,
628 BTOBB(sector_size), extra_flags); 712 BTOBB(sector_size), extra_flags);
629 if (!bp || XFS_BUF_ISERROR(bp)) { 713 if (!bp || XFS_BUF_ISERROR(bp)) {
630 xfs_fs_mount_cmn_err(flags, "SB re-read failed"); 714 xfs_fs_mount_cmn_err(flags, "SB re-read failed");
631 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; 715 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
@@ -729,12 +813,13 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
729 error = xfs_ialloc_pagi_init(mp, NULL, index); 813 error = xfs_ialloc_pagi_init(mp, NULL, index);
730 if (error) 814 if (error)
731 return error; 815 return error;
732 pag = &mp->m_perag[index]; 816 pag = xfs_perag_get(mp, index);
733 ifree += pag->pagi_freecount; 817 ifree += pag->pagi_freecount;
734 ialloc += pag->pagi_count; 818 ialloc += pag->pagi_count;
735 bfree += pag->pagf_freeblks; 819 bfree += pag->pagf_freeblks;
736 bfreelst += pag->pagf_flcount; 820 bfreelst += pag->pagf_flcount;
737 btree += pag->pagf_btreeblks; 821 btree += pag->pagf_btreeblks;
822 xfs_perag_put(pag);
738 } 823 }
739 /* 824 /*
740 * Overwrite incore superblock counters with just-read data 825 * Overwrite incore superblock counters with just-read data
@@ -1006,6 +1091,24 @@ xfs_mount_reset_sbqflags(
1006 return xfs_trans_commit(tp, 0); 1091 return xfs_trans_commit(tp, 0);
1007} 1092}
1008 1093
1094__uint64_t
1095xfs_default_resblks(xfs_mount_t *mp)
1096{
1097 __uint64_t resblks;
1098
1099 /*
1100 * We default to 5% or 8192 fsbs of space reserved, whichever is
1101 * smaller. This is intended to cover concurrent allocation
1102 * transactions when we initially hit enospc. These each require a 4
1103 * block reservation. Hence by default we cover roughly 2000 concurrent
1104 * allocation reservations.
1105 */
1106 resblks = mp->m_sb.sb_dblocks;
1107 do_div(resblks, 20);
1108 resblks = min_t(__uint64_t, resblks, 8192);
1109 return resblks;
1110}
1111
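do_div() divides the 64-bit value in place (and returns the remainder), which keeps this working on 32-bit builds without a 64-bit division helper in the compiler runtime. Worked example: a 1 TiB filesystem with 4 KiB blocks has sb_dblocks = 268435456, 5% of which is 13421772, so min_t() caps the default reserve at 8192 blocks; only filesystems under roughly 640 MiB come in below the cap. A sketch:

#include <linux/types.h>
#include <linux/kernel.h>	/* min_t() */
#include <asm/div64.h>		/* do_div() */

static u64 demo_default_resblks(u64 dblocks)
{
	u64 resblks = dblocks;

	do_div(resblks, 20);			/* resblks = dblocks / 20, i.e. 5% */
	return min_t(u64, resblks, 8192);	/* never more than 8192 blocks */
}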
1009/* 1112/*
1010 * This function does the following on an initial mount of a file system: 1113 * This function does the following on an initial mount of a file system:
1011 * - reads the superblock from disk and init the mount struct 1114 * - reads the superblock from disk and init the mount struct
@@ -1150,13 +1253,13 @@ xfs_mountfs(
1150 /* 1253 /*
1151 * Allocate and initialize the per-ag data. 1254 * Allocate and initialize the per-ag data.
1152 */ 1255 */
1153 init_rwsem(&mp->m_peraglock); 1256 spin_lock_init(&mp->m_perag_lock);
1154 mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), 1257 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS);
1155 KM_MAYFAIL); 1258 error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
1156 if (!mp->m_perag) 1259 if (error) {
1260 cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error);
1157 goto out_remove_uuid; 1261 goto out_remove_uuid;
1158 1262 }
1159 mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);
1160 1263
1161 if (!sbp->sb_logblocks) { 1264 if (!sbp->sb_logblocks) {
1162 cmn_err(CE_WARN, "XFS: no log defined"); 1265 cmn_err(CE_WARN, "XFS: no log defined");
@@ -1317,17 +1420,16 @@ xfs_mountfs(
1317 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations 1420 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
1318 * are not allowed to use this reserved space. 1421 * are not allowed to use this reserved space.
1319 * 1422 *
1320 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
1321 * This may drive us straight to ENOSPC on mount, but that implies 1423 * This may drive us straight to ENOSPC on mount, but that implies
1322 * we were already there on the last unmount. Warn if this occurs. 1424 * we were already there on the last unmount. Warn if this occurs.
1323 */ 1425 */
1324 resblks = mp->m_sb.sb_dblocks; 1426 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
1325 do_div(resblks, 20); 1427 resblks = xfs_default_resblks(mp);
1326 resblks = min_t(__uint64_t, resblks, 1024); 1428 error = xfs_reserve_blocks(mp, &resblks, NULL);
1327 error = xfs_reserve_blocks(mp, &resblks, NULL); 1429 if (error)
1328 if (error) 1430 cmn_err(CE_WARN, "XFS: Unable to allocate reserve "
1329 cmn_err(CE_WARN, "XFS: Unable to allocate reserve blocks. " 1431 "blocks. Continuing without a reserve pool.");
1330 "Continuing without a reserve pool."); 1432 }
1331 1433
1332 return 0; 1434 return 0;
1333 1435
@@ -1370,8 +1472,19 @@ xfs_unmountfs(
1370 * push out the iclog we will never get that unlocked. hence we 1472 * push out the iclog we will never get that unlocked. hence we
1371 * need to force the log first. 1473 * need to force the log first.
1372 */ 1474 */
1373 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1475 xfs_log_force(mp, XFS_LOG_SYNC);
1374 xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC); 1476
1477 /*
1478 * Do a delwri reclaim pass first so that as many dirty inodes are
1479 * queued up for IO as possible. Then flush the buffers before making
 1480 * a synchronous pass so that all the remaining inodes are reclaimed.
1481 * This makes the reclaim process as quick as possible by avoiding
1482 * synchronous writeout and blocking on inodes already in the delwri
1483 * state as much as possible.
1484 */
1485 xfs_reclaim_inodes(mp, 0);
1486 XFS_bflush(mp->m_ddev_targp);
1487 xfs_reclaim_inodes(mp, SYNC_WAIT);
1375 1488
1376 xfs_qm_unmount(mp); 1489 xfs_qm_unmount(mp);
1377 1490
@@ -1380,7 +1493,7 @@ xfs_unmountfs(
1380 * that nothing is pinned. This is important because bflush() 1493 * that nothing is pinned. This is important because bflush()
1381 * will skip pinned buffers. 1494 * will skip pinned buffers.
1382 */ 1495 */
1383 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1496 xfs_log_force(mp, XFS_LOG_SYNC);
1384 1497
1385 xfs_binval(mp->m_ddev_targp); 1498 xfs_binval(mp->m_ddev_targp);
1386 if (mp->m_rtdev_targp) { 1499 if (mp->m_rtdev_targp) {
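The unmount path now makes a non-blocking reclaim pass to queue dirty inodes, flushes the delayed-write buffer list in one batch, and only then runs the blocking pass for whatever remains. The shape of that two-phase pattern, generically; every name here is hypothetical:

struct demo_dev;
#define DEMO_WAIT	0x1
void demo_reclaim(struct demo_dev *dev, int flags);
void demo_flush(struct demo_dev *dev);

static void demo_teardown(struct demo_dev *dev)
{
	demo_reclaim(dev, 0);		/* async: queue dirty objects for IO */
	demo_flush(dev);		/* one batched delwri flush */
	demo_reclaim(dev, DEMO_WAIT);	/* sync: block only on the stragglers */
}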
@@ -1471,7 +1584,7 @@ xfs_log_sbcount(
1471 if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) 1584 if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
1472 return 0; 1585 return 0;
1473 1586
1474 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT); 1587 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP);
1475 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1588 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
1476 XFS_DEFAULT_LOG_COUNT); 1589 XFS_DEFAULT_LOG_COUNT);
1477 if (error) { 1590 if (error) {
@@ -1546,15 +1659,14 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
1546 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields); 1659 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields);
1547 1660
1548 /* find modified range */ 1661 /* find modified range */
1662 f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
1663 ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1664 last = xfs_sb_info[f + 1].offset - 1;
1549 1665
1550 f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); 1666 f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
1551 ASSERT((1LL << f) & XFS_SB_MOD_BITS); 1667 ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1552 first = xfs_sb_info[f].offset; 1668 first = xfs_sb_info[f].offset;
1553 1669
1554 f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
1555 ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1556 last = xfs_sb_info[f + 1].offset - 1;
1557
1558 xfs_trans_log_buf(tp, bp, first, last); 1670 xfs_trans_log_buf(tp, bp, first, last);
1559} 1671}
1560 1672
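xfs_mod_sb() converts the bitmask of modified superblock fields into one contiguous byte range to log: the lowest set bit selects the first modified field (its offset is the start of the range), the highest set bit selects the last one (the following field's offset minus one is the end). A hedged, userspace-flavoured sketch of the bit helpers, using GCC builtins as stand-ins for xfs_lowbit64()/xfs_highbit64():

#include <stdint.h>

/* Stand-in for xfs_lowbit64(): index of lowest set bit, -1 if none. */
static int demo_lowbit64(uint64_t v)
{
	return v ? __builtin_ctzll(v) : -1;
}

/* Stand-in for xfs_highbit64(): index of highest set bit, -1 if none. */
static int demo_highbit64(uint64_t v)
{
	return v ? 63 - __builtin_clzll(v) : -1;
}

/*
 * Given an offsets[] table with one entry per field plus a terminator
 * (mirroring xfs_sb_info[]), the logged byte range is:
 *
 *   first = offsets[demo_lowbit64(fields)];
 *   last  = offsets[demo_highbit64(fields) + 1] - 1;
 */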
@@ -1618,26 +1730,30 @@ xfs_mod_incore_sb_unlocked(
1618 lcounter += rem; 1730 lcounter += rem;
1619 } 1731 }
1620 } else { /* Taking blocks away */ 1732 } else { /* Taking blocks away */
1621
1622 lcounter += delta; 1733 lcounter += delta;
1734 if (lcounter >= 0) {
1735 mp->m_sb.sb_fdblocks = lcounter +
1736 XFS_ALLOC_SET_ASIDE(mp);
1737 return 0;
1738 }
1623 1739
1624 /* 1740 /*
1625 * If were out of blocks, use any available reserved blocks if 1741 * We are out of blocks, use any available reserved
 1626 * were allowed to. 1742 * blocks if we're allowed to.
1627 */ 1743 */
1744 if (!rsvd)
1745 return XFS_ERROR(ENOSPC);
1628 1746
1629 if (lcounter < 0) { 1747 lcounter = (long long)mp->m_resblks_avail + delta;
1630 if (rsvd) { 1748 if (lcounter >= 0) {
1631 lcounter = (long long)mp->m_resblks_avail + delta; 1749 mp->m_resblks_avail = lcounter;
1632 if (lcounter < 0) { 1750 return 0;
1633 return XFS_ERROR(ENOSPC);
1634 }
1635 mp->m_resblks_avail = lcounter;
1636 return 0;
1637 } else { /* not reserved */
1638 return XFS_ERROR(ENOSPC);
1639 }
1640 } 1751 }
1752 printk_once(KERN_WARNING
1753 "Filesystem \"%s\": reserve blocks depleted! "
1754 "Consider increasing reserve pool size.",
1755 mp->m_fsname);
1756 return XFS_ERROR(ENOSPC);
1641 } 1757 }
1642 1758
1643 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); 1759 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
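The rewritten "taking blocks away" branch above is a guard-clause restructure: the common success case returns first, then the reserve-pool fallback, and only then hard ENOSPC, with printk_once() keeping a depleted reserve pool from flooding the log. The control-flow skeleton, reduced to its essentials (demo_* illustrative):

#include <linux/kernel.h>
#include <linux/errno.h>

static int demo_take_blocks(long long *free, long long *rsv_avail,
			    long long delta, int rsvd)
{
	long long lcounter = *free + delta;	/* delta < 0 on this path */

	if (lcounter >= 0) {			/* common case: enough free */
		*free = lcounter;
		return 0;
	}
	if (!rsvd)				/* reserve pool off limits */
		return -ENOSPC;

	lcounter = *rsv_avail + delta;		/* dip into the reserve pool */
	if (lcounter >= 0) {
		*rsv_avail = lcounter;
		return 0;
	}
	printk_once(KERN_WARNING "demo: reserve blocks depleted!\n");
	return -ENOSPC;
}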
@@ -1885,7 +2001,7 @@ xfs_getsb(
1885 2001
1886 ASSERT(mp->m_sb_bp != NULL); 2002 ASSERT(mp->m_sb_bp != NULL);
1887 bp = mp->m_sb_bp; 2003 bp = mp->m_sb_bp;
1888 if (flags & XFS_BUF_TRYLOCK) { 2004 if (flags & XBF_TRYLOCK) {
1889 if (!XFS_BUF_CPSEMA(bp)) { 2005 if (!XFS_BUF_CPSEMA(bp)) {
1890 return NULL; 2006 return NULL;
1891 } 2007 }
@@ -1945,6 +2061,26 @@ xfs_mount_log_sb(
1945 return error; 2061 return error;
1946} 2062}
1947 2063
2064/*
2065 * If the underlying (data/log/rt) device is readonly, there are some
2066 * operations that cannot proceed.
2067 */
2068int
2069xfs_dev_is_read_only(
2070 struct xfs_mount *mp,
2071 char *message)
2072{
2073 if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
2074 xfs_readonly_buftarg(mp->m_logdev_targp) ||
2075 (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
2076 cmn_err(CE_NOTE,
2077 "XFS: %s required on read-only device.", message);
2078 cmn_err(CE_NOTE,
2079 "XFS: write access unavailable, cannot proceed.");
2080 return EROFS;
2081 }
2082 return 0;
2083}
1948 2084
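xfs_dev_is_read_only() centralises the "is any backing device read-only?" test for code paths that need write access. A caller simply bails with the returned EROFS, along these lines (the message string is illustrative):

	int error;

	error = xfs_dev_is_read_only(mp, "swap extents");
	if (error)
		return error;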
1949#ifdef HAVE_PERCPU_SB 2085#ifdef HAVE_PERCPU_SB
1950/* 2086/*
@@ -2123,7 +2259,7 @@ xfs_icsb_destroy_counters(
2123 mutex_destroy(&mp->m_icsb_mutex); 2259 mutex_destroy(&mp->m_icsb_mutex);
2124} 2260}
2125 2261
2126STATIC_INLINE void 2262STATIC void
2127xfs_icsb_lock_cntr( 2263xfs_icsb_lock_cntr(
2128 xfs_icsb_cnts_t *icsbp) 2264 xfs_icsb_cnts_t *icsbp)
2129{ 2265{
@@ -2132,7 +2268,7 @@ xfs_icsb_lock_cntr(
2132 } 2268 }
2133} 2269}
2134 2270
2135STATIC_INLINE void 2271STATIC void
2136xfs_icsb_unlock_cntr( 2272xfs_icsb_unlock_cntr(
2137 xfs_icsb_cnts_t *icsbp) 2273 xfs_icsb_cnts_t *icsbp)
2138{ 2274{
@@ -2140,7 +2276,7 @@ xfs_icsb_unlock_cntr(
2140} 2276}
2141 2277
2142 2278
2143STATIC_INLINE void 2279STATIC void
2144xfs_icsb_lock_all_counters( 2280xfs_icsb_lock_all_counters(
2145 xfs_mount_t *mp) 2281 xfs_mount_t *mp)
2146{ 2282{
@@ -2153,7 +2289,7 @@ xfs_icsb_lock_all_counters(
2153 } 2289 }
2154} 2290}
2155 2291
2156STATIC_INLINE void 2292STATIC void
2157xfs_icsb_unlock_all_counters( 2293xfs_icsb_unlock_all_counters(
2158 xfs_mount_t *mp) 2294 xfs_mount_t *mp)
2159{ 2295{
@@ -2389,12 +2525,12 @@ xfs_icsb_modify_counters(
2389{ 2525{
2390 xfs_icsb_cnts_t *icsbp; 2526 xfs_icsb_cnts_t *icsbp;
2391 long long lcounter; /* long counter for 64 bit fields */ 2527 long long lcounter; /* long counter for 64 bit fields */
2392 int cpu, ret = 0; 2528 int ret = 0;
2393 2529
2394 might_sleep(); 2530 might_sleep();
2395again: 2531again:
2396 cpu = get_cpu(); 2532 preempt_disable();
2397 icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu); 2533 icsbp = this_cpu_ptr(mp->m_sb_cnts);
2398 2534
2399 /* 2535 /*
2400 * if the counter is disabled, go to slow path 2536 * if the counter is disabled, go to slow path
@@ -2438,11 +2574,11 @@ again:
2438 break; 2574 break;
2439 } 2575 }
2440 xfs_icsb_unlock_cntr(icsbp); 2576 xfs_icsb_unlock_cntr(icsbp);
2441 put_cpu(); 2577 preempt_enable();
2442 return 0; 2578 return 0;
2443 2579
2444slow_path: 2580slow_path:
2445 put_cpu(); 2581 preempt_enable();
2446 2582
2447 /* 2583 /*
2448 * serialise with a mutex so we don't burn lots of cpu on 2584 * serialise with a mutex so we don't burn lots of cpu on
@@ -2490,7 +2626,7 @@ slow_path:
2490 2626
2491balance_counter: 2627balance_counter:
2492 xfs_icsb_unlock_cntr(icsbp); 2628 xfs_icsb_unlock_cntr(icsbp);
2493 put_cpu(); 2629 preempt_enable();
2494 2630
2495 /* 2631 /*
2496 * We may have multiple threads here if multiple per-cpu 2632 * We may have multiple threads here if multiple per-cpu
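The counter fast path swaps get_cpu()/put_cpu() for preempt_disable() plus this_cpu_ptr(): the task stays pinned to one CPU's counter instance just the same, but without carrying the cpu number around. A reduced sketch of a per-cpu counter update in that style (demo_* illustrative):

#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/errno.h>

struct demo_cnt {
	long long	count;
};

static int demo_mod_counter(struct demo_cnt __percpu *cnts, long long delta)
{
	struct demo_cnt	*c;
	int		ret = 0;

	preempt_disable();		/* stay on this CPU's instance */
	c = this_cpu_ptr(cnts);
	c->count += delta;
	if (c->count < 0) {		/* would go negative: undo and fail */
		c->count -= delta;
		ret = -ENOSPC;
	}
	preempt_enable();
	return ret;
}

The array itself would come from alloc_percpu(struct demo_cnt) and go away with free_percpu(), matching the new __percpu annotation on m_sb_cnts in the xfs_mount.h hunk below.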
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a6c023bc0fb2..9ff48a16a7ee 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -78,7 +78,8 @@ typedef int (*xfs_send_destroy_t)(struct xfs_inode *, dm_right_t);
78typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *, 78typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *,
79 struct xfs_inode *, dm_right_t, 79 struct xfs_inode *, dm_right_t,
80 struct xfs_inode *, dm_right_t, 80 struct xfs_inode *, dm_right_t,
81 const char *, const char *, mode_t, int, int); 81 const unsigned char *, const unsigned char *,
82 mode_t, int, int);
82typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t, 83typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t,
83 char *, char *); 84 char *, char *);
84typedef void (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *, 85typedef void (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *,
@@ -93,6 +94,9 @@ typedef struct xfs_dmops {
93 xfs_send_unmount_t xfs_send_unmount; 94 xfs_send_unmount_t xfs_send_unmount;
94} xfs_dmops_t; 95} xfs_dmops_t;
95 96
97#define XFS_DMAPI_UNMOUNT_FLAGS(mp) \
98 (((mp)->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ? 0 : DM_FLAGS_UNWANTED)
99
96#define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \ 100#define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \
97 (*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock) 101 (*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock)
98#define XFS_SEND_MMAP(mp, vma,fl) \ 102#define XFS_SEND_MMAP(mp, vma,fl) \
@@ -101,12 +105,24 @@ typedef struct xfs_dmops {
101 (*(mp)->m_dm_ops->xfs_send_destroy)(ip,right) 105 (*(mp)->m_dm_ops->xfs_send_destroy)(ip,right)
102#define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ 106#define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
103 (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl) 107 (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl)
104#define XFS_SEND_PREUNMOUNT(mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
105 (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT,mp,b1,r1,b2,r2,n1,n2,mode,rval,fl)
106#define XFS_SEND_MOUNT(mp,right,path,name) \ 108#define XFS_SEND_MOUNT(mp,right,path,name) \
107 (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name) 109 (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name)
108#define XFS_SEND_UNMOUNT(mp, ip,right,mode,rval,fl) \ 110#define XFS_SEND_PREUNMOUNT(mp) \
109 (*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl) 111do { \
112 if (mp->m_flags & XFS_MOUNT_DMAPI) { \
113 (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT, mp, \
114 (mp)->m_rootip, DM_RIGHT_NULL, \
115 (mp)->m_rootip, DM_RIGHT_NULL, \
116 NULL, NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \
117 } \
118} while (0)
119#define XFS_SEND_UNMOUNT(mp) \
120do { \
121 if (mp->m_flags & XFS_MOUNT_DMAPI) { \
122 (*(mp)->m_dm_ops->xfs_send_unmount)(mp, (mp)->m_rootip, \
123 DM_RIGHT_NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \
124 } \
125} while (0)
110 126
111 127
112#ifdef HAVE_PERCPU_SB 128#ifdef HAVE_PERCPU_SB
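The new XFS_SEND_PREUNMOUNT()/XFS_SEND_UNMOUNT() definitions wrap their multi-statement bodies in do { ... } while (0), the usual idiom that makes a macro expand to exactly one statement. Why it matters (BAD_NOTIFY/GOOD_NOTIFY and their callees are hypothetical):

struct demo_mount;
void audit_log(struct demo_mount *mp);
void send_event(struct demo_mount *mp);

#define BAD_NOTIFY(mp)	\
	audit_log(mp);	\
	send_event(mp)			/* expands to two statements! */

#define GOOD_NOTIFY(mp)			\
do {					\
	audit_log(mp);			\
	send_event(mp);			\
} while (0)

/*
 * if (unmounting)
 *	BAD_NOTIFY(mp);		only audit_log() is conditional, and a
 * else				following 'else' no longer parses;
 *	...			GOOD_NOTIFY(mp) behaves as one statement.
 */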
@@ -192,8 +208,8 @@ typedef struct xfs_mount {
192 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ 208 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
193 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ 209 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
194 uint m_in_maxlevels; /* max inobt btree levels. */ 210 uint m_in_maxlevels; /* max inobt btree levels. */
195 struct xfs_perag *m_perag; /* per-ag accounting info */ 211 struct radix_tree_root m_perag_tree; /* per-ag accounting info */
196 struct rw_semaphore m_peraglock; /* lock for m_perag (pointer) */ 212 spinlock_t m_perag_lock; /* lock for m_perag_tree */
197 struct mutex m_growlock; /* growfs mutex */ 213 struct mutex m_growlock; /* growfs mutex */
198 int m_fixedfsid[2]; /* unchanged for life of FS */ 214 int m_fixedfsid[2]; /* unchanged for life of FS */
199 uint m_dmevmask; /* DMI events for this FS */ 215 uint m_dmevmask; /* DMI events for this FS */
@@ -209,6 +225,7 @@ typedef struct xfs_mount {
209 __uint64_t m_maxioffset; /* maximum inode offset */ 225 __uint64_t m_maxioffset; /* maximum inode offset */
210 __uint64_t m_resblks; /* total reserved blocks */ 226 __uint64_t m_resblks; /* total reserved blocks */
211 __uint64_t m_resblks_avail;/* available reserved blocks */ 227 __uint64_t m_resblks_avail;/* available reserved blocks */
228 __uint64_t m_resblks_save; /* reserved blks @ remount,ro */
212 int m_dalign; /* stripe unit */ 229 int m_dalign; /* stripe unit */
213 int m_swidth; /* stripe width */ 230 int m_swidth; /* stripe width */
214 int m_sinoalign; /* stripe unit inode alignment */ 231 int m_sinoalign; /* stripe unit inode alignment */
@@ -228,7 +245,7 @@ typedef struct xfs_mount {
228 struct xfs_qmops *m_qm_ops; /* vector of XQM ops */ 245 struct xfs_qmops *m_qm_ops; /* vector of XQM ops */
229 atomic_t m_active_trans; /* number trans frozen */ 246 atomic_t m_active_trans; /* number trans frozen */
230#ifdef HAVE_PERCPU_SB 247#ifdef HAVE_PERCPU_SB
231 xfs_icsb_cnts_t *m_sb_cnts; /* per-cpu superblock counters */ 248 xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */
232 unsigned long m_icsb_counters; /* disabled per-cpu counters */ 249 unsigned long m_icsb_counters; /* disabled per-cpu counters */
233 struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */ 250 struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */
234 struct mutex m_icsb_mutex; /* balancer sync lock */ 251 struct mutex m_icsb_mutex; /* balancer sync lock */
@@ -242,6 +259,7 @@ typedef struct xfs_mount {
242 wait_queue_head_t m_wait_single_sync_task; 259 wait_queue_head_t m_wait_single_sync_task;
243 __int64_t m_update_flags; /* sb flags we need to update 260 __int64_t m_update_flags; /* sb flags we need to update
244 on the next remount,rw */ 261 on the next remount,rw */
262 struct list_head m_mplist; /* inode shrinker mount list */
245} xfs_mount_t; 263} xfs_mount_t;
246 264
247/* 265/*
@@ -369,31 +387,22 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
369} 387}
370 388
371/* 389/*
372 * perag get/put wrappers for eventual ref counting 390 * perag get/put wrappers for ref counting
373 */ 391 */
374static inline xfs_perag_t * 392struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
375xfs_get_perag(struct xfs_mount *mp, xfs_ino_t ino) 393void xfs_perag_put(struct xfs_perag *pag);
376{
377 return &mp->m_perag[XFS_INO_TO_AGNO(mp, ino)];
378}
379
380static inline void
381xfs_put_perag(struct xfs_mount *mp, xfs_perag_t *pag)
382{
383 /* nothing to see here, move along */
384}
385 394
386/* 395/*
387 * Per-cpu superblock locking functions 396 * Per-cpu superblock locking functions
388 */ 397 */
389#ifdef HAVE_PERCPU_SB 398#ifdef HAVE_PERCPU_SB
390STATIC_INLINE void 399static inline void
391xfs_icsb_lock(xfs_mount_t *mp) 400xfs_icsb_lock(xfs_mount_t *mp)
392{ 401{
393 mutex_lock(&mp->m_icsb_mutex); 402 mutex_lock(&mp->m_icsb_mutex);
394} 403}
395 404
396STATIC_INLINE void 405static inline void
397xfs_icsb_unlock(xfs_mount_t *mp) 406xfs_icsb_unlock(xfs_mount_t *mp)
398{ 407{
399 mutex_unlock(&mp->m_icsb_mutex); 408 mutex_unlock(&mp->m_icsb_mutex);
@@ -413,6 +422,7 @@ typedef struct xfs_mod_sb {
413} xfs_mod_sb_t; 422} xfs_mod_sb_t;
414 423
415extern int xfs_log_sbcount(xfs_mount_t *, uint); 424extern int xfs_log_sbcount(xfs_mount_t *, uint);
425extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
416extern int xfs_mountfs(xfs_mount_t *mp); 426extern int xfs_mountfs(xfs_mount_t *mp);
417 427
418extern void xfs_unmountfs(xfs_mount_t *); 428extern void xfs_unmountfs(xfs_mount_t *);
@@ -427,6 +437,8 @@ extern void xfs_freesb(xfs_mount_t *);
427extern int xfs_fs_writable(xfs_mount_t *); 437extern int xfs_fs_writable(xfs_mount_t *);
428extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); 438extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
429 439
440extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
441
430extern int xfs_dmops_get(struct xfs_mount *); 442extern int xfs_dmops_get(struct xfs_mount *);
431extern void xfs_dmops_put(struct xfs_mount *); 443extern void xfs_dmops_put(struct xfs_mount *);
432 444
@@ -435,7 +447,8 @@ extern struct xfs_dmops xfs_dmcore_xfs;
435#endif /* __KERNEL__ */ 447#endif /* __KERNEL__ */
436 448
437extern void xfs_mod_sb(struct xfs_trans *, __int64_t); 449extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
438extern xfs_agnumber_t xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t); 450extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t,
451 xfs_agnumber_t *);
439extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); 452extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
440extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); 453extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
441 454
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 4b0613d99faa..45ce15dc5b2b 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -398,7 +398,7 @@ exit:
398 * guaranteed that all the free functions for all the elements have finished 398 * guaranteed that all the free functions for all the elements have finished
399 * executing and the reaper is not running. 399 * executing and the reaper is not running.
400 */ 400 */
401void 401static void
402xfs_mru_cache_flush( 402xfs_mru_cache_flush(
403 xfs_mru_cache_t *mru) 403 xfs_mru_cache_t *mru)
404{ 404{
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index 5d439f34b0c9..36dd3ec8b4eb 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -42,7 +42,6 @@ void xfs_mru_cache_uninit(void);
42int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms, 42int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
43 unsigned int grp_count, 43 unsigned int grp_count,
44 xfs_mru_cache_free_func_t free_func); 44 xfs_mru_cache_free_func_t free_func);
45void xfs_mru_cache_flush(xfs_mru_cache_t *mru);
46void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); 45void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
47int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, 46int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
48 void *value); 47 void *value);
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 3ec91ac74c2a..fdcab3f81dde 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -92,6 +92,14 @@ typedef struct xfs_dqblk {
92 92
93#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) 93#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
94 94
95#define XFS_DQ_FLAGS \
96 { XFS_DQ_USER, "USER" }, \
97 { XFS_DQ_PROJ, "PROJ" }, \
98 { XFS_DQ_GROUP, "GROUP" }, \
99 { XFS_DQ_DIRTY, "DIRTY" }, \
100 { XFS_DQ_WANT, "WANT" }, \
101 { XFS_DQ_INACTIVE, "INACTIVE" }
102
95/* 103/*
96 * In the worst case, when both user and group quotas are on, 104 * In the worst case, when both user and group quotas are on,
97 * we can have a max of three dquots changing in a single transaction. 105 * we can have a max of three dquots changing in a single transaction.
@@ -215,16 +223,9 @@ typedef struct xfs_qoff_logformat {
215#define XFS_QMOPT_RES_INOS 0x0800000 223#define XFS_QMOPT_RES_INOS 0x0800000
216 224
217/* 225/*
218 * flags for dqflush and dqflush_all.
219 */
220#define XFS_QMOPT_SYNC 0x1000000
221#define XFS_QMOPT_ASYNC 0x2000000
222#define XFS_QMOPT_DELWRI 0x4000000
223
224/*
225 * flags for dqalloc. 226 * flags for dqalloc.
226 */ 227 */
227#define XFS_QMOPT_INHERIT 0x8000000 228#define XFS_QMOPT_INHERIT 0x1000000
228 229
229/* 230/*
230 * flags to xfs_trans_mod_dquot. 231 * flags to xfs_trans_mod_dquot.
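The { value, "NAME" } shape of the XFS_DQ_FLAGS table added above is the format the kernel tracing macros consume: __print_flags() renders OR-able bits like these, and __print_symbolic() renders mutually exclusive values (compare the XFS_TRANS_TYPES table in the xfs_trans.h hunk below). A minimal sketch of a trace event using the table; the event name, its field, and its placement are illustrative, and the usual TRACE_SYSTEM/CREATE_TRACE_POINTS header boilerplate is elided:

/* In a tracepoint header processed by the CREATE_TRACE_POINTS machinery. */
TRACE_EVENT(demo_dquot,
	TP_PROTO(unsigned int flags),
	TP_ARGS(flags),
	TP_STRUCT__entry(
		__field(unsigned int, flags)
	),
	TP_fast_assign(
		__entry->flags = flags;
	),
	/* produces "USER|DIRTY" style output in the trace buffer */
	TP_printk("flags %s", __print_flags(__entry->flags, "|", XFS_DQ_FLAGS))
);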
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index b81deea0ce19..fc1cda23b817 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -39,6 +39,7 @@
39#include "xfs_utils.h" 39#include "xfs_utils.h"
40#include "xfs_trans_space.h" 40#include "xfs_trans_space.h"
41#include "xfs_vnodeops.h" 41#include "xfs_vnodeops.h"
42#include "xfs_trace.h"
42 43
43 44
44/* 45/*
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 385f6dceba5d..6be05f756d59 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -45,6 +45,7 @@
45#include "xfs_inode_item.h" 45#include "xfs_inode_item.h"
46#include "xfs_trans_space.h" 46#include "xfs_trans_space.h"
47#include "xfs_utils.h" 47#include "xfs_utils.h"
48#include "xfs_trace.h"
48 49
49 50
50/* 51/*
@@ -1516,6 +1517,8 @@ xfs_rtfree_range(
1516 */ 1517 */
1517 error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, 1518 error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
1518 &postblock); 1519 &postblock);
1520 if (error)
1521 return error;
1519 /* 1522 /*
1520 * If there are blocks not being freed at the front of the 1523 * If there are blocks not being freed at the front of the
1521 * old extent, add summary data for them to be allocated. 1524 * old extent, add summary data for them to be allocated.
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 3f816ad7ff19..e336742a58a4 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -44,48 +44,7 @@
44#include "xfs_error.h" 44#include "xfs_error.h"
45#include "xfs_buf_item.h" 45#include "xfs_buf_item.h"
46#include "xfs_rw.h" 46#include "xfs_rw.h"
47 47#include "xfs_trace.h"
48/*
49 * This is a subroutine for xfs_write() and other writers (xfs_ioctl)
50 * which clears the setuid and setgid bits when a file is written.
51 */
52int
53xfs_write_clear_setuid(
54 xfs_inode_t *ip)
55{
56 xfs_mount_t *mp;
57 xfs_trans_t *tp;
58 int error;
59
60 mp = ip->i_mount;
61 tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
62 if ((error = xfs_trans_reserve(tp, 0,
63 XFS_WRITEID_LOG_RES(mp),
64 0, 0, 0))) {
65 xfs_trans_cancel(tp, 0);
66 return error;
67 }
68 xfs_ilock(ip, XFS_ILOCK_EXCL);
69 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
70 xfs_trans_ihold(tp, ip);
71 ip->i_d.di_mode &= ~S_ISUID;
72
73 /*
74 * Note that we don't have to worry about mandatory
75 * file locking being disabled here because we only
76 * clear the S_ISGID bit if the Group execute bit is
77 * on, but if it was on then mandatory locking wouldn't
78 * have been enabled.
79 */
80 if (ip->i_d.di_mode & S_IXGRP) {
81 ip->i_d.di_mode &= ~S_ISGID;
82 }
83 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
84 xfs_trans_set_sync(tp);
85 error = xfs_trans_commit(tp, 0);
86 xfs_iunlock(ip, XFS_ILOCK_EXCL);
87 return 0;
88}
89 48
90/* 49/*
91 * Force a shutdown of the filesystem instantly while keeping 50 * Force a shutdown of the filesystem instantly while keeping
@@ -152,90 +111,6 @@ xfs_do_force_shutdown(
152 } 111 }
153} 112}
154 113
155
156/*
157 * Called when we want to stop a buffer from getting written or read.
158 * We attach the EIO error, muck with its flags, and call biodone
159 * so that the proper iodone callbacks get called.
160 */
161int
162xfs_bioerror(
163 xfs_buf_t *bp)
164{
165
166#ifdef XFSERRORDEBUG
167 ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
168#endif
169
170 /*
171 * No need to wait until the buffer is unpinned.
172 * We aren't flushing it.
173 */
174 xfs_buftrace("XFS IOERROR", bp);
175 XFS_BUF_ERROR(bp, EIO);
176 /*
177 * We're calling biodone, so delete B_DONE flag. Either way
178 * we have to call the iodone callback, and calling biodone
179 * probably is the best way since it takes care of
180 * GRIO as well.
181 */
182 XFS_BUF_UNREAD(bp);
183 XFS_BUF_UNDELAYWRITE(bp);
184 XFS_BUF_UNDONE(bp);
185 XFS_BUF_STALE(bp);
186
187 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
188 xfs_biodone(bp);
189
190 return (EIO);
191}
192
193/*
194 * Same as xfs_bioerror, except that we are releasing the buffer
195 * here ourselves, and avoiding the biodone call.
196 * This is meant for userdata errors; metadata bufs come with
197 * iodone functions attached, so that we can track down errors.
198 */
199int
200xfs_bioerror_relse(
201 xfs_buf_t *bp)
202{
203 int64_t fl;
204
205 ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks);
206 ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone);
207
208 xfs_buftrace("XFS IOERRELSE", bp);
209 fl = XFS_BUF_BFLAGS(bp);
210 /*
211 * No need to wait until the buffer is unpinned.
212 * We aren't flushing it.
213 *
214 * chunkhold expects B_DONE to be set, whether
215 * we actually finish the I/O or not. We don't want to
216 * change that interface.
217 */
218 XFS_BUF_UNREAD(bp);
219 XFS_BUF_UNDELAYWRITE(bp);
220 XFS_BUF_DONE(bp);
221 XFS_BUF_STALE(bp);
222 XFS_BUF_CLR_IODONE_FUNC(bp);
223 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
224 if (!(fl & XFS_B_ASYNC)) {
225 /*
226 * Mark b_error and B_ERROR _both_.
227 * Lot's of chunkcache code assumes that.
228 * There's no reason to mark error for
229 * ASYNC buffers.
230 */
231 XFS_BUF_ERROR(bp, EIO);
232 XFS_BUF_FINISH_IOWAIT(bp);
233 } else {
234 xfs_buf_relse(bp);
235 }
236 return (EIO);
237}
238
239/* 114/*
240 * Prints out an ALERT message about I/O error. 115 * Prints out an ALERT message about I/O error.
241 */ 116 */
@@ -277,10 +152,10 @@ xfs_read_buf(
277 xfs_buf_t *bp; 152 xfs_buf_t *bp;
278 int error; 153 int error;
279 154
280 if (flags) 155 if (!flags)
281 bp = xfs_buf_read_flags(target, blkno, len, flags); 156 flags = XBF_LOCK | XBF_MAPPED;
282 else 157
283 bp = xfs_buf_read(target, blkno, len, flags); 158 bp = xfs_buf_read(target, blkno, len, flags);
284 if (!bp) 159 if (!bp)
285 return XFS_ERROR(EIO); 160 return XFS_ERROR(EIO);
286 error = XFS_BUF_GETERROR(bp); 161 error = XFS_BUF_GETERROR(bp);
@@ -307,32 +182,23 @@ xfs_read_buf(
307} 182}
308 183
309/* 184/*
310 * Wrapper around bwrite() so that we can trap 185 * helper function to extract extent size hint from inode
311 * write errors, and act accordingly.
312 */ 186 */
313int 187xfs_extlen_t
314xfs_bwrite( 188xfs_get_extsz_hint(
315 struct xfs_mount *mp, 189 struct xfs_inode *ip)
316 struct xfs_buf *bp)
317{ 190{
318 int error; 191 xfs_extlen_t extsz;
319 192
320 /* 193 if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
321 * XXXsup how does this work for quotas. 194 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
322 */ 195 ? ip->i_d.di_extsize
323 XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); 196 : ip->i_mount->m_sb.sb_rextsize;
324 bp->b_mount = mp; 197 ASSERT(extsz);
325 XFS_BUF_WRITE(bp); 198 } else {
326 199 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
327 if ((error = XFS_bwrite(bp))) { 200 ? ip->i_d.di_extsize : 0;
328 ASSERT(mp);
329 /*
330 * Cannot put a buftrace here since if the buffer is not
331 * B_HOLD then we will brelse() the buffer before returning
332 * from bwrite and we could be tracing a buffer that has
333 * been reused.
334 */
335 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
336 } 201 }
337 return (error); 202
203 return extsz;
338} 204}
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index f5e4874c37d8..11c41ec6ed75 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -37,44 +37,13 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
37} 37}
38 38
39/* 39/*
40 * Flags for xfs_free_eofblocks
41 */
42#define XFS_FREE_EOF_LOCK (1<<0)
43#define XFS_FREE_EOF_NOLOCK (1<<1)
44
45
46/*
47 * helper function to extract extent size hint from inode
48 */
49STATIC_INLINE xfs_extlen_t
50xfs_get_extsz_hint(
51 xfs_inode_t *ip)
52{
53 xfs_extlen_t extsz;
54
55 if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
56 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
57 ? ip->i_d.di_extsize
58 : ip->i_mount->m_sb.sb_rextsize;
59 ASSERT(extsz);
60 } else {
61 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
62 ? ip->i_d.di_extsize : 0;
63 }
64 return extsz;
65}
66
67/*
68 * Prototypes for functions in xfs_rw.c. 40 * Prototypes for functions in xfs_rw.c.
69 */ 41 */
70extern int xfs_write_clear_setuid(struct xfs_inode *ip);
71extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
72extern int xfs_bioerror(struct xfs_buf *bp);
73extern int xfs_bioerror_relse(struct xfs_buf *bp);
74extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp, 42extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp,
75 xfs_daddr_t blkno, int len, uint flags, 43 xfs_daddr_t blkno, int len, uint flags,
76 struct xfs_buf **bpp); 44 struct xfs_buf **bpp);
77extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp, 45extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
78 xfs_buf_t *bp, xfs_daddr_t blkno); 46 xfs_buf_t *bp, xfs_daddr_t blkno);
47extern xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
79 48
80#endif /* __XFS_RW_H__ */ 49#endif /* __XFS_RW_H__ */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 66b849358e62..f73e358bae8d 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -236,19 +236,20 @@ xfs_trans_alloc(
236 uint type) 236 uint type)
237{ 237{
238 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); 238 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
239 return _xfs_trans_alloc(mp, type); 239 return _xfs_trans_alloc(mp, type, KM_SLEEP);
240} 240}
241 241
242xfs_trans_t * 242xfs_trans_t *
243_xfs_trans_alloc( 243_xfs_trans_alloc(
244 xfs_mount_t *mp, 244 xfs_mount_t *mp,
245 uint type) 245 uint type,
246 uint memflags)
246{ 247{
247 xfs_trans_t *tp; 248 xfs_trans_t *tp;
248 249
249 atomic_inc(&mp->m_active_trans); 250 atomic_inc(&mp->m_active_trans);
250 251
251 tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP); 252 tp = kmem_zone_zalloc(xfs_trans_zone, memflags);
252 tp->t_magic = XFS_TRANS_MAGIC; 253 tp->t_magic = XFS_TRANS_MAGIC;
253 tp->t_type = type; 254 tp->t_type = type;
254 tp->t_mountp = mp; 255 tp->t_mountp = mp;
@@ -795,7 +796,7 @@ _xfs_trans_commit(
795 int sync; 796 int sync;
796#define XFS_TRANS_LOGVEC_COUNT 16 797#define XFS_TRANS_LOGVEC_COUNT 16
797 xfs_log_iovec_t log_vector_fast[XFS_TRANS_LOGVEC_COUNT]; 798 xfs_log_iovec_t log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
798 void *commit_iclog; 799 struct xlog_in_core *commit_iclog;
799 int shutdown; 800 int shutdown;
800 801
801 commit_lsn = -1; 802 commit_lsn = -1;
@@ -980,9 +981,8 @@ shut_us_down:
980 */ 981 */
981 if (sync) { 982 if (sync) {
982 if (!error) { 983 if (!error) {
983 error = _xfs_log_force(mp, commit_lsn, 984 error = _xfs_log_force_lsn(mp, commit_lsn,
984 XFS_LOG_FORCE | XFS_LOG_SYNC, 985 XFS_LOG_SYNC, log_flushed);
985 log_flushed);
986 } 986 }
987 XFS_STATS_INC(xs_trans_sync); 987 XFS_STATS_INC(xs_trans_sync);
988 } else { 988 } else {
@@ -1120,7 +1120,7 @@ xfs_trans_fill_vecs(
1120 tp->t_header.th_num_items = nitems; 1120 tp->t_header.th_num_items = nitems;
1121 log_vector->i_addr = (xfs_caddr_t)&tp->t_header; 1121 log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
1122 log_vector->i_len = sizeof(xfs_trans_header_t); 1122 log_vector->i_len = sizeof(xfs_trans_header_t);
1123 XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_TRANSHDR); 1123 log_vector->i_type = XLOG_REG_TYPE_TRANSHDR;
1124} 1124}
1125 1125
1126 1126
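Note also the commit_iclog change above: the in-core log handle goes from void * to struct xlog_in_core *. A forward declaration is enough for a pointer, so the definition stays private to the log code while the compiler starts catching mismatched uses, the usual opaque-handle pattern:

/* Forward declaration: callers can hold the pointer but not peek inside. */
struct xlog_in_core;

struct demo_commit_state {
	struct xlog_in_core	*commit_iclog;	/* was: void *commit_iclog */
};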
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index ed47fc77759c..79c8bab9dfff 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -100,6 +100,49 @@ typedef struct xfs_trans_header {
100#define XFS_TRANS_TYPE_MAX 41 100#define XFS_TRANS_TYPE_MAX 41
101/* new transaction types need to be reflected in xfs_logprint(8) */ 101/* new transaction types need to be reflected in xfs_logprint(8) */
102 102
103#define XFS_TRANS_TYPES \
104 { XFS_TRANS_SETATTR_NOT_SIZE, "SETATTR_NOT_SIZE" }, \
105 { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \
106 { XFS_TRANS_INACTIVE, "INACTIVE" }, \
107 { XFS_TRANS_CREATE, "CREATE" }, \
108 { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \
109 { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \
110 { XFS_TRANS_REMOVE, "REMOVE" }, \
111 { XFS_TRANS_LINK, "LINK" }, \
112 { XFS_TRANS_RENAME, "RENAME" }, \
113 { XFS_TRANS_MKDIR, "MKDIR" }, \
114 { XFS_TRANS_RMDIR, "RMDIR" }, \
115 { XFS_TRANS_SYMLINK, "SYMLINK" }, \
116 { XFS_TRANS_SET_DMATTRS, "SET_DMATTRS" }, \
117 { XFS_TRANS_GROWFS, "GROWFS" }, \
118 { XFS_TRANS_STRAT_WRITE, "STRAT_WRITE" }, \
119 { XFS_TRANS_DIOSTRAT, "DIOSTRAT" }, \
120 { XFS_TRANS_WRITEID, "WRITEID" }, \
121 { XFS_TRANS_ADDAFORK, "ADDAFORK" }, \
122 { XFS_TRANS_ATTRINVAL, "ATTRINVAL" }, \
123 { XFS_TRANS_ATRUNCATE, "ATRUNCATE" }, \
124 { XFS_TRANS_ATTR_SET, "ATTR_SET" }, \
125 { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \
126 { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \
127 { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \
128 { XFS_TRANS_QM_SBCHANGE, "QM_SBCHANGE" }, \
129 { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \
130 { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \
131 { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \
132 { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \
133 { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \
134 { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \
135 { XFS_TRANS_SB_UNIT, "SB_UNIT" }, \
136 { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \
137 { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \
138 { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \
139 { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \
140 { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \
141 { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \
142 { XFS_TRANS_DUMMY1, "DUMMY1" }, \
143 { XFS_TRANS_DUMMY2, "DUMMY2" }, \
144 { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" }
145
103/* 146/*
104 * This structure is used to track log items associated with 147 * This structure is used to track log items associated with
105 * a transaction. It points to the log item and keeps some 148 * a transaction. It points to the log item and keeps some
@@ -782,6 +825,10 @@ typedef struct xfs_log_item {
782#define XFS_LI_IN_AIL 0x1 825#define XFS_LI_IN_AIL 0x1
783#define XFS_LI_ABORTED 0x2 826#define XFS_LI_ABORTED 0x2
784 827
828#define XFS_LI_FLAGS \
829 { XFS_LI_IN_AIL, "IN_AIL" }, \
830 { XFS_LI_ABORTED, "ABORTED" }
831
785typedef struct xfs_item_ops { 832typedef struct xfs_item_ops {
786 uint (*iop_size)(xfs_log_item_t *); 833 uint (*iop_size)(xfs_log_item_t *);
787 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); 834 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
@@ -814,8 +861,7 @@ typedef struct xfs_item_ops {
814#define XFS_ITEM_SUCCESS 0 861#define XFS_ITEM_SUCCESS 0
815#define XFS_ITEM_PINNED 1 862#define XFS_ITEM_PINNED 1
816#define XFS_ITEM_LOCKED 2 863#define XFS_ITEM_LOCKED 2
817#define XFS_ITEM_FLUSHING 3 864#define XFS_ITEM_PUSHBUF 3
818#define XFS_ITEM_PUSHBUF 4
819 865
820/* 866/*
821 * This structure is used to maintain a list of block ranges that have been 867 * This structure is used to maintain a list of block ranges that have been
@@ -864,7 +910,7 @@ typedef struct xfs_trans {
864 unsigned int t_blk_res_used; /* # of resvd blocks used */ 910 unsigned int t_blk_res_used; /* # of resvd blocks used */
865 unsigned int t_rtx_res; /* # of rt extents resvd */ 911 unsigned int t_rtx_res; /* # of rt extents resvd */
866 unsigned int t_rtx_res_used; /* # of resvd rt extents used */ 912 unsigned int t_rtx_res_used; /* # of resvd rt extents used */
867 xfs_log_ticket_t t_ticket; /* log mgr ticket */ 913 struct xlog_ticket *t_ticket; /* log mgr ticket */
868 xfs_lsn_t t_lsn; /* log seq num of start of 914 xfs_lsn_t t_lsn; /* log seq num of start of
869 * transaction. */ 915 * transaction. */
870 xfs_lsn_t t_commit_lsn; /* log seq num of end of 916 xfs_lsn_t t_commit_lsn; /* log seq num of end of
@@ -924,7 +970,7 @@ typedef struct xfs_trans {
924 * XFS transaction mechanism exported interfaces. 970 * XFS transaction mechanism exported interfaces.
925 */ 971 */
926xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); 972xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint);
927xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint); 973xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, uint);
928xfs_trans_t *xfs_trans_dup(xfs_trans_t *); 974xfs_trans_t *xfs_trans_dup(xfs_trans_t *);
929int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint, 975int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
930 uint, uint); 976 uint, uint);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 2ffc570679be..e799824f7245 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -237,14 +237,15 @@ out:
237} 237}
238 238
239/* 239/*
240 * Function that does the work of pushing on the AIL 240 * xfsaild_push does the work of pushing on the AIL. Returning a timeout of
241 * zero indicates that the caller should sleep until woken.
241 */ 242 */
242long 243long
243xfsaild_push( 244xfsaild_push(
244 struct xfs_ail *ailp, 245 struct xfs_ail *ailp,
245 xfs_lsn_t *last_lsn) 246 xfs_lsn_t *last_lsn)
246{ 247{
247 long tout = 1000; /* milliseconds */ 248 long tout = 0;
248 xfs_lsn_t last_pushed_lsn = *last_lsn; 249 xfs_lsn_t last_pushed_lsn = *last_lsn;
249 xfs_lsn_t target = ailp->xa_target; 250 xfs_lsn_t target = ailp->xa_target;
250 xfs_lsn_t lsn; 251 xfs_lsn_t lsn;
@@ -252,6 +253,7 @@ xfsaild_push(
252 int flush_log, count, stuck; 253 int flush_log, count, stuck;
253 xfs_mount_t *mp = ailp->xa_mount; 254 xfs_mount_t *mp = ailp->xa_mount;
254 struct xfs_ail_cursor *cur = &ailp->xa_cursors; 255 struct xfs_ail_cursor *cur = &ailp->xa_cursors;
256 int push_xfsbufd = 0;
255 257
256 spin_lock(&ailp->xa_lock); 258 spin_lock(&ailp->xa_lock);
257 xfs_trans_ail_cursor_init(ailp, cur); 259 xfs_trans_ail_cursor_init(ailp, cur);
@@ -262,7 +264,7 @@ xfsaild_push(
262 */ 264 */
263 xfs_trans_ail_cursor_done(ailp, cur); 265 xfs_trans_ail_cursor_done(ailp, cur);
264 spin_unlock(&ailp->xa_lock); 266 spin_unlock(&ailp->xa_lock);
265 last_pushed_lsn = 0; 267 *last_lsn = 0;
266 return tout; 268 return tout;
267 } 269 }
268 270
@@ -279,7 +281,6 @@ xfsaild_push(
 279 * prevents us from spinning when we can't do anything or there is 281 * prevents us from spinning when we can't do anything or there is
 280 * lots of contention on the AIL lists. 282 * lots of contention on the AIL lists.
 281 */ 283 */
282 tout = 10;
283 lsn = lip->li_lsn; 284 lsn = lip->li_lsn;
284 flush_log = stuck = count = 0; 285 flush_log = stuck = count = 0;
285 while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) { 286 while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
@@ -308,6 +309,7 @@ xfsaild_push(
308 XFS_STATS_INC(xs_push_ail_pushbuf); 309 XFS_STATS_INC(xs_push_ail_pushbuf);
309 IOP_PUSHBUF(lip); 310 IOP_PUSHBUF(lip);
310 last_pushed_lsn = lsn; 311 last_pushed_lsn = lsn;
312 push_xfsbufd = 1;
311 break; 313 break;
312 314
313 case XFS_ITEM_PINNED: 315 case XFS_ITEM_PINNED:
@@ -322,12 +324,6 @@ xfsaild_push(
322 stuck++; 324 stuck++;
323 break; 325 break;
324 326
325 case XFS_ITEM_FLUSHING:
326 XFS_STATS_INC(xs_push_ail_flushing);
327 last_pushed_lsn = lsn;
328 stuck++;
329 break;
330
331 default: 327 default:
332 ASSERT(0); 328 ASSERT(0);
333 break; 329 break;
@@ -371,19 +367,24 @@ xfsaild_push(
371 * move forward in the AIL. 367 * move forward in the AIL.
372 */ 368 */
373 XFS_STATS_INC(xs_push_ail_flush); 369 XFS_STATS_INC(xs_push_ail_flush);
374 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 370 xfs_log_force(mp, 0);
371 }
372
373 if (push_xfsbufd) {
374 /* we've got delayed write buffers to flush */
375 wake_up_process(mp->m_ddev_targp->bt_task);
375 } 376 }
376 377
377 if (!count) { 378 if (!count) {
378 /* We're past our target or empty, so idle */ 379 /* We're past our target or empty, so idle */
379 tout = 1000; 380 last_pushed_lsn = 0;
380 } else if (XFS_LSN_CMP(lsn, target) >= 0) { 381 } else if (XFS_LSN_CMP(lsn, target) >= 0) {
381 /* 382 /*
382 * We reached the target so wait a bit longer for I/O to 383 * We reached the target so wait a bit longer for I/O to
383 * complete and remove pushed items from the AIL before we 384 * complete and remove pushed items from the AIL before we
384 * start the next scan from the start of the AIL. 385 * start the next scan from the start of the AIL.
385 */ 386 */
386 tout += 20; 387 tout = 50;
387 last_pushed_lsn = 0; 388 last_pushed_lsn = 0;
388 } else if ((stuck * 100) / count > 90) { 389 } else if ((stuck * 100) / count > 90) {
389 /* 390 /*
@@ -395,11 +396,14 @@ xfsaild_push(
395 * Backoff a bit more to allow some I/O to complete before 396 * Backoff a bit more to allow some I/O to complete before
396 * continuing from where we were. 397 * continuing from where we were.
397 */ 398 */
398 tout += 10; 399 tout = 20;
400 } else {
401 /* more to do, but wait a short while before continuing */
402 tout = 10;
399 } 403 }
400 *last_lsn = last_pushed_lsn; 404 *last_lsn = last_pushed_lsn;
401 return tout; 405 return tout;
402} /* xfsaild_push */ 406}
403 407
404 408
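The rework above changes xfsaild_push() in two ways. First, IOP_PUSHBUF() no longer relies on AIL context to issue the I/O: the new push_xfsbufd flag records that delayed-write buffers were queued, and the function wakes the flush daemon behind mp->m_ddev_targp->bt_task to write them out (the XFS_ITEM_FLUSHING case also drops out of the switch). Second, the sleep time is now chosen from what the scan achieved rather than accumulated from a 10 ms base. A standalone sketch of the new policy, in milliseconds, where 0 means sleep until explicitly woken (the old code always slept a bounded time, defaulting to 1000 ms):

static long
aild_timeout(int count, int stuck, int reached_target)
{
	if (count == 0)
		return 0;	/* AIL empty or all pushed: idle until woken */
	if (reached_target)
		return 50;	/* let I/O complete before the next scan */
	if (stuck * 100 / count > 90)
		return 20;	/* mostly stuck items: longer backoff */
	return 10;		/* more to do: short pause before continuing */
}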
405/* 409/*
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 218829e6a152..fb586360d1c9 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -38,6 +38,7 @@
38#include "xfs_trans_priv.h" 38#include "xfs_trans_priv.h"
39#include "xfs_error.h" 39#include "xfs_error.h"
40#include "xfs_rw.h" 40#include "xfs_rw.h"
41#include "xfs_trace.h"
41 42
42 43
43STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *, 44STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *,
@@ -45,6 +46,65 @@ STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *,
45STATIC xfs_buf_t *xfs_trans_buf_item_match_all(xfs_trans_t *, xfs_buftarg_t *, 46STATIC xfs_buf_t *xfs_trans_buf_item_match_all(xfs_trans_t *, xfs_buftarg_t *,
46 xfs_daddr_t, int); 47 xfs_daddr_t, int);
47 48
49/*
50 * Add the locked buffer to the transaction.
51 *
52 * The buffer must be locked, and it cannot be associated with any
53 * transaction.
54 *
55 * If the buffer does not yet have a buf log item associated with it,
56 * then allocate one for it. Then add the buf item to the transaction.
57 */
58STATIC void
59_xfs_trans_bjoin(
60 struct xfs_trans *tp,
61 struct xfs_buf *bp,
62 int reset_recur)
63{
64 struct xfs_buf_log_item *bip;
65
66 ASSERT(XFS_BUF_ISBUSY(bp));
67 ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
68
69 /*
70 * The xfs_buf_log_item pointer is stored in b_fsprivate. If
71 * it doesn't have one yet, then allocate one and initialize it.
72 * The checks to see if one is there are in xfs_buf_item_init().
73 */
74 xfs_buf_item_init(bp, tp->t_mountp);
75 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
76 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
77 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
78 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
79 if (reset_recur)
80 bip->bli_recur = 0;
81
82 /*
83 * Take a reference for this transaction on the buf item.
84 */
85 atomic_inc(&bip->bli_refcount);
86
87 /*
88 * Get a log_item_desc to point at the new item.
89 */
90 (void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
91
92 /*
93 * Initialize b_fsprivate2 so we can find it with incore_match()
94 * in xfs_trans_get_buf() and friends above.
95 */
96 XFS_BUF_SET_FSPRIVATE2(bp, tp);
97
98}
99
100void
101xfs_trans_bjoin(
102 struct xfs_trans *tp,
103 struct xfs_buf *bp)
104{
105 _xfs_trans_bjoin(tp, bp, 0);
106 trace_xfs_trans_bjoin(bp->b_fspriv);
107}
48 108
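The new _xfs_trans_bjoin() helper centralizes the attach logic that the hunks below delete from xfs_trans_get_buf(), xfs_trans_getsb() and xfs_trans_read_buf(); those internal callers pass reset_recur=1 to start a fresh recursion count, while the exported xfs_trans_bjoin() passes 0 and leaves bli_recur alone. A hypothetical caller sketch (not from this patch) of the exported wrapper, which requires a locked buffer not yet owned by any transaction:

	struct xfs_buf	*bp;

	/* read the buffer outside any transaction machinery... */
	bp = xfs_buf_read(mp->m_ddev_targp, blkno, len,
			  XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK);
	if (bp && !XFS_BUF_GETERROR(bp))
		/* ...then hand its lock and life cycle over to tp */
		xfs_trans_bjoin(tp, bp);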
49/* 109/*
50 * Get and lock the buffer for the caller if it is not already 110 * Get and lock the buffer for the caller if it is not already
@@ -74,16 +134,14 @@ xfs_trans_get_buf(xfs_trans_t *tp,
74 xfs_buf_log_item_t *bip; 134 xfs_buf_log_item_t *bip;
75 135
76 if (flags == 0) 136 if (flags == 0)
77 flags = XFS_BUF_LOCK | XFS_BUF_MAPPED; 137 flags = XBF_LOCK | XBF_MAPPED;
78 138
79 /* 139 /*
80 * Default to a normal get_buf() call if the tp is NULL. 140 * Default to a normal get_buf() call if the tp is NULL.
81 */ 141 */
82 if (tp == NULL) { 142 if (tp == NULL)
83 bp = xfs_buf_get_flags(target_dev, blkno, len, 143 return xfs_buf_get(target_dev, blkno, len,
84 flags | BUF_BUSY); 144 flags | XBF_DONT_BLOCK);
85 return(bp);
86 }
87 145
88 /* 146 /*
89 * If we find the buffer in the cache with this transaction 147 * If we find the buffer in the cache with this transaction
@@ -98,79 +156,43 @@ xfs_trans_get_buf(xfs_trans_t *tp,
98 } 156 }
99 if (bp != NULL) { 157 if (bp != NULL) {
100 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 158 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
101 if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) { 159 if (XFS_FORCED_SHUTDOWN(tp->t_mountp))
102 xfs_buftrace("TRANS GET RECUR SHUT", bp);
103 XFS_BUF_SUPER_STALE(bp); 160 XFS_BUF_SUPER_STALE(bp);
104 } 161
105 /* 162 /*
106 * If the buffer is stale then it was binval'ed 163 * If the buffer is stale then it was binval'ed
107 * since last read. This doesn't matter since the 164 * since last read. This doesn't matter since the
108 * caller isn't allowed to use the data anyway. 165 * caller isn't allowed to use the data anyway.
109 */ 166 */
110 else if (XFS_BUF_ISSTALE(bp)) { 167 else if (XFS_BUF_ISSTALE(bp))
111 xfs_buftrace("TRANS GET RECUR STALE", bp);
112 ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); 168 ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
113 } 169
114 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 170 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
115 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 171 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
116 ASSERT(bip != NULL); 172 ASSERT(bip != NULL);
117 ASSERT(atomic_read(&bip->bli_refcount) > 0); 173 ASSERT(atomic_read(&bip->bli_refcount) > 0);
118 bip->bli_recur++; 174 bip->bli_recur++;
119 xfs_buftrace("TRANS GET RECUR", bp); 175 trace_xfs_trans_get_buf_recur(bip);
120 xfs_buf_item_trace("GET RECUR", bip);
121 return (bp); 176 return (bp);
122 } 177 }
123 178
124 /* 179 /*
125 * We always specify the BUF_BUSY flag within a transaction so 180 * We always specify the XBF_DONT_BLOCK flag within a transaction
126 * that get_buf does not try to push out a delayed write buffer 181 * so that get_buf does not try to push out a delayed write buffer
127 * which might cause another transaction to take place (if the 182 * which might cause another transaction to take place (if the
128 * buffer was delayed alloc). Such recursive transactions can 183 * buffer was delayed alloc). Such recursive transactions can
129 * easily deadlock with our current transaction as well as cause 184 * easily deadlock with our current transaction as well as cause
130 * us to run out of stack space. 185 * us to run out of stack space.
131 */ 186 */
132 bp = xfs_buf_get_flags(target_dev, blkno, len, flags | BUF_BUSY); 187 bp = xfs_buf_get(target_dev, blkno, len, flags | XBF_DONT_BLOCK);
133 if (bp == NULL) { 188 if (bp == NULL) {
134 return NULL; 189 return NULL;
135 } 190 }
136 191
137 ASSERT(!XFS_BUF_GETERROR(bp)); 192 ASSERT(!XFS_BUF_GETERROR(bp));
138 193
139 /* 194 _xfs_trans_bjoin(tp, bp, 1);
140 * The xfs_buf_log_item pointer is stored in b_fsprivate. If 195 trace_xfs_trans_get_buf(bp->b_fspriv);
141 * it doesn't have one yet, then allocate one and initialize it.
142 * The checks to see if one is there are in xfs_buf_item_init().
143 */
144 xfs_buf_item_init(bp, tp->t_mountp);
145
146 /*
147 * Set the recursion count for the buffer within this transaction
148 * to 0.
149 */
150 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
151 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
152 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
153 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
154 bip->bli_recur = 0;
155
156 /*
157 * Take a reference for this transaction on the buf item.
158 */
159 atomic_inc(&bip->bli_refcount);
160
161 /*
162 * Get a log_item_desc to point at the new item.
163 */
164 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);
165
166 /*
167 * Initialize b_fsprivate2 so we can find it with incore_match()
168 * above.
169 */
170 XFS_BUF_SET_FSPRIVATE2(bp, tp);
171
172 xfs_buftrace("TRANS GET", bp);
173 xfs_buf_item_trace("GET", bip);
174 return (bp); 196 return (bp);
175} 197}
176 198
@@ -210,49 +232,16 @@ xfs_trans_getsb(xfs_trans_t *tp,
210 ASSERT(bip != NULL); 232 ASSERT(bip != NULL);
211 ASSERT(atomic_read(&bip->bli_refcount) > 0); 233 ASSERT(atomic_read(&bip->bli_refcount) > 0);
212 bip->bli_recur++; 234 bip->bli_recur++;
213 xfs_buf_item_trace("GETSB RECUR", bip); 235 trace_xfs_trans_getsb_recur(bip);
214 return (bp); 236 return (bp);
215 } 237 }
216 238
217 bp = xfs_getsb(mp, flags); 239 bp = xfs_getsb(mp, flags);
218 if (bp == NULL) { 240 if (bp == NULL)
219 return NULL; 241 return NULL;
220 }
221
222 /*
223 * The xfs_buf_log_item pointer is stored in b_fsprivate. If
224 * it doesn't have one yet, then allocate one and initialize it.
225 * The checks to see if one is there are in xfs_buf_item_init().
226 */
227 xfs_buf_item_init(bp, mp);
228
229 /*
230 * Set the recursion count for the buffer within this transaction
231 * to 0.
232 */
233 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
234 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
235 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
236 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
237 bip->bli_recur = 0;
238 242
239 /* 243 _xfs_trans_bjoin(tp, bp, 1);
240 * Take a reference for this transaction on the buf item. 244 trace_xfs_trans_getsb(bp->b_fspriv);
241 */
242 atomic_inc(&bip->bli_refcount);
243
244 /*
245 * Get a log_item_desc to point at the new item.
246 */
247 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);
248
249 /*
250 * Initialize b_fsprivate2 so we can find it with incore_match()
251 * above.
252 */
253 XFS_BUF_SET_FSPRIVATE2(bp, tp);
254
255 xfs_buf_item_trace("GETSB", bip);
256 return (bp); 245 return (bp);
257} 246}
258 247
@@ -296,15 +285,15 @@ xfs_trans_read_buf(
296 int error; 285 int error;
297 286
298 if (flags == 0) 287 if (flags == 0)
299 flags = XFS_BUF_LOCK | XFS_BUF_MAPPED; 288 flags = XBF_LOCK | XBF_MAPPED;
300 289
301 /* 290 /*
302 * Default to a normal get_buf() call if the tp is NULL. 291 * Default to a normal get_buf() call if the tp is NULL.
303 */ 292 */
304 if (tp == NULL) { 293 if (tp == NULL) {
305 bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); 294 bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK);
306 if (!bp) 295 if (!bp)
307 return (flags & XFS_BUF_TRYLOCK) ? 296 return (flags & XBF_TRYLOCK) ?
308 EAGAIN : XFS_ERROR(ENOMEM); 297 EAGAIN : XFS_ERROR(ENOMEM);
309 298
310 if (XFS_BUF_GETERROR(bp) != 0) { 299 if (XFS_BUF_GETERROR(bp) != 0) {
@@ -350,7 +339,7 @@ xfs_trans_read_buf(
350 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 339 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
351 ASSERT((XFS_BUF_ISERROR(bp)) == 0); 340 ASSERT((XFS_BUF_ISERROR(bp)) == 0);
352 if (!(XFS_BUF_ISDONE(bp))) { 341 if (!(XFS_BUF_ISDONE(bp))) {
353 xfs_buftrace("READ_BUF_INCORE !DONE", bp); 342 trace_xfs_trans_read_buf_io(bp, _RET_IP_);
354 ASSERT(!XFS_BUF_ISASYNC(bp)); 343 ASSERT(!XFS_BUF_ISASYNC(bp));
355 XFS_BUF_READ(bp); 344 XFS_BUF_READ(bp);
356 xfsbdstrat(tp->t_mountp, bp); 345 xfsbdstrat(tp->t_mountp, bp);
@@ -375,7 +364,7 @@ xfs_trans_read_buf(
375 * brelse it either. Just get out. 364 * brelse it either. Just get out.
376 */ 365 */
377 if (XFS_FORCED_SHUTDOWN(mp)) { 366 if (XFS_FORCED_SHUTDOWN(mp)) {
378 xfs_buftrace("READ_BUF_INCORE XFSSHUTDN", bp); 367 trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
379 *bpp = NULL; 368 *bpp = NULL;
380 return XFS_ERROR(EIO); 369 return XFS_ERROR(EIO);
381 } 370 }
@@ -385,27 +374,26 @@ xfs_trans_read_buf(
385 bip->bli_recur++; 374 bip->bli_recur++;
386 375
387 ASSERT(atomic_read(&bip->bli_refcount) > 0); 376 ASSERT(atomic_read(&bip->bli_refcount) > 0);
388 xfs_buf_item_trace("READ RECUR", bip); 377 trace_xfs_trans_read_buf_recur(bip);
389 *bpp = bp; 378 *bpp = bp;
390 return 0; 379 return 0;
391 } 380 }
392 381
393 /* 382 /*
394 * We always specify the BUF_BUSY flag within a transaction so 383 * We always specify the XBF_DONT_BLOCK flag within a transaction
395 * that get_buf does not try to push out a delayed write buffer 384 * so that get_buf does not try to push out a delayed write buffer
396 * which might cause another transaction to take place (if the 385 * which might cause another transaction to take place (if the
397 * buffer was delayed alloc). Such recursive transactions can 386 * buffer was delayed alloc). Such recursive transactions can
398 * easily deadlock with our current transaction as well as cause 387 * easily deadlock with our current transaction as well as cause
399 * us to run out of stack space. 388 * us to run out of stack space.
400 */ 389 */
401 bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); 390 bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK);
402 if (bp == NULL) { 391 if (bp == NULL) {
403 *bpp = NULL; 392 *bpp = NULL;
404 return 0; 393 return 0;
405 } 394 }
406 if (XFS_BUF_GETERROR(bp) != 0) { 395 if (XFS_BUF_GETERROR(bp) != 0) {
407 XFS_BUF_SUPER_STALE(bp); 396 XFS_BUF_SUPER_STALE(bp);
408 xfs_buftrace("READ ERROR", bp);
409 error = XFS_BUF_GETERROR(bp); 397 error = XFS_BUF_GETERROR(bp);
410 398
411 xfs_ioerror_alert("xfs_trans_read_buf", mp, 399 xfs_ioerror_alert("xfs_trans_read_buf", mp,
@@ -431,41 +419,9 @@ xfs_trans_read_buf(
431 if (XFS_FORCED_SHUTDOWN(mp)) 419 if (XFS_FORCED_SHUTDOWN(mp))
432 goto shutdown_abort; 420 goto shutdown_abort;
433 421
434 /* 422 _xfs_trans_bjoin(tp, bp, 1);
435 * The xfs_buf_log_item pointer is stored in b_fsprivate. If 423 trace_xfs_trans_read_buf(bp->b_fspriv);
436 * it doesn't have one yet, then allocate one and initialize it.
437 * The checks to see if one is there are in xfs_buf_item_init().
438 */
439 xfs_buf_item_init(bp, tp->t_mountp);
440
441 /*
442 * Set the recursion count for the buffer within this transaction
443 * to 0.
444 */
445 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
446 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
447 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
448 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
449 bip->bli_recur = 0;
450
451 /*
452 * Take a reference for this transaction on the buf item.
453 */
454 atomic_inc(&bip->bli_refcount);
455
456 /*
457 * Get a log_item_desc to point at the new item.
458 */
459 (void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);
460 424
461 /*
462 * Initialize b_fsprivate2 so we can find it with incore_match()
463 * above.
464 */
465 XFS_BUF_SET_FSPRIVATE2(bp, tp);
466
467 xfs_buftrace("TRANS READ", bp);
468 xfs_buf_item_trace("READ", bip);
469 *bpp = bp; 425 *bpp = bp;
470 return 0; 426 return 0;
471 427
@@ -480,10 +436,10 @@ shutdown_abort:
480 if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) 436 if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
481 cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp); 437 cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp);
482#endif 438#endif
483 ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) != 439 ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) !=
484 (XFS_B_STALE|XFS_B_DELWRI)); 440 (XBF_STALE|XBF_DELWRI));
485 441
486 xfs_buftrace("READ_BUF XFSSHUTDN", bp); 442 trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
487 xfs_buf_relse(bp); 443 xfs_buf_relse(bp);
488 *bpp = NULL; 444 *bpp = NULL;
489 return XFS_ERROR(EIO); 445 return XFS_ERROR(EIO);
@@ -549,13 +505,14 @@ xfs_trans_brelse(xfs_trans_t *tp,
549 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); 505 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
550 ASSERT(lidp != NULL); 506 ASSERT(lidp != NULL);
551 507
508 trace_xfs_trans_brelse(bip);
509
552 /* 510 /*
553 * If the release is just for a recursive lock, 511 * If the release is just for a recursive lock,
554 * then decrement the count and return. 512 * then decrement the count and return.
555 */ 513 */
556 if (bip->bli_recur > 0) { 514 if (bip->bli_recur > 0) {
557 bip->bli_recur--; 515 bip->bli_recur--;
558 xfs_buf_item_trace("RELSE RECUR", bip);
559 return; 516 return;
560 } 517 }
561 518
@@ -563,10 +520,8 @@ xfs_trans_brelse(xfs_trans_t *tp,
563 * If the buffer is dirty within this transaction, we can't 520 * If the buffer is dirty within this transaction, we can't
564 * release it until we commit. 521 * release it until we commit.
565 */ 522 */
566 if (lidp->lid_flags & XFS_LID_DIRTY) { 523 if (lidp->lid_flags & XFS_LID_DIRTY)
567 xfs_buf_item_trace("RELSE DIRTY", bip);
568 return; 524 return;
569 }
570 525
571 /* 526 /*
572 * If the buffer has been invalidated, then we can't release 527 * If the buffer has been invalidated, then we can't release
@@ -574,13 +529,10 @@ xfs_trans_brelse(xfs_trans_t *tp,
574 * as part of this transaction. This prevents us from pulling 529 * as part of this transaction. This prevents us from pulling
575 * the item from the AIL before we should. 530 * the item from the AIL before we should.
576 */ 531 */
577 if (bip->bli_flags & XFS_BLI_STALE) { 532 if (bip->bli_flags & XFS_BLI_STALE)
578 xfs_buf_item_trace("RELSE STALE", bip);
579 return; 533 return;
580 }
581 534
582 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 535 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
583 xfs_buf_item_trace("RELSE", bip);
584 536
585 /* 537 /*
586 * Free up the log item descriptor tracking the released item. 538 * Free up the log item descriptor tracking the released item.
@@ -634,53 +586,6 @@ xfs_trans_brelse(xfs_trans_t *tp,
634} 586}
635 587
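xfs_trans_brelse() keeps its structure here; only the trace stubs on the early-out paths change. The recursion rule it implements is worth spelling out: a repeated get of the same buffer inside one transaction only bumps bli_recur, and each brelse first unwinds that count before a real release is considered. A hypothetical sketch:

	bp = xfs_trans_get_buf(tp, target, blkno, len, 0);  /* attach, bli_recur == 0 */
	bp = xfs_trans_get_buf(tp, target, blkno, len, 0);  /* same buffer, bli_recur == 1 */

	xfs_trans_brelse(tp, bp);	/* bli_recur back to 0, buffer stays in tp */
	xfs_trans_brelse(tp, bp);	/* real release, unless dirty or stale in tp */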
636/* 588/*
637 * Add the locked buffer to the transaction.
638 * The buffer must be locked, and it cannot be associated with any
639 * transaction.
640 *
641 * If the buffer does not yet have a buf log item associated with it,
642 * then allocate one for it. Then add the buf item to the transaction.
643 */
644void
645xfs_trans_bjoin(xfs_trans_t *tp,
646 xfs_buf_t *bp)
647{
648 xfs_buf_log_item_t *bip;
649
650 ASSERT(XFS_BUF_ISBUSY(bp));
651 ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
652
653 /*
654 * The xfs_buf_log_item pointer is stored in b_fsprivate. If
655 * it doesn't have one yet, then allocate one and initialize it.
656 * The checks to see if one is there are in xfs_buf_item_init().
657 */
658 xfs_buf_item_init(bp, tp->t_mountp);
659 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
660 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
661 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
662 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
663
664 /*
665 * Take a reference for this transaction on the buf item.
666 */
667 atomic_inc(&bip->bli_refcount);
668
669 /*
670 * Get a log_item_desc to point at the new item.
671 */
672 (void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
673
674 /*
675 * Initialize b_fsprivate2 so we can find it with incore_match()
676 * in xfs_trans_get_buf() and friends above.
677 */
678 XFS_BUF_SET_FSPRIVATE2(bp, tp);
679
680 xfs_buf_item_trace("BJOIN", bip);
681}
682
683/*
684 * Mark the buffer as not needing to be unlocked when the buf item's 589 * Mark the buffer as not needing to be unlocked when the buf item's
685 * IOP_UNLOCK() routine is called. The buffer must already be locked 590 * IOP_UNLOCK() routine is called. The buffer must already be locked
686 * and associated with the given transaction. 591 * and associated with the given transaction.
@@ -701,7 +606,7 @@ xfs_trans_bhold(xfs_trans_t *tp,
701 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL)); 606 ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
702 ASSERT(atomic_read(&bip->bli_refcount) > 0); 607 ASSERT(atomic_read(&bip->bli_refcount) > 0);
703 bip->bli_flags |= XFS_BLI_HOLD; 608 bip->bli_flags |= XFS_BLI_HOLD;
704 xfs_buf_item_trace("BHOLD", bip); 609 trace_xfs_trans_bhold(bip);
705} 610}
706 611
707/* 612/*
@@ -724,7 +629,8 @@ xfs_trans_bhold_release(xfs_trans_t *tp,
724 ASSERT(atomic_read(&bip->bli_refcount) > 0); 629 ASSERT(atomic_read(&bip->bli_refcount) > 0);
725 ASSERT(bip->bli_flags & XFS_BLI_HOLD); 630 ASSERT(bip->bli_flags & XFS_BLI_HOLD);
726 bip->bli_flags &= ~XFS_BLI_HOLD; 631 bip->bli_flags &= ~XFS_BLI_HOLD;
727 xfs_buf_item_trace("BHOLD RELEASE", bip); 632
633 trace_xfs_trans_bhold_release(bip);
728} 634}
729 635
730/* 636/*
@@ -770,6 +676,8 @@ xfs_trans_log_buf(xfs_trans_t *tp,
770 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); 676 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
771 bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))xfs_buf_iodone; 677 bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))xfs_buf_iodone;
772 678
679 trace_xfs_trans_log_buf(bip);
680
773 /* 681 /*
774 * If we invalidated the buffer within this transaction, then 682 * If we invalidated the buffer within this transaction, then
775 * cancel the invalidation now that we're dirtying the buffer 683 * cancel the invalidation now that we're dirtying the buffer
@@ -777,7 +685,6 @@ xfs_trans_log_buf(xfs_trans_t *tp,
777 * because we have a reference to the buffer this entire time. 685 * because we have a reference to the buffer this entire time.
778 */ 686 */
779 if (bip->bli_flags & XFS_BLI_STALE) { 687 if (bip->bli_flags & XFS_BLI_STALE) {
780 xfs_buf_item_trace("BLOG UNSTALE", bip);
781 bip->bli_flags &= ~XFS_BLI_STALE; 688 bip->bli_flags &= ~XFS_BLI_STALE;
782 ASSERT(XFS_BUF_ISSTALE(bp)); 689 ASSERT(XFS_BUF_ISSTALE(bp));
783 XFS_BUF_UNSTALE(bp); 690 XFS_BUF_UNSTALE(bp);
@@ -792,7 +699,6 @@ xfs_trans_log_buf(xfs_trans_t *tp,
792 lidp->lid_flags &= ~XFS_LID_BUF_STALE; 699 lidp->lid_flags &= ~XFS_LID_BUF_STALE;
793 bip->bli_flags |= XFS_BLI_LOGGED; 700 bip->bli_flags |= XFS_BLI_LOGGED;
794 xfs_buf_item_log(bip, first, last); 701 xfs_buf_item_log(bip, first, last);
795 xfs_buf_item_trace("BLOG", bip);
796} 702}
797 703
798 704
@@ -831,6 +737,8 @@ xfs_trans_binval(
831 ASSERT(lidp != NULL); 737 ASSERT(lidp != NULL);
832 ASSERT(atomic_read(&bip->bli_refcount) > 0); 738 ASSERT(atomic_read(&bip->bli_refcount) > 0);
833 739
740 trace_xfs_trans_binval(bip);
741
834 if (bip->bli_flags & XFS_BLI_STALE) { 742 if (bip->bli_flags & XFS_BLI_STALE) {
835 /* 743 /*
836 * If the buffer is already invalidated, then 744 * If the buffer is already invalidated, then
@@ -843,8 +751,6 @@ xfs_trans_binval(
843 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); 751 ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
844 ASSERT(lidp->lid_flags & XFS_LID_DIRTY); 752 ASSERT(lidp->lid_flags & XFS_LID_DIRTY);
845 ASSERT(tp->t_flags & XFS_TRANS_DIRTY); 753 ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
846 xfs_buftrace("XFS_BINVAL RECUR", bp);
847 xfs_buf_item_trace("BINVAL RECUR", bip);
848 return; 754 return;
849 } 755 }
850 756
@@ -878,8 +784,6 @@ xfs_trans_binval(
878 (bip->bli_format.blf_map_size * sizeof(uint))); 784 (bip->bli_format.blf_map_size * sizeof(uint)));
879 lidp->lid_flags |= XFS_LID_DIRTY|XFS_LID_BUF_STALE; 785 lidp->lid_flags |= XFS_LID_DIRTY|XFS_LID_BUF_STALE;
880 tp->t_flags |= XFS_TRANS_DIRTY; 786 tp->t_flags |= XFS_TRANS_DIRTY;
881 xfs_buftrace("XFS_BINVAL", bp);
882 xfs_buf_item_trace("BINVAL", bip);
883} 787}
884 788
885/* 789/*
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index d725428c9df6..b09904555d07 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -151,8 +151,8 @@ typedef enum {
151} xfs_btnum_t; 151} xfs_btnum_t;
152 152
153struct xfs_name { 153struct xfs_name {
154 const char *name; 154 const unsigned char *name;
155 int len; 155 int len;
156}; 156};
157 157
158#endif /* __XFS_TYPES_H__ */ 158#endif /* __XFS_TYPES_H__ */
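Widening struct xfs_name's bytes to unsigned char (and, below, the xfs_attr_* name/value parameters and the target_path casts in xfs_symlink) matters once names contain bytes above 0x7f: plain char is signed on x86 and most Linux ABIs, so those bytes sign-extend to negative ints in hashing and comparison code. A minimal, runnable userspace demonstration of the trap:

#include <stdio.h>

int main(void)
{
	char		c = (char)0xe9;	/* e.g. a latin-1 'é' byte in a name */
	unsigned char	u = 0xe9;

	/* on a signed-char ABI this prints "-23 233" */
	printf("%d %d\n", c, u);
	return 0;
}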
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index b572f7e840e0..9d376be0ea38 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -53,6 +53,7 @@
53#include "xfs_log_priv.h" 53#include "xfs_log_priv.h"
54#include "xfs_filestream.h" 54#include "xfs_filestream.h"
55#include "xfs_vnodeops.h" 55#include "xfs_vnodeops.h"
56#include "xfs_trace.h"
56 57
57int 58int
58xfs_setattr( 59xfs_setattr(
@@ -69,7 +70,6 @@ xfs_setattr(
69 uint commit_flags=0; 70 uint commit_flags=0;
70 uid_t uid=0, iuid=0; 71 uid_t uid=0, iuid=0;
71 gid_t gid=0, igid=0; 72 gid_t gid=0, igid=0;
72 int timeflags = 0;
73 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 73 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
74 int need_iolock = 1; 74 int need_iolock = 1;
75 75
@@ -134,16 +134,13 @@ xfs_setattr(
134 if (flags & XFS_ATTR_NOLOCK) 134 if (flags & XFS_ATTR_NOLOCK)
135 need_iolock = 0; 135 need_iolock = 0;
136 if (!(mask & ATTR_SIZE)) { 136 if (!(mask & ATTR_SIZE)) {
137 if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) || 137 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
138 (mp->m_flags & XFS_MOUNT_WSYNC)) { 138 commit_flags = 0;
139 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 139 code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp),
140 commit_flags = 0; 140 0, 0, 0);
141 if ((code = xfs_trans_reserve(tp, 0, 141 if (code) {
142 XFS_ICHANGE_LOG_RES(mp), 0, 142 lock_flags = 0;
143 0, 0))) { 143 goto error_return;
144 lock_flags = 0;
145 goto error_return;
146 }
147 } 144 }
148 } else { 145 } else {
149 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && 146 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
@@ -259,7 +256,7 @@ xfs_setattr(
259 iattr->ia_size > ip->i_d.di_size) { 256 iattr->ia_size > ip->i_d.di_size) {
260 code = xfs_flush_pages(ip, 257 code = xfs_flush_pages(ip,
261 ip->i_d.di_size, iattr->ia_size, 258 ip->i_d.di_size, iattr->ia_size,
262 XFS_B_ASYNC, FI_NONE); 259 XBF_ASYNC, FI_NONE);
263 } 260 }
264 261
265 /* wait for all I/O to complete */ 262 /* wait for all I/O to complete */
@@ -294,15 +291,23 @@ xfs_setattr(
294 * or we are explicitly asked to change it. This handles 291 * or we are explicitly asked to change it. This handles
295 * the semantic difference between truncate() and ftruncate() 292 * the semantic difference between truncate() and ftruncate()
296 * as implemented in the VFS. 293 * as implemented in the VFS.
294 *
295 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME
296 * is a special case where we need to update the times despite
297 * not having these flags set. For all other operations the
298 * VFS sets these flags explicitly if it wants a timestamp
299 * update.
297 */ 300 */
298 if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME)) 301 if (iattr->ia_size != ip->i_size &&
299 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 302 (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
303 iattr->ia_ctime = iattr->ia_mtime =
304 current_fs_time(inode->i_sb);
305 mask |= ATTR_CTIME | ATTR_MTIME;
306 }
300 307
301 if (iattr->ia_size > ip->i_size) { 308 if (iattr->ia_size > ip->i_size) {
302 ip->i_d.di_size = iattr->ia_size; 309 ip->i_d.di_size = iattr->ia_size;
303 ip->i_size = iattr->ia_size; 310 ip->i_size = iattr->ia_size;
304 if (!(flags & XFS_ATTR_DMI))
305 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
306 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 311 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
307 } else if (iattr->ia_size <= ip->i_size || 312 } else if (iattr->ia_size <= ip->i_size ||
308 (iattr->ia_size == 0 && ip->i_d.di_nextents)) { 313 (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
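This hunk replaces the old timeflags bookkeeping on the size path: when the size changes and the caller supplied no time flags, which is how truncate(2) arrives, ctime/mtime are synthesized on the spot and folded into mask so the common update code below handles them. For context, a hedged sketch of the two VFS entry paths as they looked in this era (do_truncate() lives in fs/open.c; the exact call sites are an assumption, not part of this patch):

	/* truncate(2): no time flags, xfs_setattr synthesizes them */
	do_truncate(path.dentry, length, 0, NULL);

	/* ftruncate(2): the VFS passes the time flags explicitly */
	do_truncate(dentry, length, ATTR_MTIME | ATTR_CTIME, file);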
@@ -373,9 +378,6 @@ xfs_setattr(
373 ip->i_d.di_gid = gid; 378 ip->i_d.di_gid = gid;
374 inode->i_gid = gid; 379 inode->i_gid = gid;
375 } 380 }
376
377 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
378 timeflags |= XFS_ICHGTIME_CHG;
379 } 381 }
380 382
381 /* 383 /*
@@ -392,51 +394,37 @@ xfs_setattr(
392 394
393 inode->i_mode &= S_IFMT; 395 inode->i_mode &= S_IFMT;
394 inode->i_mode |= mode & ~S_IFMT; 396 inode->i_mode |= mode & ~S_IFMT;
395
396 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
397 timeflags |= XFS_ICHGTIME_CHG;
398 } 397 }
399 398
400 /* 399 /*
401 * Change file access or modified times. 400 * Change file access or modified times.
402 */ 401 */
403 if (mask & (ATTR_ATIME|ATTR_MTIME)) { 402 if (mask & ATTR_ATIME) {
404 if (mask & ATTR_ATIME) { 403 inode->i_atime = iattr->ia_atime;
405 inode->i_atime = iattr->ia_atime; 404 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
406 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; 405 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
407 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; 406 ip->i_update_core = 1;
408 ip->i_update_core = 1;
409 }
410 if (mask & ATTR_MTIME) {
411 inode->i_mtime = iattr->ia_mtime;
412 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
413 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
414 timeflags &= ~XFS_ICHGTIME_MOD;
415 timeflags |= XFS_ICHGTIME_CHG;
416 }
417 if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
418 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
419 } 407 }
420 408 if (mask & ATTR_CTIME) {
421 /*
422 * Change file inode change time only if ATTR_CTIME set
423 * AND we have been called by a DMI function.
424 */
425
426 if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
427 inode->i_ctime = iattr->ia_ctime; 409 inode->i_ctime = iattr->ia_ctime;
428 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 410 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
429 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; 411 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
430 ip->i_update_core = 1; 412 ip->i_update_core = 1;
431 timeflags &= ~XFS_ICHGTIME_CHG; 413 }
414 if (mask & ATTR_MTIME) {
415 inode->i_mtime = iattr->ia_mtime;
416 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
417 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
418 ip->i_update_core = 1;
432 } 419 }
433 420
434 /* 421 /*
435 * Send out timestamp changes that need to be set to the 422 * And finally, log the inode core if any attribute in it
436 * current time. Not done when called by a DMI function. 423 * has been changed.
437 */ 424 */
438 if (timeflags && !(flags & XFS_ATTR_DMI)) 425 if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE|
439 xfs_ichgtime(ip, timeflags); 426 ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
427 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
440 428
441 XFS_STATS_INC(xs_ig_attrchg); 429 XFS_STATS_INC(xs_ig_attrchg);
442 430
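With timeflags gone, each of ATTR_ATIME/ATTR_CTIME/ATTR_MTIME now copies its timestamp into both the Linux inode and the on-disk core directly, and one xfs_trans_log_inode() at the end logs the core exactly once if any core attribute changed. Condensed, the new flow is (field copies elided):

	if (mask & ATTR_ATIME) { /* copy ia_atime, set i_update_core */ }
	if (mask & ATTR_CTIME) { /* copy ia_ctime, set i_update_core */ }
	if (mask & ATTR_MTIME) { /* copy ia_mtime, set i_update_core */ }

	if (mask & (ATTR_UID | ATTR_GID | ATTR_MODE |
		    ATTR_ATIME | ATTR_CTIME | ATTR_MTIME))
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);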
@@ -451,12 +439,10 @@ xfs_setattr(
451 * mix so this probably isn't worth the trouble to optimize. 439 * mix so this probably isn't worth the trouble to optimize.
452 */ 440 */
453 code = 0; 441 code = 0;
454 if (tp) { 442 if (mp->m_flags & XFS_MOUNT_WSYNC)
455 if (mp->m_flags & XFS_MOUNT_WSYNC) 443 xfs_trans_set_sync(tp);
456 xfs_trans_set_sync(tp);
457 444
458 code = xfs_trans_commit(tp, commit_flags); 445 code = xfs_trans_commit(tp, commit_flags);
459 }
460 446
461 xfs_iunlock(ip, lock_flags); 447 xfs_iunlock(ip, lock_flags);
462 448
@@ -538,9 +524,8 @@ xfs_readlink_bmap(
538 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 524 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
539 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 525 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
540 526
541 bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt), 527 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt),
542 XBF_LOCK | XBF_MAPPED | 528 XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK);
543 XBF_DONT_BLOCK);
544 error = XFS_BUF_GETERROR(bp); 529 error = XFS_BUF_GETERROR(bp);
545 if (error) { 530 if (error) {
546 xfs_ioerror_alert("xfs_readlink", 531 xfs_ioerror_alert("xfs_readlink",
@@ -599,114 +584,9 @@ xfs_readlink(
599} 584}
600 585
601/* 586/*
602 * xfs_fsync 587 * Flags for xfs_free_eofblocks
603 *
604 * This is called to sync the inode and its data out to disk. We need to hold
605 * the I/O lock while flushing the data, and the inode lock while flushing the
606 * inode. The inode lock CANNOT be held while flushing the data, so acquire
607 * after we're done with that.
608 */ 588 */
609int 589#define XFS_FREE_EOF_TRYLOCK (1<<0)
610xfs_fsync(
611 xfs_inode_t *ip)
612{
613 xfs_trans_t *tp;
614 int error = 0;
615 int log_flushed = 0, changed = 1;
616
617 xfs_itrace_entry(ip);
618
619 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
620 return XFS_ERROR(EIO);
621
622 /*
623 * We always need to make sure that the required inode state is safe on
624 * disk. The inode might be clean but we still might need to force the
625 * log because of committed transactions that haven't hit the disk yet.
626 * Likewise, there could be unflushed non-transactional changes to the
627 * inode core that have to go to disk and this requires us to issue
628 * a synchronous transaction to capture these changes correctly.
629 *
630 * This code relies on the assumption that if the update_* fields
631 * of the inode are clear and the inode is unpinned then it is clean
632 * and no action is required.
633 */
634 xfs_ilock(ip, XFS_ILOCK_SHARED);
635
636 if (!ip->i_update_core) {
637 /*
638 * Timestamps/size haven't changed since last inode flush or
639 * inode transaction commit. That means either nothing got
640 * written or a transaction committed which caught the updates.
641 * If the latter happened and the transaction hasn't hit the
642 * disk yet, the inode will still be pinned. If it is,
643 * force the log.
644 */
645
646 xfs_iunlock(ip, XFS_ILOCK_SHARED);
647
648 if (xfs_ipincount(ip)) {
649 error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
650 XFS_LOG_FORCE | XFS_LOG_SYNC,
651 &log_flushed);
652 } else {
653 /*
654 * If the inode is not pinned and nothing has changed
655 * we don't need to flush the cache.
656 */
657 changed = 0;
658 }
659 } else {
660 /*
661 * Kick off a transaction to log the inode core to get the
662 * updates. The sync transaction will also force the log.
663 */
664 xfs_iunlock(ip, XFS_ILOCK_SHARED);
665 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
666 error = xfs_trans_reserve(tp, 0,
667 XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
668 if (error) {
669 xfs_trans_cancel(tp, 0);
670 return error;
671 }
672 xfs_ilock(ip, XFS_ILOCK_EXCL);
673
674 /*
675 * Note - it's possible that we might have pushed ourselves out
676 * of the way during trans_reserve which would flush the inode.
677 * But there's no guarantee that the inode buffer has actually
678 * gone out yet (it's delwri). Plus the buffer could be pinned
679 * anyway if it's part of an inode in another recent
680 * transaction. So we play it safe and fire off the
681 * transaction anyway.
682 */
683 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
684 xfs_trans_ihold(tp, ip);
685 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
686 xfs_trans_set_sync(tp);
687 error = _xfs_trans_commit(tp, 0, &log_flushed);
688
689 xfs_iunlock(ip, XFS_ILOCK_EXCL);
690 }
691
692 if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) {
693 /*
694 * If the log write didn't issue an ordered tag we need
695 * to flush the disk cache for the data device now.
696 */
697 if (!log_flushed)
698 xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
699
700 /*
701 * If this inode is on the RT dev we need to flush that
702 * cache as well.
703 */
704 if (XFS_IS_REALTIME_INODE(ip))
705 xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
706 }
707
708 return error;
709}
710 590
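The whole of xfs_fsync() leaves this file (its prototype is dropped from xfs_vnodeops.h further down); the same logic presumably continues in the file-operations layer as xfs_file_fsync() rather than behind a wrapper. The decision it encoded, condensed into a sketch whose lower-case helpers are illustrative labels only, not kernel functions:

	if (!ip->i_update_core) {
		if (xfs_ipincount(ip))
			force_log_and_wait();		/* committed, not yet on disk */
		else
			changed = 0;			/* clean and unpinned: nothing to do */
	} else {
		commit_sync_dummy_transaction();	/* logs the inode core */
	}
	if ((mp->m_flags & XFS_MOUNT_BARRIER) && changed && !log_flushed)
		flush_device_write_caches();		/* data and, if used, RT device */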
711/* 591/*
712 * This is called by xfs_inactive to free any blocks beyond eof 592 * This is called by xfs_inactive to free any blocks beyond eof
@@ -726,7 +606,6 @@ xfs_free_eofblocks(
726 xfs_filblks_t map_len; 606 xfs_filblks_t map_len;
727 int nimaps; 607 int nimaps;
728 xfs_bmbt_irec_t imap; 608 xfs_bmbt_irec_t imap;
729 int use_iolock = (flags & XFS_FREE_EOF_LOCK);
730 609
731 /* 610 /*
732 * Figure out if there are any blocks beyond the end 611 * Figure out if there are any blocks beyond the end
@@ -768,14 +647,19 @@ xfs_free_eofblocks(
768 * cache and we can't 647 * cache and we can't
769 * do that within a transaction. 648 * do that within a transaction.
770 */ 649 */
771 if (use_iolock) 650 if (flags & XFS_FREE_EOF_TRYLOCK) {
651 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
652 xfs_trans_cancel(tp, 0);
653 return 0;
654 }
655 } else {
772 xfs_ilock(ip, XFS_IOLOCK_EXCL); 656 xfs_ilock(ip, XFS_IOLOCK_EXCL);
657 }
773 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 658 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
774 ip->i_size); 659 ip->i_size);
775 if (error) { 660 if (error) {
776 xfs_trans_cancel(tp, 0); 661 xfs_trans_cancel(tp, 0);
777 if (use_iolock) 662 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
778 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
779 return error; 663 return error;
780 } 664 }
781 665
@@ -812,8 +696,7 @@ xfs_free_eofblocks(
812 error = xfs_trans_commit(tp, 696 error = xfs_trans_commit(tp,
813 XFS_TRANS_RELEASE_LOG_RES); 697 XFS_TRANS_RELEASE_LOG_RES);
814 } 698 }
815 xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL) 699 xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
816 : XFS_ILOCK_EXCL));
817 } 700 }
818 return error; 701 return error;
819} 702}
@@ -1103,7 +986,7 @@ xfs_release(
1103 */ 986 */
1104 truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); 987 truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
1105 if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) 988 if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)
1106 xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); 989 xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
1107 } 990 }
1108 991
1109 if (ip->i_d.di_nlink != 0) { 992 if (ip->i_d.di_nlink != 0) {
@@ -1113,7 +996,17 @@ xfs_release(
1113 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 996 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
1114 (!(ip->i_d.di_flags & 997 (!(ip->i_d.di_flags &
1115 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { 998 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
1116 error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); 999
1000 /*
1001 * If we can't get the iolock just skip truncating
1002 * the blocks past EOF because we could deadlock
1003 * with the mmap_sem otherwise. We'll get another
1004 * chance to drop them once the last reference to
1005 * the inode is dropped, so we'll never leak blocks
1006 * permanently.
1007 */
1008 error = xfs_free_eofblocks(mp, ip,
1009 XFS_FREE_EOF_TRYLOCK);
1117 if (error) 1010 if (error)
1118 return error; 1011 return error;
1119 } 1012 }
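xfs_free_eofblocks() trades its use_iolock flag for XFS_FREE_EOF_TRYLOCK: the ->release path above asks for a non-blocking attempt because it can be entered with mmap_sem held, and a missed trylock is harmless since the blocks are freed again when the last reference is dropped. The two call sites, as the hunks read:

	/* xfs_release: opportunistic, must not block on the iolock */
	error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_TRYLOCK);

	/* xfs_inactive: last reference going away, safe to block */
	error = xfs_free_eofblocks(mp, ip, 0);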
@@ -1184,7 +1077,7 @@ xfs_inactive(
1184 (!(ip->i_d.di_flags & 1077 (!(ip->i_d.di_flags &
1185 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || 1078 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
1186 (ip->i_delayed_blks != 0)))) { 1079 (ip->i_delayed_blks != 0)))) {
1187 error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); 1080 error = xfs_free_eofblocks(mp, ip, 0);
1188 if (error) 1081 if (error)
1189 return VN_INACTIVE_CACHE; 1082 return VN_INACTIVE_CACHE;
1190 } 1083 }
@@ -1380,7 +1273,6 @@ xfs_lookup(
1380 if (error) 1273 if (error)
1381 goto out_free_name; 1274 goto out_free_name;
1382 1275
1383 xfs_itrace_ref(*ipp);
1384 return 0; 1276 return 0;
1385 1277
1386out_free_name: 1278out_free_name:
@@ -1526,7 +1418,6 @@ xfs_create(
1526 * At this point, we've gotten a newly allocated inode. 1418 * At this point, we've gotten a newly allocated inode.
1527 * It is locked (and joined to the transaction). 1419 * It is locked (and joined to the transaction).
1528 */ 1420 */
1529 xfs_itrace_ref(ip);
1530 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 1421 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1531 1422
1532 /* 1423 /*
@@ -1986,9 +1877,6 @@ xfs_remove(
1986 if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) 1877 if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
1987 xfs_filestream_deassociate(ip); 1878 xfs_filestream_deassociate(ip);
1988 1879
1989 xfs_itrace_exit(ip);
1990 xfs_itrace_exit(dp);
1991
1992 std_return: 1880 std_return:
1993 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { 1881 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
1994 XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL, 1882 XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL,
@@ -2201,7 +2089,8 @@ xfs_symlink(
2201 if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) { 2089 if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) {
2202 error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp, 2090 error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp,
2203 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 2091 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
2204 link_name->name, target_path, 0, 0, 0); 2092 link_name->name,
2093 (unsigned char *)target_path, 0, 0, 0);
2205 if (error) 2094 if (error)
2206 return error; 2095 return error;
2207 } 2096 }
@@ -2285,7 +2174,6 @@ xfs_symlink(
2285 goto error_return; 2174 goto error_return;
2286 goto error1; 2175 goto error1;
2287 } 2176 }
2288 xfs_itrace_ref(ip);
2289 2177
2290 /* 2178 /*
2291 * An error after we've joined dp to the transaction will result in the 2179 * An error after we've joined dp to the transaction will result in the
@@ -2398,7 +2286,8 @@ std_return:
2398 dp, DM_RIGHT_NULL, 2286 dp, DM_RIGHT_NULL,
2399 error ? NULL : ip, 2287 error ? NULL : ip,
2400 DM_RIGHT_NULL, link_name->name, 2288 DM_RIGHT_NULL, link_name->name,
2401 target_path, 0, error, 0); 2289 (unsigned char *)target_path,
2290 0, error, 0);
2402 } 2291 }
2403 2292
2404 if (!error) 2293 if (!error)
@@ -2456,46 +2345,6 @@ xfs_set_dmattrs(
2456 return error; 2345 return error;
2457} 2346}
2458 2347
2459int
2460xfs_reclaim(
2461 xfs_inode_t *ip)
2462{
2463
2464 xfs_itrace_entry(ip);
2465
2466 ASSERT(!VN_MAPPED(VFS_I(ip)));
2467
2468 /* bad inode, get out here ASAP */
2469 if (is_bad_inode(VFS_I(ip))) {
2470 xfs_ireclaim(ip);
2471 return 0;
2472 }
2473
2474 xfs_ioend_wait(ip);
2475
2476 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
2477
2478 /*
2479 * If we have nothing to flush with this inode then complete the
2480 * teardown now, otherwise break the link between the xfs inode and the
2481 * linux inode and clean up the xfs inode later. This avoids flushing
2482 * the inode to disk during the delete operation itself.
2483 *
2484 * When breaking the link, we need to set the XFS_IRECLAIMABLE flag
2485 * first to ensure that xfs_iunpin() will never see an xfs inode
2486 * that has a linux inode being reclaimed. Synchronisation is provided
2487 * by the i_flags_lock.
2488 */
2489 if (!ip->i_update_core && (ip->i_itemp == NULL)) {
2490 xfs_ilock(ip, XFS_ILOCK_EXCL);
2491 xfs_iflock(ip);
2492 xfs_iflags_set(ip, XFS_IRECLAIMABLE);
2493 return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
2494 }
2495 xfs_inode_set_reclaim_tag(ip);
2496 return 0;
2497}
2498
2499/* 2348/*
2500 * xfs_alloc_file_space() 2349 * xfs_alloc_file_space()
2501 * This routine allocates disk space for the given file. 2350 * This routine allocates disk space for the given file.
@@ -2868,7 +2717,6 @@ xfs_free_file_space(
2868 ioffset = offset & ~(rounding - 1); 2717 ioffset = offset & ~(rounding - 1);
2869 2718
2870 if (VN_CACHED(VFS_I(ip)) != 0) { 2719 if (VN_CACHED(VFS_I(ip)) != 0) {
2871 xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1);
2872 error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED); 2720 error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED);
2873 if (error) 2721 if (error)
2874 goto out_unlock_iolock; 2722 goto out_unlock_iolock;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index a9e102de71a1..d8dfa8d0dadd 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -21,7 +21,6 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
21#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */ 21#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */
22 22
23int xfs_readlink(struct xfs_inode *ip, char *link); 23int xfs_readlink(struct xfs_inode *ip, char *link);
24int xfs_fsync(struct xfs_inode *ip);
25int xfs_release(struct xfs_inode *ip); 24int xfs_release(struct xfs_inode *ip);
26int xfs_inactive(struct xfs_inode *ip); 25int xfs_inactive(struct xfs_inode *ip);
27int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, 26int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
@@ -38,31 +37,18 @@ int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
38 const char *target_path, mode_t mode, struct xfs_inode **ipp, 37 const char *target_path, mode_t mode, struct xfs_inode **ipp,
39 cred_t *credp); 38 cred_t *credp);
40int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); 39int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
41int xfs_reclaim(struct xfs_inode *ip);
42int xfs_change_file_space(struct xfs_inode *ip, int cmd, 40int xfs_change_file_space(struct xfs_inode *ip, int cmd,
43 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); 41 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);
44int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, 42int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
45 struct xfs_inode *src_ip, struct xfs_inode *target_dp, 43 struct xfs_inode *src_ip, struct xfs_inode *target_dp,
46 struct xfs_name *target_name, struct xfs_inode *target_ip); 44 struct xfs_name *target_name, struct xfs_inode *target_ip);
47int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value, 45int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
48 int *valuelenp, int flags); 46 unsigned char *value, int *valuelenp, int flags);
49int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value, 47int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
50 int valuelen, int flags); 48 unsigned char *value, int valuelen, int flags);
51int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags); 49int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
52int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, 50int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
53 int flags, struct attrlist_cursor_kern *cursor); 51 int flags, struct attrlist_cursor_kern *cursor);
54ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb,
55 const struct iovec *iovp, unsigned int segs,
56 loff_t *offset, int ioflags);
57ssize_t xfs_splice_read(struct xfs_inode *ip, struct file *infilp,
58 loff_t *ppos, struct pipe_inode_info *pipe, size_t count,
59 int flags, int ioflags);
60ssize_t xfs_splice_write(struct xfs_inode *ip,
61 struct pipe_inode_info *pipe, struct file *outfilp,
62 loff_t *ppos, size_t count, int flags, int ioflags);
63ssize_t xfs_write(struct xfs_inode *xip, struct kiocb *iocb,
64 const struct iovec *iovp, unsigned int nsegs,
65 loff_t *offset, int ioflags);
66int xfs_bmap(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, 52int xfs_bmap(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
67 int flags, struct xfs_iomap *iomapp, int *niomaps); 53 int flags, struct xfs_iomap *iomapp, int *niomaps);
68void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first, 54void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first,
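The xfs_read/xfs_write/xfs_splice_* prototypes disappear along with xfs_fsync's, while xfs_zero_eof() gains a public prototype below: the generic I/O entry points stop being vnodeops. A hedged mapping to where they land in the file-operations layer (the xfs_file_* names are an assumption based on the 2.6.34 tree, not shown in this diff):

/*
 *   xfs_read          -> xfs_file_aio_read
 *   xfs_write         -> xfs_file_aio_write
 *   xfs_splice_read   -> xfs_file_splice_read
 *   xfs_splice_write  -> xfs_file_splice_write
 *   xfs_fsync         -> xfs_file_fsync
 */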
@@ -73,4 +59,6 @@ int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first,
73 xfs_off_t last, uint64_t flags, int fiopt); 59 xfs_off_t last, uint64_t flags, int fiopt);
74int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last); 60int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last);
75 61
62int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
63
76#endif /* _XFS_VNODEOPS_H */ 64#endif /* _XFS_VNODEOPS_H */