Diffstat (limited to 'fs/xfs/linux-2.6')

 fs/xfs/linux-2.6/kmem.c         |   57
 fs/xfs/linux-2.6/kmem.h         |   21
 fs/xfs/linux-2.6/xfs_acl.c      |   73
 fs/xfs/linux-2.6/xfs_aops.c     |  392
 fs/xfs/linux-2.6/xfs_aops.h     |    2
 fs/xfs/linux-2.6/xfs_buf.c      |  542
 fs/xfs/linux-2.6/xfs_buf.h      |   95
 fs/xfs/linux-2.6/xfs_export.c   |   20
 fs/xfs/linux-2.6/xfs_file.c     |  856
 fs/xfs/linux-2.6/xfs_fs_subr.c  |    5
 fs/xfs/linux-2.6/xfs_ioctl.c    |   23
 fs/xfs/linux-2.6/xfs_ioctl.h    |   12
 fs/xfs/linux-2.6/xfs_ioctl32.c  |    6
 fs/xfs/linux-2.6/xfs_iops.c     |   24
 fs/xfs/linux-2.6/xfs_linux.h    |    2
 fs/xfs/linux-2.6/xfs_lrw.c      |  922
 fs/xfs/linux-2.6/xfs_lrw.h      |   77
 fs/xfs/linux-2.6/xfs_quotaops.c |   19
 fs/xfs/linux-2.6/xfs_super.c    |  354
 fs/xfs/linux-2.6/xfs_super.h    |    7
 fs/xfs/linux-2.6/xfs_sync.c     |  447
 fs/xfs/linux-2.6/xfs_sync.h     |    9
 fs/xfs/linux-2.6/xfs_sysctl.c   |   62
 fs/xfs/linux-2.6/xfs_trace.c    |   59
 fs/xfs/linux-2.6/xfs_trace.h    | 1503
 fs/xfs/linux-2.6/xfs_vnode.h    |    5
 fs/xfs/linux-2.6/xfs_xattr.c    |   98
 27 files changed, 3644 insertions(+), 2048 deletions(-)
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 2d3f90afe5f1..666c9db48eb6 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -16,16 +16,33 @@
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include <linux/mm.h>
-#include <linux/vmalloc.h>
 #include <linux/highmem.h>
+#include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include "time.h"
 #include "kmem.h"

-#define MAX_VMALLOCS	6
-#define MAX_SLAB_SIZE	0x20000
+/*
+ * Greedy allocation.  May fail and may return vmalloced memory.
+ *
+ * Must be freed using kmem_free_large.
+ */
+void *
+kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
+{
+	void		*ptr;
+	size_t		kmsize = maxsize;
+
+	while (!(ptr = kmem_zalloc_large(kmsize))) {
+		if ((kmsize >>= 1) <= minsize)
+			kmsize = minsize;
+	}
+	if (ptr)
+		*size = kmsize;
+	return ptr;
+}

 void *
 kmem_alloc(size_t size, unsigned int __nocast flags)
@@ -34,19 +51,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
 	gfp_t		lflags = kmem_flags_convert(flags);
 	void		*ptr;

-#ifdef DEBUG
-	if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) {
-		printk(KERN_WARNING "Large %s attempt, size=%ld\n",
-			__func__, (long)size);
-		dump_stack();
-	}
-#endif
-
 	do {
-		if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
-			ptr = kmalloc(size, lflags);
-		else
-			ptr = __vmalloc(size, lflags, PAGE_KERNEL);
+		ptr = kmalloc(size, lflags);
 		if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
 			return ptr;
 		if (!(++retries % 100))
@@ -68,27 +74,6 @@ kmem_zalloc(size_t size, unsigned int __nocast flags)
 	return ptr;
 }

-void *
-kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
-		   unsigned int __nocast flags)
-{
-	void		*ptr;
-	size_t		kmsize = maxsize;
-	unsigned int	kmflags = (flags & ~KM_SLEEP) | KM_NOSLEEP;
-
-	while (!(ptr = kmem_zalloc(kmsize, kmflags))) {
-		if ((kmsize <= minsize) && (flags & KM_NOSLEEP))
-			break;
-		if ((kmsize >>= 1) <= minsize) {
-			kmsize = minsize;
-			kmflags = flags;
-		}
-	}
-	if (ptr)
-		*size = kmsize;
-	return ptr;
-}
-
 void
 kmem_free(const void *ptr)
 {
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 179cbd630f69..f7c8f7a9ea6d 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -21,6 +21,7 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/vmalloc.h>

 /*
  * General memory allocation interfaces
@@ -30,7 +31,6 @@
 #define KM_NOSLEEP	0x0002u
 #define KM_NOFS		0x0004u
 #define KM_MAYFAIL	0x0008u
-#define KM_LARGE	0x0010u

 /*
  * We use a special process flag to avoid recursive callbacks into
@@ -42,7 +42,7 @@ kmem_flags_convert(unsigned int __nocast flags)
 {
 	gfp_t	lflags;

-	BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_LARGE));
+	BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));

 	if (flags & KM_NOSLEEP) {
 		lflags = GFP_ATOMIC | __GFP_NOWARN;
@@ -56,10 +56,25 @@ kmem_flags_convert(unsigned int __nocast flags)

 extern void *kmem_alloc(size_t, unsigned int __nocast);
 extern void *kmem_zalloc(size_t, unsigned int __nocast);
-extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast);
 extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
 extern void  kmem_free(const void *);

+static inline void *kmem_zalloc_large(size_t size)
+{
+	void *ptr;
+
+	ptr = vmalloc(size);
+	if (ptr)
+		memset(ptr, 0, size);
+	return ptr;
+}
+static inline void kmem_free_large(void *ptr)
+{
+	vfree(ptr);
+}
+
+extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
+
 /*
  * Zone interfaces
  */
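
Note (illustration, not part of the patch): the reworked kmem_zalloc_greedy() above starts at maxsize and halves on each failure until it reaches minsize, relying on the vmalloc-backed kmem_zalloc_large() to eventually succeed. A minimal userspace sketch of the same back-off loop, with calloc() standing in for kmem_zalloc_large() and an explicit give-up added, since calloc() carries no such guarantee:

	#include <stdlib.h>

	static void *zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
	{
		void	*ptr;
		size_t	sz = maxsize;

		/* Try the largest size first; halve on failure; clamp at minsize. */
		while (!(ptr = calloc(1, sz))) {
			if (sz == minsize)
				return NULL;	/* unlike the kernel version, bail at the floor */
			if ((sz >>= 1) <= minsize)
				sz = minsize;
		}
		*size = sz;	/* report the size actually obtained */
		return ptr;
	}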
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index b23a54506446..a7bc925c4d60 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -21,6 +21,8 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_inode.h"
 #include "xfs_vnodeops.h"
+#include "xfs_trace.h"
+#include <linux/slab.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>

@@ -105,7 +107,7 @@ xfs_get_acl(struct inode *inode, int type)
 	struct posix_acl *acl;
 	struct xfs_acl *xfs_acl;
 	int len = sizeof(struct xfs_acl);
-	char *ea_name;
+	unsigned char *ea_name;
 	int error;

 	acl = get_cached_acl(inode, type);
@@ -132,7 +134,8 @@ xfs_get_acl(struct inode *inode, int type)
 	if (!xfs_acl)
 		return ERR_PTR(-ENOMEM);

-	error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT);
+	error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
+							&len, ATTR_ROOT);
 	if (error) {
 		/*
 		 * If the attribute doesn't exist make sure we have a negative
@@ -161,7 +164,7 @@ STATIC int
 xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
 	struct xfs_inode *ip = XFS_I(inode);
-	char *ea_name;
+	unsigned char *ea_name;
 	int error;

 	if (S_ISLNK(inode->i_mode))
@@ -193,7 +196,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 			(sizeof(struct xfs_acl_entry) *
 			(XFS_ACL_MAX_ENTRIES - acl->a_count));

-		error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl,
+		error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
 				len, ATTR_ROOT);

 		kfree(xfs_acl);
@@ -250,8 +253,9 @@ xfs_set_mode(struct inode *inode, mode_t mode)
 	if (mode != inode->i_mode) {
 		struct iattr iattr;

-		iattr.ia_valid = ATTR_MODE;
+		iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
 		iattr.ia_mode = mode;
+		iattr.ia_ctime = current_fs_time(inode->i_sb);

 		error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
 	}
@@ -260,7 +264,7 @@ xfs_set_mode(struct inode *inode, mode_t mode)
 }

 static int
-xfs_acl_exists(struct inode *inode, char *name)
+xfs_acl_exists(struct inode *inode, unsigned char *name)
 {
 	int len = sizeof(struct xfs_acl);

@@ -353,37 +357,14 @@ xfs_acl_chmod(struct inode *inode)
 	return error;
 }

-/*
- * System xattr handlers.
- *
- * Currently Posix ACLs are the only system namespace extended attribute
- * handlers supported by XFS, so we just implement the handlers here.
- * If we ever support other system extended attributes this will need
- * some refactoring.
- */
-
-static int
-xfs_decode_acl(const char *name)
-{
-	if (strcmp(name, "posix_acl_access") == 0)
-		return ACL_TYPE_ACCESS;
-	else if (strcmp(name, "posix_acl_default") == 0)
-		return ACL_TYPE_DEFAULT;
-	return -EINVAL;
-}
-
 static int
-xfs_xattr_system_get(struct inode *inode, const char *name,
-		void *value, size_t size)
+xfs_xattr_acl_get(struct dentry *dentry, const char *name,
+		void *value, size_t size, int type)
 {
 	struct posix_acl *acl;
-	int type, error;
-
-	type = xfs_decode_acl(name);
-	if (type < 0)
-		return type;
+	int error;

-	acl = xfs_get_acl(inode, type);
+	acl = xfs_get_acl(dentry->d_inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -396,15 +377,13 @@ xfs_xattr_system_get(struct inode *inode, const char *name,
 }

 static int
-xfs_xattr_system_set(struct inode *inode, const char *name,
-		const void *value, size_t size, int flags)
+xfs_xattr_acl_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
+	struct inode *inode = dentry->d_inode;
 	struct posix_acl *acl = NULL;
-	int error = 0, type;
+	int error = 0;

-	type = xfs_decode_acl(name);
-	if (type < 0)
-		return type;
 	if (flags & XATTR_CREATE)
 		return -EINVAL;
 	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
@@ -461,8 +440,16 @@ xfs_xattr_system_set(struct inode *inode, const char *name,
 	return error;
 }

-struct xattr_handler xfs_xattr_system_handler = {
-	.prefix	= XATTR_SYSTEM_PREFIX,
-	.get	= xfs_xattr_system_get,
-	.set	= xfs_xattr_system_set,
+struct xattr_handler xfs_xattr_acl_access_handler = {
+	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_ACCESS,
+	.get	= xfs_xattr_acl_get,
+	.set	= xfs_xattr_acl_set,
+};
+
+struct xattr_handler xfs_xattr_acl_default_handler = {
+	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
+	.get	= xfs_xattr_acl_get,
+	.set	= xfs_xattr_acl_set,
 };
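
Note (sketch, simplified types): the point of the split above is that the ACL type now rides in the handler's flags field instead of being re-derived by string-matching the attribute name in xfs_decode_acl(). The generic xattr code can then dispatch like this:

	struct dentry;	/* opaque here; the real type lives in <linux/dcache.h> */

	struct xattr_handler_sketch {
		const char	*prefix;
		int		flags;	/* ACL_TYPE_ACCESS or ACL_TYPE_DEFAULT */
		int		(*get)(struct dentry *dentry, const char *name,
				       void *buffer, size_t size, int type);
	};

	static int dispatch_get(const struct xattr_handler_sketch *handler,
				struct dentry *dentry, const char *name,
				void *buffer, size_t size)
	{
		/* handler->flags is forwarded as the type argument */
		return handler->get(dentry, name, buffer, size, handler->flags);
	}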
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c2e30eea74dc..0f8b9968a803 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -38,6 +38,9 @@
 #include "xfs_rw.h"
 #include "xfs_iomap.h"
 #include "xfs_vnodeops.h"
+#include "xfs_trace.h"
+#include "xfs_bmap.h"
+#include <linux/gfp.h>
 #include <linux/mpage.h>
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
@@ -76,7 +79,7 @@ xfs_ioend_wake(
 	wake_up(to_ioend_wq(ip));
 }

-STATIC void
+void
 xfs_count_page_state(
 	struct page		*page,
 	int			*delalloc,
@@ -98,48 +101,6 @@ xfs_count_page_state(
 	} while ((bh = bh->b_this_page) != head);
 }

-#if defined(XFS_RW_TRACE)
-void
-xfs_page_trace(
-	int		tag,
-	struct inode	*inode,
-	struct page	*page,
-	unsigned long	pgoff)
-{
-	xfs_inode_t	*ip;
-	loff_t		isize = i_size_read(inode);
-	loff_t		offset = page_offset(page);
-	int		delalloc = -1, unmapped = -1, unwritten = -1;
-
-	if (page_has_buffers(page))
-		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
-
-	ip = XFS_I(inode);
-	if (!ip->i_rwtrace)
-		return;
-
-	ktrace_enter(ip->i_rwtrace,
-		(void *)((unsigned long)tag),
-		(void *)ip,
-		(void *)inode,
-		(void *)page,
-		(void *)pgoff,
-		(void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
-		(void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
-		(void *)((unsigned long)((isize >> 32) & 0xffffffff)),
-		(void *)((unsigned long)(isize & 0xffffffff)),
-		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
-		(void *)((unsigned long)(offset & 0xffffffff)),
-		(void *)((unsigned long)delalloc),
-		(void *)((unsigned long)unmapped),
-		(void *)((unsigned long)unwritten),
-		(void *)((unsigned long)current_pid()),
-		(void *)NULL);
-}
-#else
-#define xfs_page_trace(tag, inode, page, pgoff)
-#endif
-
 STATIC struct block_device *
 xfs_find_bdev_for_inode(
 	struct xfs_inode	*ip)
@@ -204,14 +165,17 @@ xfs_ioend_new_eof(
 }

 /*
- * Update on-disk file size now that data has been written to disk.
- * The current in-memory file size is i_size.  If a write is beyond
- * eof i_new_size will be the intended file size until i_size is
- * updated.  If this write does not extend all the way to the valid
- * file size then restrict this update to the end of the write.
+ * Update on-disk file size now that data has been written to disk.  The
+ * current in-memory file size is i_size.  If a write is beyond eof i_new_size
+ * will be the intended file size until i_size is updated.  If this write does
+ * not extend all the way to the valid file size then restrict this update to
+ * the end of the write.
+ *
+ * This function does not block as blocking on the inode lock in IO completion
+ * can lead to IO completion order dependency deadlocks.. If it can't get the
+ * inode ilock it will return EAGAIN. Callers must handle this.
  */
-
-STATIC void
+STATIC int
 xfs_setfilesize(
 	xfs_ioend_t		*ioend)
 {
@@ -222,85 +186,19 @@ xfs_setfilesize(
 	ASSERT(ioend->io_type != IOMAP_READ);

 	if (unlikely(ioend->io_error))
-		return;
+		return 0;
+
+	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
+		return EAGAIN;

-	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	isize = xfs_ioend_new_eof(ioend);
 	if (isize) {
 		ip->i_d.di_size = isize;
-		xfs_mark_inode_dirty_sync(ip);
+		xfs_mark_inode_dirty(ip);
 	}

 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-}
-
-/*
- * Buffered IO write completion for delayed allocate extents.
- */
-STATIC void
-xfs_end_bio_delalloc(
-	struct work_struct	*work)
-{
-	xfs_ioend_t		*ioend =
-		container_of(work, xfs_ioend_t, io_work);
-
-	xfs_setfilesize(ioend);
-	xfs_destroy_ioend(ioend);
-}
-
-/*
- * Buffered IO write completion for regular, written extents.
- */
-STATIC void
-xfs_end_bio_written(
-	struct work_struct	*work)
-{
-	xfs_ioend_t		*ioend =
-		container_of(work, xfs_ioend_t, io_work);
-
-	xfs_setfilesize(ioend);
-	xfs_destroy_ioend(ioend);
-}
-
-/*
- * IO write completion for unwritten extents.
- *
- * Issue transactions to convert a buffer range from unwritten
- * to written extents.
- */
-STATIC void
-xfs_end_bio_unwritten(
-	struct work_struct	*work)
-{
-	xfs_ioend_t		*ioend =
-		container_of(work, xfs_ioend_t, io_work);
-	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
-	xfs_off_t		offset = ioend->io_offset;
-	size_t			size = ioend->io_size;
-
-	if (likely(!ioend->io_error)) {
-		if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-			int error;
-			error = xfs_iomap_write_unwritten(ip, offset, size);
-			if (error)
-				ioend->io_error = error;
-		}
-		xfs_setfilesize(ioend);
-	}
-	xfs_destroy_ioend(ioend);
-}
-
-/*
- * IO read completion for regular, written extents.
- */
-STATIC void
-xfs_end_bio_read(
-	struct work_struct	*work)
-{
-	xfs_ioend_t		*ioend =
-		container_of(work, xfs_ioend_t, io_work);
-
-	xfs_destroy_ioend(ioend);
+	return 0;
 }

 /*
@@ -314,10 +212,10 @@ xfs_finish_ioend(
 	int			wait)
 {
 	if (atomic_dec_and_test(&ioend->io_remaining)) {
-		struct workqueue_struct *wq = xfsdatad_workqueue;
-		if (ioend->io_work.func == xfs_end_bio_unwritten)
-			wq = xfsconvertd_workqueue;
+		struct workqueue_struct *wq;

+		wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
+			xfsconvertd_workqueue : xfsdatad_workqueue;
 		queue_work(wq, &ioend->io_work);
 		if (wait)
 			flush_workqueue(wq);
@@ -325,6 +223,53 @@ xfs_finish_ioend(
 }

 /*
+ * IO write completion.
+ */
+STATIC void
+xfs_end_io(
+	struct work_struct	*work)
+{
+	xfs_ioend_t	*ioend = container_of(work, xfs_ioend_t, io_work);
+	struct xfs_inode *ip = XFS_I(ioend->io_inode);
+	int		error = 0;
+
+	/*
+	 * For unwritten extents we need to issue transactions to convert a
+	 * range to normal written extens after the data I/O has finished.
+	 */
+	if (ioend->io_type == IOMAP_UNWRITTEN &&
+	    likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
+
+		error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
+						 ioend->io_size);
+		if (error)
+			ioend->io_error = error;
+	}
+
+	/*
+	 * We might have to update the on-disk file size after extending
+	 * writes.
+	 */
+	if (ioend->io_type != IOMAP_READ) {
+		error = xfs_setfilesize(ioend);
+		ASSERT(!error || error == EAGAIN);
+	}
+
+	/*
+	 * If we didn't complete processing of the ioend, requeue it to the
+	 * tail of the workqueue for another attempt later. Otherwise destroy
+	 * it.
+	 */
+	if (error == EAGAIN) {
+		atomic_inc(&ioend->io_remaining);
+		xfs_finish_ioend(ioend, 0);
+		/* ensure we don't spin on blocked ioends */
+		delay(1);
+	} else
+		xfs_destroy_ioend(ioend);
+}
+
+/*
  * Allocate and initialise an IO completion structure.
  * We need to track unwritten extent write completion here initially.
  * We'll need to extend this for updating the ondisk inode size later
@@ -355,15 +300,7 @@ xfs_alloc_ioend(
 	ioend->io_offset = 0;
 	ioend->io_size = 0;

-	if (type == IOMAP_UNWRITTEN)
-		INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten);
-	else if (type == IOMAP_DELAY)
-		INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc);
-	else if (type == IOMAP_READ)
-		INIT_WORK(&ioend->io_work, xfs_end_bio_read);
-	else
-		INIT_WORK(&ioend->io_work, xfs_end_bio_written);
-
+	INIT_WORK(&ioend->io_work, xfs_end_io);
 	return ioend;
 }

@@ -380,7 +317,7 @@ xfs_map_blocks(
 	return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps);
 }

-STATIC_INLINE int
+STATIC int
 xfs_iomap_valid(
 	xfs_iomap_t		*iomapp,
 	loff_t			offset)
@@ -412,8 +349,9 @@ xfs_end_bio(

 STATIC void
 xfs_submit_ioend_bio(
-	xfs_ioend_t	*ioend,
-	struct bio	*bio)
+	struct writeback_control *wbc,
+	xfs_ioend_t		*ioend,
+	struct bio		*bio)
 {
 	atomic_inc(&ioend->io_remaining);
 	bio->bi_private = ioend;
@@ -424,9 +362,10 @@ xfs_submit_ioend_bio(
 	 * but don't update the inode size until I/O completion.
 	 */
 	if (xfs_ioend_new_eof(ioend))
-		xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode));
+		xfs_mark_inode_dirty(XFS_I(ioend->io_inode));

-	submit_bio(WRITE, bio);
+	submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
+		   WRITE_SYNC_PLUG : WRITE, bio);
 	ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
 	bio_put(bio);
 }
@@ -505,6 +444,7 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
  */
 STATIC void
 xfs_submit_ioend(
+	struct writeback_control *wbc,
 	xfs_ioend_t		*ioend)
 {
 	xfs_ioend_t		*head = ioend;
@@ -533,19 +473,19 @@ xfs_submit_ioend(
  retry:
 			bio = xfs_alloc_ioend_bio(bh);
 		} else if (bh->b_blocknr != lastblock + 1) {
-			xfs_submit_ioend_bio(ioend, bio);
+			xfs_submit_ioend_bio(wbc, ioend, bio);
 			goto retry;
 		}

 		if (bio_add_buffer(bio, bh) != bh->b_size) {
-			xfs_submit_ioend_bio(ioend, bio);
+			xfs_submit_ioend_bio(wbc, ioend, bio);
 			goto retry;
 		}

 		lastblock = bh->b_blocknr;
 	}
 	if (bio)
-		xfs_submit_ioend_bio(ioend, bio);
+		xfs_submit_ioend_bio(wbc, ioend, bio);
 	xfs_finish_ioend(ioend, 0);
 	} while ((ioend = next) != NULL);
 }
@@ -904,16 +844,9 @@ xfs_convert_page(

 	if (startio) {
 		if (count) {
-			struct backing_dev_info *bdi;
-
-			bdi = inode->i_mapping->backing_dev_info;
 			wbc->nr_to_write--;
-			if (bdi_write_congested(bdi)) {
-				wbc->encountered_congestion = 1;
-				done = 1;
-			} else if (wbc->nr_to_write <= 0) {
+			if (wbc->nr_to_write <= 0)
 				done = 1;
-			}
 		}
 		xfs_start_page_writeback(page, !page_dirty, count);
 	}
@@ -962,6 +895,125 @@ xfs_cluster_write(
 	}
 }

+STATIC void
+xfs_vm_invalidatepage(
+	struct page		*page,
+	unsigned long		offset)
+{
+	trace_xfs_invalidatepage(page->mapping->host, page, offset);
+	block_invalidatepage(page, offset);
+}
+
+/*
+ * If the page has delalloc buffers on it, we need to punch them out before we
+ * invalidate the page. If we don't, we leave a stale delalloc mapping on the
+ * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
+ * is done on that same region - the delalloc extent is returned when none is
+ * supposed to be there.
+ *
+ * We prevent this by truncating away the delalloc regions on the page before
+ * invalidating it. Because they are delalloc, we can do this without needing a
+ * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
+ * truncation without a transaction as there is no space left for block
+ * reservation (typically why we see a ENOSPC in writeback).
+ *
+ * This is not a performance critical path, so for now just do the punching a
+ * buffer head at a time.
+ */
+STATIC void
+xfs_aops_discard_page(
+	struct page		*page)
+{
+	struct inode		*inode = page->mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct buffer_head	*bh, *head;
+	loff_t			offset = page_offset(page);
+	ssize_t			len = 1 << inode->i_blkbits;
+
+	if (!xfs_is_delayed_page(page, IOMAP_DELAY))
+		goto out_invalidate;
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		goto out_invalidate;
+
+	xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+		"page discard on page %p, inode 0x%llx, offset %llu.",
+			page, ip->i_ino, offset);
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	bh = head = page_buffers(page);
+	do {
+		int		done;
+		xfs_fileoff_t	offset_fsb;
+		xfs_bmbt_irec_t	imap;
+		int		nimaps = 1;
+		int		error;
+		xfs_fsblock_t	firstblock;
+		xfs_bmap_free_t flist;
+
+		if (!buffer_delay(bh))
+			goto next_buffer;
+
+		offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+
+		/*
+		 * Map the range first and check that it is a delalloc extent
+		 * before trying to unmap the range. Otherwise we will be
+		 * trying to remove a real extent (which requires a
+		 * transaction) or a hole, which is probably a bad idea...
+		 */
+		error = xfs_bmapi(NULL, ip, offset_fsb, 1,
+				XFS_BMAPI_ENTIRE,  NULL, 0, &imap,
+				&nimaps, NULL, NULL);
+
+		if (error) {
+			/* something screwed, just bail */
+			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+				"page discard failed delalloc mapping lookup.");
+			}
+			break;
+		}
+		if (!nimaps) {
+			/* nothing there */
+			goto next_buffer;
+		}
+		if (imap.br_startblock != DELAYSTARTBLOCK) {
+			/* been converted, ignore */
+			goto next_buffer;
+		}
+		WARN_ON(imap.br_blockcount == 0);
+
+		/*
+		 * Note: while we initialise the firstblock/flist pair, they
+		 * should never be used because blocks should never be
+		 * allocated or freed for a delalloc extent and hence we need
+		 * don't cancel or finish them after the xfs_bunmapi() call.
+		 */
+		xfs_bmap_init(&flist, &firstblock);
+		error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
+					&flist, NULL, &done);
+
+		ASSERT(!flist.xbf_count && !flist.xbf_first);
+		if (error) {
+			/* something screwed, just bail */
+			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+			"page discard unable to remove delalloc mapping.");
+			}
+			break;
+		}
+next_buffer:
+		offset += len;
+
+	} while ((bh = bh->b_this_page) != head);
+
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_invalidate:
+	xfs_vm_invalidatepage(page, 0);
+	return;
+}
+
 /*
  * Calling this without startio set means we are being asked to make a dirty
  * page ready for freeing it's buffers.  When called with startio set then
@@ -1198,7 +1250,7 @@ xfs_page_state_convert(
 	}

 	if (iohead)
-		xfs_submit_ioend(iohead);
+		xfs_submit_ioend(wbc, iohead);

 	return page_dirty;

@@ -1213,7 +1265,7 @@ error:
 	 */
 	if (err != -EAGAIN) {
 		if (!unmapped)
-			block_invalidatepage(page, 0);
+			xfs_aops_discard_page(page);
 		ClearPageUptodate(page);
 	}
 	return err;
@@ -1249,7 +1301,7 @@ xfs_vm_writepage(
 	int			delalloc, unmapped, unwritten;
 	struct inode		*inode = page->mapping->host;

-	xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0);
+	trace_xfs_writepage(inode, page, 0);

 	/*
 	 * We need a transaction if:
@@ -1354,7 +1406,7 @@ xfs_vm_releasepage(
 		.nr_to_write = 1,
 	};

-	xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, 0);
+	trace_xfs_releasepage(inode, page, 0);

 	if (!page_has_buffers(page))
 		return 0;
@@ -1535,7 +1587,7 @@ xfs_end_io_direct(
 	 * didn't map an unwritten extent so switch it's completion
 	 * handler.
 	 */
-	INIT_WORK(&ioend->io_work, xfs_end_bio_written);
+	ioend->io_type = IOMAP_NEW;
 	xfs_finish_ioend(ioend, 0);
 }

@@ -1562,19 +1614,13 @@ xfs_vm_direct_IO(

 	bdev = xfs_find_bdev_for_inode(XFS_I(inode));

-	if (rw == WRITE) {
-		iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
-		ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
-			bdev, iov, offset, nr_segs,
-			xfs_get_blocks_direct,
-			xfs_end_io_direct);
-	} else {
-		iocb->private = xfs_alloc_ioend(inode, IOMAP_READ);
-		ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
-			bdev, iov, offset, nr_segs,
-			xfs_get_blocks_direct,
-			xfs_end_io_direct);
-	}
+	iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
+					IOMAP_UNWRITTEN : IOMAP_READ);
+
+	ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
+					    offset, nr_segs,
+					    xfs_get_blocks_direct,
+					    xfs_end_io_direct);

 	if (unlikely(ret != -EIOCBQUEUED && iocb->private))
 		xfs_destroy_ioend(iocb->private);
@@ -1629,16 +1675,6 @@ xfs_vm_readpages(
 	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
 }

-STATIC void
-xfs_vm_invalidatepage(
-	struct page		*page,
-	unsigned long		offset)
-{
-	xfs_page_trace(XFS_INVALIDPAGE_ENTER,
-			page->mapping->host, page, offset);
-	block_invalidatepage(page, offset);
-}
-
 const struct address_space_operations xfs_address_space_operations = {
 	.readpage		= xfs_vm_readpage,
 	.readpages		= xfs_vm_readpages,
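
Note (sketch, not patch code): the xfs_end_io()/xfs_setfilesize() pair above implements a non-blocking completion handler: if the inode lock cannot be taken in completion context, the ioend takes an extra reference and is re-queued instead of blocking, with a short delay so a held lock is not spun on. The shape of the pattern, with the item type and the try_complete()/item_hold()/queue_my_work()/item_destroy() helpers as hypothetical stand-ins:

	static void completion_handler(struct my_item *item)
	{
		int error = try_complete(item);	/* hypothetical; may return EAGAIN */

		if (error == EAGAIN) {
			item_hold(item);	/* hypothetical: take a new reference */
			queue_my_work(item);	/* hypothetical: retry later */
			delay(1);		/* avoid spinning on a blocked item */
			return;
		}
		item_destroy(item);		/* done (or failed permanently) */
	}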
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 221b3e66ceef..4cfc6ea87df8 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -45,4 +45,6 @@ extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
 extern void xfs_ioend_init(void);
 extern void xfs_ioend_wait(struct xfs_inode *);

+extern void xfs_count_page_state(struct page *, int *, int *, int *);
+
 #endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 965df1227d64..44c2b0ef9a41 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -18,7 +18,7 @@
 #include "xfs.h"
 #include <linux/stddef.h>
 #include <linux/errno.h>
-#include <linux/slab.h>
+#include <linux/gfp.h>
 #include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/vmalloc.h>
@@ -33,12 +33,14 @@
 #include <linux/migrate.h>
 #include <linux/backing-dev.h>
 #include <linux/freezer.h>
+#include <linux/list_sort.h>

 #include "xfs_sb.h"
 #include "xfs_inum.h"
 #include "xfs_ag.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
+#include "xfs_trace.h"

 static kmem_zone_t *xfs_buf_zone;
 STATIC int xfsbufd(void *);
@@ -53,34 +55,6 @@ static struct workqueue_struct *xfslogd_workqueue;
 struct workqueue_struct *xfsdatad_workqueue;
 struct workqueue_struct *xfsconvertd_workqueue;

-#ifdef XFS_BUF_TRACE
-void
-xfs_buf_trace(
-	xfs_buf_t	*bp,
-	char		*id,
-	void		*data,
-	void		*ra)
-{
-	ktrace_enter(xfs_buf_trace_buf,
-		bp, id,
-		(void *)(unsigned long)bp->b_flags,
-		(void *)(unsigned long)bp->b_hold.counter,
-		(void *)(unsigned long)bp->b_sema.count,
-		(void *)current,
-		data, ra,
-		(void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
-		(void *)(unsigned long)(bp->b_file_offset & 0xffffffff),
-		(void *)(unsigned long)bp->b_buffer_length,
-		NULL, NULL, NULL, NULL, NULL);
-}
-ktrace_t *xfs_buf_trace_buf;
-#define XFS_BUF_TRACE_SIZE	4096
-#define XB_TRACE(bp, id, data)	\
-	xfs_buf_trace(bp, id, (void *)data, (void *)__builtin_return_address(0))
-#else
-#define XB_TRACE(bp, id, data)	do { } while (0)
-#endif
-
 #ifdef XFS_BUF_LOCK_TRACKING
 # define XB_SET_OWNER(bp)	((bp)->b_last_holder = current->pid)
 # define XB_CLEAR_OWNER(bp)	((bp)->b_last_holder = -1)
@@ -103,6 +77,27 @@ ktrace_t *xfs_buf_trace_buf;
 #define xfs_buf_deallocate(bp) \
 	kmem_zone_free(xfs_buf_zone, (bp));

+static inline int
+xfs_buf_is_vmapped(
+	struct xfs_buf	*bp)
+{
+	/*
+	 * Return true if the buffer is vmapped.
+	 *
+	 * The XBF_MAPPED flag is set if the buffer should be mapped, but the
+	 * code is clever enough to know it doesn't have to map a single page,
+	 * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1.
+	 */
+	return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1;
+}
+
+static inline int
+xfs_buf_vmap_len(
+	struct xfs_buf	*bp)
+{
+	return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
+}
+
 /*
  * Page Region interfaces.
  *
@@ -149,7 +144,7 @@ page_region_mask(
 	return mask;
 }

-STATIC_INLINE void
+STATIC void
 set_page_region(
 	struct page	*page,
 	size_t		offset,
@@ -161,7 +156,7 @@ set_page_region(
 	SetPageUptodate(page);
 }

-STATIC_INLINE int
+STATIC int
 test_page_region(
 	struct page	*page,
 	size_t		offset,
@@ -173,75 +168,6 @@ test_page_region(
 }

 /*
- * Mapping of multi-page buffers into contiguous virtual space
- */
-
-typedef struct a_list {
-	void		*vm_addr;
-	struct a_list	*next;
-} a_list_t;
-
-static a_list_t		*as_free_head;
-static int		as_list_len;
-static DEFINE_SPINLOCK(as_lock);
-
-/*
- * Try to batch vunmaps because they are costly.
- */
-STATIC void
-free_address(
-	void		*addr)
-{
-	a_list_t	*aentry;
-
-#ifdef CONFIG_XEN
-	/*
-	 * Xen needs to be able to make sure it can get an exclusive
-	 * RO mapping of pages it wants to turn into a pagetable.  If
-	 * a newly allocated page is also still being vmap()ed by xfs,
-	 * it will cause pagetable construction to fail.  This is a
-	 * quick workaround to always eagerly unmap pages so that Xen
-	 * is happy.
-	 */
-	vunmap(addr);
-	return;
-#endif
-
-	aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT);
-	if (likely(aentry)) {
-		spin_lock(&as_lock);
-		aentry->next = as_free_head;
-		aentry->vm_addr = addr;
-		as_free_head = aentry;
-		as_list_len++;
-		spin_unlock(&as_lock);
-	} else {
-		vunmap(addr);
-	}
-}
-
-STATIC void
-purge_addresses(void)
-{
-	a_list_t	*aentry, *old;
-
-	if (as_free_head == NULL)
-		return;
-
-	spin_lock(&as_lock);
-	aentry = as_free_head;
-	as_free_head = NULL;
-	as_list_len = 0;
-	spin_unlock(&as_lock);
-
-	while ((old = aentry) != NULL) {
-		vunmap(aentry->vm_addr);
-		aentry = aentry->next;
-		kfree(old);
-	}
-}
-
-/*
  * Internal xfs_buf_t object manipulation
  */
@@ -279,7 +205,8 @@ _xfs_buf_initialize(
 	init_waitqueue_head(&bp->b_waiters);

 	XFS_STATS_INC(xb_create);
-	XB_TRACE(bp, "initialize", target);
+
+	trace_xfs_buf_init(bp, _RET_IP_);
 }

 /*
@@ -318,6 +245,7 @@ _xfs_buf_free_pages(
 {
 	if (bp->b_pages != bp->b_page_array) {
 		kmem_free(bp->b_pages);
+		bp->b_pages = NULL;
 	}
 }

@@ -332,15 +260,16 @@ void
 xfs_buf_free(
 	xfs_buf_t		*bp)
 {
-	XB_TRACE(bp, "free", 0);
+	trace_xfs_buf_free(bp, _RET_IP_);

 	ASSERT(list_empty(&bp->b_hash_list));

 	if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
 		uint		i;

-		if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
-			free_address(bp->b_addr - bp->b_offset);
+		if (xfs_buf_is_vmapped(bp))
+			vm_unmap_ram(bp->b_addr - bp->b_offset,
+				     bp->b_page_count);

 		for (i = 0; i < bp->b_page_count; i++) {
 			struct page	*page = bp->b_pages[i];
@@ -349,9 +278,8 @@ xfs_buf_free(
 			ASSERT(!PagePrivate(page));
 			page_cache_release(page);
 		}
-		_xfs_buf_free_pages(bp);
 	}
-
+	_xfs_buf_free_pages(bp);
 	xfs_buf_deallocate(bp);
 }

@@ -445,7 +373,6 @@ _xfs_buf_lookup_pages(
 	if (page_count == bp->b_page_count)
 		bp->b_flags |= XBF_DONE;

-	XB_TRACE(bp, "lookup_pages", (long)page_count);
 	return error;
 }

@@ -462,10 +389,8 @@ _xfs_buf_map_pages(
 		bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
 		bp->b_flags |= XBF_MAPPED;
 	} else if (flags & XBF_MAPPED) {
-		if (as_list_len > 64)
-			purge_addresses();
-		bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
-					VM_MAP, PAGE_KERNEL);
+		bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
+					-1, PAGE_KERNEL);
 		if (unlikely(bp->b_addr == NULL))
 			return -ENOMEM;
 		bp->b_addr += bp->b_offset;
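
Note (illustration, not part of the patch): vm_map_ram()/vm_unmap_ram() subsume the hand-rolled vunmap batching (free_address()/purge_addresses()) that the earlier hunk deletes, because that kernel API already defers and batches the costly TLB flushes internally. A minimal sketch of the pairing in kernel C:

	static void *map_pages_contig(struct page **pages, unsigned int count)
	{
		/* node -1: no NUMA preference; flush batching happens inside */
		return vm_map_ram(pages, count, -1, PAGE_KERNEL);
	}

	static void unmap_pages_contig(void *addr, unsigned int count)
	{
		/* must pass the same page count that was mapped */
		vm_unmap_ram(addr, count);
	}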
@@ -548,7 +473,6 @@ found:
 	if (down_trylock(&bp->b_sema)) {
 		if (!(flags & XBF_TRYLOCK)) {
 			/* wait for buffer ownership */
-			XB_TRACE(bp, "get_lock", 0);
 			xfs_buf_lock(bp);
 			XFS_STATS_INC(xb_get_locked_waited);
 		} else {
@@ -571,7 +495,8 @@ found:
 		ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
 		bp->b_flags &= XBF_MAPPED;
 	}
-	XB_TRACE(bp, "got_lock", 0);
+
+	trace_xfs_buf_find(bp, flags, _RET_IP_);
 	XFS_STATS_INC(xb_get_locked);
 	return bp;
 }
@@ -582,7 +507,7 @@ found:
  *	although backing storage may not be.
  */
 xfs_buf_t *
-xfs_buf_get_flags(
+xfs_buf_get(
 	xfs_buftarg_t		*target,/* target for buffer	  */
 	xfs_off_t		ioff,	/* starting offset of range */
 	size_t			isize,	/* length of range	    */
@@ -627,7 +552,7 @@ xfs_buf_get_flags(
 	bp->b_bn = ioff;
 	bp->b_count_desired = bp->b_buffer_length;

-	XB_TRACE(bp, "get", (unsigned long)flags);
+	trace_xfs_buf_get(bp, flags, _RET_IP_);
 	return bp;

  no_buffer:
@@ -644,8 +569,6 @@ _xfs_buf_read(
 {
 	int			status;

-	XB_TRACE(bp, "_xfs_buf_read", (unsigned long)flags);
-
 	ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
 	ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);

@@ -661,7 +584,7 @@ _xfs_buf_read(
 }

 xfs_buf_t *
-xfs_buf_read_flags(
+xfs_buf_read(
 	xfs_buftarg_t		*target,
 	xfs_off_t		ioff,
 	size_t			isize,
@@ -671,21 +594,20 @@ xfs_buf_read_flags(

 	flags |= XBF_READ;

-	bp = xfs_buf_get_flags(target, ioff, isize, flags);
+	bp = xfs_buf_get(target, ioff, isize, flags);
 	if (bp) {
+		trace_xfs_buf_read(bp, flags, _RET_IP_);
+
 		if (!XFS_BUF_ISDONE(bp)) {
-			XB_TRACE(bp, "read", (unsigned long)flags);
 			XFS_STATS_INC(xb_get_read);
 			_xfs_buf_read(bp, flags);
 		} else if (flags & XBF_ASYNC) {
-			XB_TRACE(bp, "read_async", (unsigned long)flags);
 			/*
 			 * Read ahead call which is already satisfied,
 			 * drop the buffer
 			 */
 			goto no_buffer;
 		} else {
-			XB_TRACE(bp, "read_done", (unsigned long)flags);
 			/* We do not want read in the flags */
 			bp->b_flags &= ~XBF_READ;
 		}
@@ -718,7 +640,7 @@ xfs_buf_readahead(
 		return;

 	flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
-	xfs_buf_read_flags(target, ioff, isize, flags);
+	xfs_buf_read(target, ioff, isize, flags);
 }

 xfs_buf_t *
@@ -823,7 +745,7 @@ xfs_buf_get_noaddr(

 	xfs_buf_unlock(bp);

-	XB_TRACE(bp, "no_daddr", len);
+	trace_xfs_buf_get_noaddr(bp, _RET_IP_);
 	return bp;

  fail_free_mem:
@@ -845,8 +767,8 @@ void
 xfs_buf_hold(
 	xfs_buf_t		*bp)
 {
+	trace_xfs_buf_hold(bp, _RET_IP_);
 	atomic_inc(&bp->b_hold);
-	XB_TRACE(bp, "hold", 0);
 }

 /*
@@ -859,7 +781,7 @@ xfs_buf_rele(
 {
 	xfs_bufhash_t		*hash = bp->b_hash;

-	XB_TRACE(bp, "rele", bp->b_relse);
+	trace_xfs_buf_rele(bp, _RET_IP_);

 	if (unlikely(!hash)) {
 		ASSERT(!bp->b_relse);
@@ -909,21 +831,19 @@ xfs_buf_cond_lock(
 	int			locked;

 	locked = down_trylock(&bp->b_sema) == 0;
-	if (locked) {
+	if (locked)
 		XB_SET_OWNER(bp);
-	}
-	XB_TRACE(bp, "cond_lock", (long)locked);
+
+	trace_xfs_buf_cond_lock(bp, _RET_IP_);
 	return locked ? 0 : -EBUSY;
 }

-#if defined(DEBUG) || defined(XFS_BLI_TRACE)
 int
 xfs_buf_lock_value(
 	xfs_buf_t		*bp)
 {
 	return bp->b_sema.count;
 }
-#endif

 /*
  * Locks a buffer object.
@@ -935,12 +855,14 @@ void
 xfs_buf_lock(
 	xfs_buf_t		*bp)
 {
-	XB_TRACE(bp, "lock", 0);
+	trace_xfs_buf_lock(bp, _RET_IP_);
+
 	if (atomic_read(&bp->b_io_remaining))
 		blk_run_address_space(bp->b_target->bt_mapping);
 	down(&bp->b_sema);
 	XB_SET_OWNER(bp);
-	XB_TRACE(bp, "locked", 0);
+
+	trace_xfs_buf_lock_done(bp, _RET_IP_);
 }

 /*
@@ -962,7 +884,8 @@ xfs_buf_unlock(

 	XB_CLEAR_OWNER(bp);
 	up(&bp->b_sema);
-	XB_TRACE(bp, "unlock", 0);
+
+	trace_xfs_buf_unlock(bp, _RET_IP_);
 }


@@ -974,17 +897,18 @@ void
 xfs_buf_pin(
 	xfs_buf_t		*bp)
 {
+	trace_xfs_buf_pin(bp, _RET_IP_);
 	atomic_inc(&bp->b_pin_count);
-	XB_TRACE(bp, "pin", (long)bp->b_pin_count.counter);
 }

 void
 xfs_buf_unpin(
 	xfs_buf_t		*bp)
 {
+	trace_xfs_buf_unpin(bp, _RET_IP_);
+
 	if (atomic_dec_and_test(&bp->b_pin_count))
 		wake_up_all(&bp->b_waiters);
-	XB_TRACE(bp, "unpin", (long)bp->b_pin_count.counter);
 }

 int
@@ -1035,7 +959,7 @@ xfs_buf_iodone_work(
 	 */
 	if ((bp->b_error == EOPNOTSUPP) &&
 	    (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
-		XB_TRACE(bp, "ordered_retry", bp->b_iodone);
+		trace_xfs_buf_ordered_retry(bp, _RET_IP_);
 		bp->b_flags &= ~XBF_ORDERED;
 		bp->b_flags |= _XFS_BARRIER_FAILED;
 		xfs_buf_iorequest(bp);
@@ -1050,12 +974,12 @@ xfs_buf_ioend(
 	xfs_buf_t		*bp,
 	int			schedule)
 {
+	trace_xfs_buf_iodone(bp, _RET_IP_);
+
 	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
 	if (bp->b_error == 0)
 		bp->b_flags |= XBF_DONE;

-	XB_TRACE(bp, "iodone", bp->b_iodone);
-
 	if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
 		if (schedule) {
 			INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
@@ -1075,26 +999,34 @@ xfs_buf_ioerror(
 {
 	ASSERT(error >= 0 && error <= 0xffff);
 	bp->b_error = (unsigned short)error;
-	XB_TRACE(bp, "ioerror", (unsigned long)error);
+	trace_xfs_buf_ioerror(bp, error, _RET_IP_);
 }

 int
-xfs_bawrite(
-	void			*mp,
+xfs_bwrite(
+	struct xfs_mount	*mp,
 	struct xfs_buf		*bp)
 {
-	XB_TRACE(bp, "bawrite", 0);
+	int			iowait = (bp->b_flags & XBF_ASYNC) == 0;
+	int			error = 0;

-	ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+	bp->b_strat = xfs_bdstrat_cb;
+	bp->b_mount = mp;
+	bp->b_flags |= XBF_WRITE;
+	if (!iowait)
+		bp->b_flags |= _XBF_RUN_QUEUES;

 	xfs_buf_delwri_dequeue(bp);
+	xfs_buf_iostrategy(bp);

-	bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD);
-	bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
+	if (iowait) {
+		error = xfs_buf_iowait(bp);
+		if (error)
+			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+		xfs_buf_relse(bp);
+	}

-	bp->b_mount = mp;
-	bp->b_strat = xfs_bdstrat_cb;
-	return xfs_bdstrat_cb(bp);
+	return error;
 }

 void
@@ -1102,7 +1034,7 @@ xfs_bdwrite(
 	void			*mp,
 	struct xfs_buf		*bp)
 {
-	XB_TRACE(bp, "bdwrite", 0);
+	trace_xfs_buf_bdwrite(bp, _RET_IP_);

 	bp->b_strat = xfs_bdstrat_cb;
 	bp->b_mount = mp;
@@ -1113,7 +1045,127 @@ xfs_bdwrite(
 	xfs_buf_delwri_queue(bp, 1);
 }

-STATIC_INLINE void
+/*
+ * Called when we want to stop a buffer from getting written or read.
+ * We attach the EIO error, muck with its flags, and call biodone
+ * so that the proper iodone callbacks get called.
+ */
+STATIC int
+xfs_bioerror(
+	xfs_buf_t *bp)
+{
+#ifdef XFSERRORDEBUG
+	ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
+#endif
+
+	/*
+	 * No need to wait until the buffer is unpinned, we aren't flushing it.
+	 */
+	XFS_BUF_ERROR(bp, EIO);
+
+	/*
+	 * We're calling biodone, so delete XBF_DONE flag.
+	 */
+	XFS_BUF_UNREAD(bp);
+	XFS_BUF_UNDELAYWRITE(bp);
+	XFS_BUF_UNDONE(bp);
+	XFS_BUF_STALE(bp);
+
+	XFS_BUF_CLR_BDSTRAT_FUNC(bp);
+	xfs_biodone(bp);
+
+	return EIO;
+}
+
+/*
+ * Same as xfs_bioerror, except that we are releasing the buffer
+ * here ourselves, and avoiding the biodone call.
+ * This is meant for userdata errors; metadata bufs come with
+ * iodone functions attached, so that we can track down errors.
+ */
+STATIC int
+xfs_bioerror_relse(
+	struct xfs_buf	*bp)
+{
+	int64_t		fl = XFS_BUF_BFLAGS(bp);
+	/*
+	 * No need to wait until the buffer is unpinned.
+	 * We aren't flushing it.
+	 *
+	 * chunkhold expects B_DONE to be set, whether
+	 * we actually finish the I/O or not. We don't want to
+	 * change that interface.
+	 */
+	XFS_BUF_UNREAD(bp);
+	XFS_BUF_UNDELAYWRITE(bp);
+	XFS_BUF_DONE(bp);
+	XFS_BUF_STALE(bp);
+	XFS_BUF_CLR_IODONE_FUNC(bp);
+	XFS_BUF_CLR_BDSTRAT_FUNC(bp);
+	if (!(fl & XBF_ASYNC)) {
+		/*
+		 * Mark b_error and B_ERROR _both_.
+		 * Lot's of chunkcache code assumes that.
+		 * There's no reason to mark error for
+		 * ASYNC buffers.
+		 */
+		XFS_BUF_ERROR(bp, EIO);
+		XFS_BUF_FINISH_IOWAIT(bp);
+	} else {
+		xfs_buf_relse(bp);
+	}
+
+	return EIO;
+}
+
+
+/*
+ * All xfs metadata buffers except log state machine buffers
+ * get this attached as their b_bdstrat callback function.
+ * This is so that we can catch a buffer
+ * after prematurely unpinning it to forcibly shutdown the filesystem.
+ */
+int
+xfs_bdstrat_cb(
+	struct xfs_buf	*bp)
+{
+	if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
+		trace_xfs_bdstrat_shut(bp, _RET_IP_);
+		/*
+		 * Metadata write that didn't get logged but
+		 * written delayed anyway. These aren't associated
+		 * with a transaction, and can be ignored.
+		 */
+		if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
+			return xfs_bioerror_relse(bp);
+		else
+			return xfs_bioerror(bp);
+	}
+
+	xfs_buf_iorequest(bp);
+	return 0;
+}
+
+/*
+ * Wrapper around bdstrat so that we can stop data from going to disk in case
+ * we are shutting down the filesystem.  Typically user data goes thru this
+ * path; one of the exceptions is the superblock.
+ */
+void
+xfsbdstrat(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp)
+{
+	if (XFS_FORCED_SHUTDOWN(mp)) {
+		trace_xfs_bdstrat_shut(bp, _RET_IP_);
+		xfs_bioerror_relse(bp);
+		return;
+	}
+
+	xfs_buf_iorequest(bp);
+}
+
+STATIC void
 _xfs_buf_ioend(
 	xfs_buf_t		*bp,
 	int			schedule)
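
Note (sketch): xfs_bdstrat_cb() and xfsbdstrat() added above share the same guard shape — check for a forced filesystem shutdown before issuing I/O and error the buffer out if so. Reduced to its essentials:

	int submit_guarded(struct xfs_mount *mp, struct xfs_buf *bp)
	{
		if (XFS_FORCED_SHUTDOWN(mp))
			return xfs_bioerror_relse(bp);	/* fail the buffer; never touch disk */

		xfs_buf_iorequest(bp);	/* normal path: hand the buffer to the block layer */
		return 0;
	}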
@@ -1135,6 +1187,9 @@ xfs_buf_bio_end_io(

 	xfs_buf_ioerror(bp, -error);

+	if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
+		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
+
 	do {
 		struct page	*page = bvec->bv_page;

@@ -1177,10 +1232,14 @@ _xfs_buf_ioapply(
 	if (bp->b_flags & XBF_ORDERED) {
 		ASSERT(!(bp->b_flags & XBF_READ));
 		rw = WRITE_BARRIER;
-	} else if (bp->b_flags & _XBF_RUN_QUEUES) {
+	} else if (bp->b_flags & XBF_LOG_BUFFER) {
 		ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
 		bp->b_flags &= ~_XBF_RUN_QUEUES;
 		rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
+	} else if (bp->b_flags & _XBF_RUN_QUEUES) {
+		ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
+		bp->b_flags &= ~_XBF_RUN_QUEUES;
+		rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META;
 	} else {
 		rw = (bp->b_flags & XBF_WRITE) ? WRITE :
 		     (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
@@ -1240,6 +1299,10 @@ next_chunk:

 submit_io:
 	if (likely(bio->bi_size)) {
+		if (xfs_buf_is_vmapped(bp)) {
+			flush_kernel_vmap_range(bp->b_addr,
+						xfs_buf_vmap_len(bp));
+		}
 		submit_bio(rw, bio);
 		if (size)
 			goto next_chunk;
@@ -1253,7 +1316,7 @@ int
 xfs_buf_iorequest(
 	xfs_buf_t		*bp)
 {
-	XB_TRACE(bp, "iorequest", 0);
+	trace_xfs_buf_iorequest(bp, _RET_IP_);

 	if (bp->b_flags & XBF_DELWRI) {
 		xfs_buf_delwri_queue(bp, 1);
@@ -1287,11 +1350,13 @@ int
 xfs_buf_iowait(
 	xfs_buf_t		*bp)
 {
-	XB_TRACE(bp, "iowait", 0);
+	trace_xfs_buf_iowait(bp, _RET_IP_);
+
 	if (atomic_read(&bp->b_io_remaining))
 		blk_run_address_space(bp->b_target->bt_mapping);
 	wait_for_completion(&bp->b_iowait);
-	XB_TRACE(bp, "iowaited", (long)bp->b_error);
+
+	trace_xfs_buf_iowait_done(bp, _RET_IP_);
 	return bp->b_error;
 }

@@ -1318,7 +1383,7 @@ xfs_buf_iomove(
 	xfs_buf_t		*bp,	/* buffer to process		*/
 	size_t			boff,	/* starting buffer offset	*/
 	size_t			bsize,	/* length to copy		*/
-	caddr_t			data,	/* data address			*/
+	void			*data,	/* data address			*/
 	xfs_buf_rw_t		mode)	/* read/write/zero flag		*/
 {
 	size_t			bend, cpoff, csize;
@@ -1400,8 +1465,8 @@ xfs_alloc_bufhash(
1400 1465
1401 btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ 1466 btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */
1402 btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; 1467 btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
1403 btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) * 1468 btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
1404 sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE); 1469 sizeof(xfs_bufhash_t));
1405 for (i = 0; i < (1 << btp->bt_hashshift); i++) { 1470 for (i = 0; i < (1 << btp->bt_hashshift); i++) {
1406 spin_lock_init(&btp->bt_hash[i].bh_lock); 1471 spin_lock_init(&btp->bt_hash[i].bh_lock);
1407 INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); 1472 INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
@@ -1412,7 +1477,7 @@ STATIC void
1412xfs_free_bufhash( 1477xfs_free_bufhash(
1413 xfs_buftarg_t *btp) 1478 xfs_buftarg_t *btp)
1414{ 1479{
1415 kmem_free(btp->bt_hash); 1480 kmem_free_large(btp->bt_hash);
1416 btp->bt_hash = NULL; 1481 btp->bt_hash = NULL;
1417} 1482}
1418 1483
@@ -1604,7 +1669,8 @@ xfs_buf_delwri_queue(
1604 struct list_head *dwq = &bp->b_target->bt_delwrite_queue; 1669 struct list_head *dwq = &bp->b_target->bt_delwrite_queue;
1605 spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; 1670 spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
1606 1671
1607 XB_TRACE(bp, "delwri_q", (long)unlock); 1672 trace_xfs_buf_delwri_queue(bp, _RET_IP_);
1673
1608 ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC)); 1674 ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
1609 1675
1610 spin_lock(dwlk); 1676 spin_lock(dwlk);
@@ -1616,6 +1682,11 @@ xfs_buf_delwri_queue(
1616 list_del(&bp->b_list); 1682 list_del(&bp->b_list);
1617 } 1683 }
1618 1684
1685 if (list_empty(dwq)) {
1686 /* start xfsbufd as it is about to have something to do */
1687 wake_up_process(bp->b_target->bt_task);
1688 }
1689
1619 bp->b_flags |= _XBF_DELWRI_Q; 1690 bp->b_flags |= _XBF_DELWRI_Q;
1620 list_add_tail(&bp->b_list, dwq); 1691 list_add_tail(&bp->b_list, dwq);
1621 bp->b_queuetime = jiffies; 1692 bp->b_queuetime = jiffies;
@@ -1644,7 +1715,36 @@ xfs_buf_delwri_dequeue(
1644 if (dequeued) 1715 if (dequeued)
1645 xfs_buf_rele(bp); 1716 xfs_buf_rele(bp);
1646 1717
1647 XB_TRACE(bp, "delwri_dq", (long)dequeued); 1718 trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
1719}
1720
1721/*
1722 * If a delwri buffer needs to be pushed before it has aged out, then promote
1723 * it to the head of the delwri queue so that it will be flushed on the next
1724 * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
1725 * than the age currently needed to flush the buffer. Hence the next time the
1726 * xfsbufd sees it is guaranteed to be considered old enough to flush.
1727 */
1728void
1729xfs_buf_delwri_promote(
1730 struct xfs_buf *bp)
1731{
1732 struct xfs_buftarg *btp = bp->b_target;
1733 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
1734
1735 ASSERT(bp->b_flags & XBF_DELWRI);
1736 ASSERT(bp->b_flags & _XBF_DELWRI_Q);
1737
1738 /*
1739 * Check the buffer age before locking the delayed write queue as we
1740 * don't need to promote buffers that are already past the flush age.
1741 */
1742 if (bp->b_queuetime < jiffies - age)
1743 return;
1744 bp->b_queuetime = jiffies - age;
1745 spin_lock(&btp->bt_delwrite_lock);
1746 list_move(&bp->b_list, &btp->bt_delwrite_queue);
1747 spin_unlock(&btp->bt_delwrite_lock);
1648} 1748}
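For concreteness: the promotion works purely through the age test xfsbufd applies when it splits the queue (roughly, a buffer stays queued while time_before(jiffies, b_queuetime + age) holds — an assumption about xfs_buf_delwri_split's test, which is not shown in this hunk). A sketch:

	/*
	 * After xfs_buf_delwri_promote():
	 *	bp->b_queuetime == jiffies - age
	 * so on the next xfsbufd pass:
	 *	time_before(jiffies, bp->b_queuetime + age) is false
	 * and the buffer is split out and flushed immediately.
	 */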
1649 1749
1650STATIC void 1750STATIC void
@@ -1665,6 +1765,8 @@ xfsbufd_wakeup(
1665 list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { 1765 list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
1666 if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) 1766 if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
1667 continue; 1767 continue;
1768 if (list_empty(&btp->bt_delwrite_queue))
1769 continue;
1668 set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); 1770 set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
1669 wake_up_process(btp->bt_task); 1771 wake_up_process(btp->bt_task);
1670 } 1772 }
@@ -1692,7 +1794,7 @@ xfs_buf_delwri_split(
1692 INIT_LIST_HEAD(list); 1794 INIT_LIST_HEAD(list);
1693 spin_lock(dwlk); 1795 spin_lock(dwlk);
1694 list_for_each_entry_safe(bp, n, dwq, b_list) { 1796 list_for_each_entry_safe(bp, n, dwq, b_list) {
1695 XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp)); 1797 trace_xfs_buf_delwri_split(bp, _RET_IP_);
1696 ASSERT(bp->b_flags & XBF_DELWRI); 1798 ASSERT(bp->b_flags & XBF_DELWRI);
1697 1799
1698 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) { 1800 if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
@@ -1715,20 +1817,53 @@ xfs_buf_delwri_split(
1715 1817
1716} 1818}
1717 1819
1820/*
 1821 * The compare function is more complex than it needs to be because
 1822 * the return value is only 32 bits wide and we are comparing
 1823 * 64-bit block numbers.
1824 */
1825static int
1826xfs_buf_cmp(
1827 void *priv,
1828 struct list_head *a,
1829 struct list_head *b)
1830{
1831 struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list);
1832 struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
1833 xfs_daddr_t diff;
1834
1835 diff = ap->b_bn - bp->b_bn;
1836 if (diff < 0)
1837 return -1;
1838 if (diff > 0)
1839 return 1;
1840 return 0;
1841}
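The indirection matters: truncating the 64-bit difference into the 32-bit return value can misreport the ordering, or even report equality for distinct blocks. A minimal counter-example for the naive one-liner:

	/* naive: return ap->b_bn - bp->b_bn;  (truncated to int) */
	xfs_daddr_t a_bn = 0x100000000LL;	/* block 2^32 */
	xfs_daddr_t b_bn = 0;
	int cmp = (int)(a_bn - b_bn);		/* == 0: falsely "equal" */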
1842
1843void
1844xfs_buf_delwri_sort(
1845 xfs_buftarg_t *target,
1846 struct list_head *list)
1847{
1848 list_sort(NULL, list, xfs_buf_cmp);
1849}
1850
1718STATIC int 1851STATIC int
1719xfsbufd( 1852xfsbufd(
1720 void *data) 1853 void *data)
1721{ 1854{
1722 struct list_head tmp; 1855 xfs_buftarg_t *target = (xfs_buftarg_t *)data;
1723 xfs_buftarg_t *target = (xfs_buftarg_t *)data;
1724 int count;
1725 xfs_buf_t *bp;
1726 1856
1727 current->flags |= PF_MEMALLOC; 1857 current->flags |= PF_MEMALLOC;
1728 1858
1729 set_freezable(); 1859 set_freezable();
1730 1860
1731 do { 1861 do {
1862 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
1863 long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
1864 int count = 0;
1865 struct list_head tmp;
1866
1732 if (unlikely(freezing(current))) { 1867 if (unlikely(freezing(current))) {
1733 set_bit(XBT_FORCE_SLEEP, &target->bt_flags); 1868 set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
1734 refrigerator(); 1869 refrigerator();
@@ -1736,24 +1871,20 @@ xfsbufd(
1736 clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); 1871 clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
1737 } 1872 }
1738 1873
1739 schedule_timeout_interruptible( 1874 /* sleep for a long time if there is nothing to do. */
1740 xfs_buf_timer_centisecs * msecs_to_jiffies(10)); 1875 if (list_empty(&target->bt_delwrite_queue))
1876 tout = MAX_SCHEDULE_TIMEOUT;
1877 schedule_timeout_interruptible(tout);
1741 1878
1742 xfs_buf_delwri_split(target, &tmp, 1879 xfs_buf_delwri_split(target, &tmp, age);
1743 xfs_buf_age_centisecs * msecs_to_jiffies(10)); 1880 list_sort(NULL, &tmp, xfs_buf_cmp);
1744
1745 count = 0;
1746 while (!list_empty(&tmp)) { 1881 while (!list_empty(&tmp)) {
1747 bp = list_entry(tmp.next, xfs_buf_t, b_list); 1882 struct xfs_buf *bp;
1748 ASSERT(target == bp->b_target); 1883 bp = list_first_entry(&tmp, struct xfs_buf, b_list);
1749
1750 list_del_init(&bp->b_list); 1884 list_del_init(&bp->b_list);
1751 xfs_buf_iostrategy(bp); 1885 xfs_buf_iostrategy(bp);
1752 count++; 1886 count++;
1753 } 1887 }
1754
1755 if (as_list_len > 0)
1756 purge_addresses();
1757 if (count) 1888 if (count)
1758 blk_run_address_space(target->bt_mapping); 1889 blk_run_address_space(target->bt_mapping);
1759 1890
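Note how this pairs with the wake_up_process() added to xfs_buf_delwri_queue() earlier in this patch: instead of polling every xfs_buf_timer_centisecs, xfsbufd now parks indefinitely on an empty queue and relies on the queuer to wake it on the empty-to-non-empty transition:

	/* producer side (xfs_buf_delwri_queue) */
	if (list_empty(dwq))
		wake_up_process(bp->b_target->bt_task);

	/* consumer side (xfsbufd) */
	if (list_empty(&target->bt_delwrite_queue))
		tout = MAX_SCHEDULE_TIMEOUT;
	schedule_timeout_interruptible(tout);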
@@ -1772,42 +1903,45 @@ xfs_flush_buftarg(
1772 xfs_buftarg_t *target, 1903 xfs_buftarg_t *target,
1773 int wait) 1904 int wait)
1774{ 1905{
1775 struct list_head tmp; 1906 xfs_buf_t *bp;
1776 xfs_buf_t *bp, *n;
1777 int pincount = 0; 1907 int pincount = 0;
1908 LIST_HEAD(tmp_list);
1909 LIST_HEAD(wait_list);
1778 1910
1779 xfs_buf_runall_queues(xfsconvertd_workqueue); 1911 xfs_buf_runall_queues(xfsconvertd_workqueue);
1780 xfs_buf_runall_queues(xfsdatad_workqueue); 1912 xfs_buf_runall_queues(xfsdatad_workqueue);
1781 xfs_buf_runall_queues(xfslogd_workqueue); 1913 xfs_buf_runall_queues(xfslogd_workqueue);
1782 1914
1783 set_bit(XBT_FORCE_FLUSH, &target->bt_flags); 1915 set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
1784 pincount = xfs_buf_delwri_split(target, &tmp, 0); 1916 pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
1785 1917
1786 /* 1918 /*
1787 * Dropped the delayed write list lock, now walk the temporary list 1919 * Dropped the delayed write list lock, now walk the temporary list.
 1920 * All I/O is issued asynchronously; if we need to wait for
 1921 * completion, we do that after issuing all of it.
1788 */ 1922 */
1789 list_for_each_entry_safe(bp, n, &tmp, b_list) { 1923 list_sort(NULL, &tmp_list, xfs_buf_cmp);
1924 while (!list_empty(&tmp_list)) {
1925 bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
1790 ASSERT(target == bp->b_target); 1926 ASSERT(target == bp->b_target);
1791 if (wait) 1927 list_del_init(&bp->b_list);
1928 if (wait) {
1792 bp->b_flags &= ~XBF_ASYNC; 1929 bp->b_flags &= ~XBF_ASYNC;
1793 else 1930 list_add(&bp->b_list, &wait_list);
1794 list_del_init(&bp->b_list); 1931 }
1795
1796 xfs_buf_iostrategy(bp); 1932 xfs_buf_iostrategy(bp);
1797 } 1933 }
1798 1934
1799 if (wait) 1935 if (wait) {
1936 /* Expedite and wait for IO to complete. */
1800 blk_run_address_space(target->bt_mapping); 1937 blk_run_address_space(target->bt_mapping);
1938 while (!list_empty(&wait_list)) {
1939 bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
1801 1940
1802 /* 1941 list_del_init(&bp->b_list);
1803 * Remaining list items must be flushed before returning 1942 xfs_iowait(bp);
1804 */ 1943 xfs_buf_relse(bp);
1805 while (!list_empty(&tmp)) { 1944 }
1806 bp = list_entry(tmp.next, xfs_buf_t, b_list);
1807
1808 list_del_init(&bp->b_list);
1809 xfs_iowait(bp);
1810 xfs_buf_relse(bp);
1811 } 1945 }
1812 1946
1813 return pincount; 1947 return pincount;
@@ -1816,14 +1950,10 @@ xfs_flush_buftarg(
1816int __init 1950int __init
1817xfs_buf_init(void) 1951xfs_buf_init(void)
1818{ 1952{
1819#ifdef XFS_BUF_TRACE
1820 xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_NOFS);
1821#endif
1822
1823 xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", 1953 xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
1824 KM_ZONE_HWALIGN, NULL); 1954 KM_ZONE_HWALIGN, NULL);
1825 if (!xfs_buf_zone) 1955 if (!xfs_buf_zone)
1826 goto out_free_trace_buf; 1956 goto out;
1827 1957
1828 xfslogd_workqueue = create_workqueue("xfslogd"); 1958 xfslogd_workqueue = create_workqueue("xfslogd");
1829 if (!xfslogd_workqueue) 1959 if (!xfslogd_workqueue)
@@ -1846,10 +1976,7 @@ xfs_buf_init(void)
1846 destroy_workqueue(xfslogd_workqueue); 1976 destroy_workqueue(xfslogd_workqueue);
1847 out_free_buf_zone: 1977 out_free_buf_zone:
1848 kmem_zone_destroy(xfs_buf_zone); 1978 kmem_zone_destroy(xfs_buf_zone);
1849 out_free_trace_buf: 1979 out:
1850#ifdef XFS_BUF_TRACE
1851 ktrace_free(xfs_buf_trace_buf);
1852#endif
1853 return -ENOMEM; 1980 return -ENOMEM;
1854} 1981}
1855 1982
@@ -1861,9 +1988,6 @@ xfs_buf_terminate(void)
1861 destroy_workqueue(xfsdatad_workqueue); 1988 destroy_workqueue(xfsdatad_workqueue);
1862 destroy_workqueue(xfslogd_workqueue); 1989 destroy_workqueue(xfslogd_workqueue);
1863 kmem_zone_destroy(xfs_buf_zone); 1990 kmem_zone_destroy(xfs_buf_zone);
1864#ifdef XFS_BUF_TRACE
1865 ktrace_free(xfs_buf_trace_buf);
1866#endif
1867} 1991}
1868 1992
1869#ifdef CONFIG_KDB_MODULES 1993#ifdef CONFIG_KDB_MODULES
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 9b4d666ad31f..386e7361e50e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -55,6 +55,7 @@ typedef enum {
55 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ 55 XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
56 XBF_ORDERED = (1 << 11), /* use ordered writes */ 56 XBF_ORDERED = (1 << 11), /* use ordered writes */
57 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ 57 XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
58 XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */
58 59
59 /* flags used only as arguments to access routines */ 60 /* flags used only as arguments to access routines */
60 XBF_LOCK = (1 << 14), /* lock requested */ 61 XBF_LOCK = (1 << 14), /* lock requested */
@@ -95,6 +96,28 @@ typedef enum {
95 _XFS_BARRIER_FAILED = (1 << 23), 96 _XFS_BARRIER_FAILED = (1 << 23),
96} xfs_buf_flags_t; 97} xfs_buf_flags_t;
97 98
99#define XFS_BUF_FLAGS \
100 { XBF_READ, "READ" }, \
101 { XBF_WRITE, "WRITE" }, \
102 { XBF_MAPPED, "MAPPED" }, \
103 { XBF_ASYNC, "ASYNC" }, \
104 { XBF_DONE, "DONE" }, \
105 { XBF_DELWRI, "DELWRI" }, \
106 { XBF_STALE, "STALE" }, \
107 { XBF_FS_MANAGED, "FS_MANAGED" }, \
108 { XBF_ORDERED, "ORDERED" }, \
109 { XBF_READ_AHEAD, "READ_AHEAD" }, \
110 { XBF_LOCK, "LOCK" }, /* should never be set */\
111 { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\
112 { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\
113 { _XBF_PAGE_CACHE, "PAGE_CACHE" }, \
114 { _XBF_PAGES, "PAGES" }, \
115 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
116 { _XBF_DELWRI_Q, "DELWRI_Q" }, \
117 { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \
118 { _XFS_BARRIER_FAILED, "BARRIER_FAILED" }
119
120
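The table is in the { mask, name } pair format consumed by __print_flags(); a sketch of the intended use in a trace event (the actual TP_printk lines live in xfs_trace.h):

	TP_printk("flags %s",
		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS))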
98typedef enum { 121typedef enum {
99 XBT_FORCE_SLEEP = 0, 122 XBT_FORCE_SLEEP = 0,
100 XBT_FORCE_FLUSH = 1, 123 XBT_FORCE_FLUSH = 1,
@@ -186,15 +209,10 @@ extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
186#define xfs_incore(buftarg,blkno,len,lockit) \ 209#define xfs_incore(buftarg,blkno,len,lockit) \
187 _xfs_buf_find(buftarg, blkno ,len, lockit, NULL) 210 _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
188 211
189extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t, 212extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
190 xfs_buf_flags_t); 213 xfs_buf_flags_t);
191#define xfs_buf_get(target, blkno, len, flags) \ 214extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
192 xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
193
194extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t,
195 xfs_buf_flags_t); 215 xfs_buf_flags_t);
196#define xfs_buf_read(target, blkno, len, flags) \
197 xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
198 216
199extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); 217extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
200extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *); 218extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
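Because the removed macros silently substituted XBF_LOCK | XBF_MAPPED for whatever flags the caller passed, converted call sites now have to spell the flags out. A hypothetical before/after at one call site:

	/* before: the macro ignored the flags argument entirely */
	bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, 0);

	/* after: pass the old implicit defaults explicitly */
	bp = xfs_buf_read(mp->m_ddev_targp, blkno, len,
			  XBF_LOCK | XBF_MAPPED);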
@@ -214,13 +232,17 @@ extern void xfs_buf_lock(xfs_buf_t *);
214extern void xfs_buf_unlock(xfs_buf_t *); 232extern void xfs_buf_unlock(xfs_buf_t *);
215 233
216/* Buffer Read and Write Routines */ 234/* Buffer Read and Write Routines */
217extern int xfs_bawrite(void *mp, xfs_buf_t *bp); 235extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
218extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); 236extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
237
238extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
239extern int xfs_bdstrat_cb(struct xfs_buf *);
240
219extern void xfs_buf_ioend(xfs_buf_t *, int); 241extern void xfs_buf_ioend(xfs_buf_t *, int);
220extern void xfs_buf_ioerror(xfs_buf_t *, int); 242extern void xfs_buf_ioerror(xfs_buf_t *, int);
221extern int xfs_buf_iorequest(xfs_buf_t *); 243extern int xfs_buf_iorequest(xfs_buf_t *);
222extern int xfs_buf_iowait(xfs_buf_t *); 244extern int xfs_buf_iowait(xfs_buf_t *);
223extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t, 245extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
224 xfs_buf_rw_t); 246 xfs_buf_rw_t);
225 247
226static inline int xfs_buf_iostrategy(xfs_buf_t *bp) 248static inline int xfs_buf_iostrategy(xfs_buf_t *bp)
@@ -243,49 +265,29 @@ extern int xfs_buf_ispin(xfs_buf_t *);
243 265
244/* Delayed Write Buffer Routines */ 266/* Delayed Write Buffer Routines */
245extern void xfs_buf_delwri_dequeue(xfs_buf_t *); 267extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
268extern void xfs_buf_delwri_promote(xfs_buf_t *);
246 269
247/* Buffer Daemon Setup Routines */ 270/* Buffer Daemon Setup Routines */
248extern int xfs_buf_init(void); 271extern int xfs_buf_init(void);
249extern void xfs_buf_terminate(void); 272extern void xfs_buf_terminate(void);
250 273
251#ifdef XFS_BUF_TRACE
252extern ktrace_t *xfs_buf_trace_buf;
253extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
254#else
255#define xfs_buf_trace(bp,id,ptr,ra) do { } while (0)
256#endif
257
258#define xfs_buf_target_name(target) \ 274#define xfs_buf_target_name(target) \
259 ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; }) 275 ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })
260 276
261 277
262#define XFS_B_ASYNC XBF_ASYNC
263#define XFS_B_DELWRI XBF_DELWRI
264#define XFS_B_READ XBF_READ
265#define XFS_B_WRITE XBF_WRITE
266#define XFS_B_STALE XBF_STALE
267
268#define XFS_BUF_TRYLOCK XBF_TRYLOCK
269#define XFS_INCORE_TRYLOCK XBF_TRYLOCK
270#define XFS_BUF_LOCK XBF_LOCK
271#define XFS_BUF_MAPPED XBF_MAPPED
272
273#define BUF_BUSY XBF_DONT_BLOCK
274
275#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) 278#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags)
276#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ 279#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \
277 ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) 280 ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
278 281
279#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE) 282#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE)
280#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE) 283#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
281#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XFS_B_STALE) 284#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE)
282#define XFS_BUF_SUPER_STALE(bp) do { \ 285#define XFS_BUF_SUPER_STALE(bp) do { \
283 XFS_BUF_STALE(bp); \ 286 XFS_BUF_STALE(bp); \
284 xfs_buf_delwri_dequeue(bp); \ 287 xfs_buf_delwri_dequeue(bp); \
285 XFS_BUF_DONE(bp); \ 288 XFS_BUF_DONE(bp); \
286 } while (0) 289 } while (0)
287 290
288#define XFS_BUF_MANAGE XBF_FS_MANAGED
289#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED) 291#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED)
290 292
291#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) 293#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI)
@@ -370,39 +372,15 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
370 372
371#define xfs_bpin(bp) xfs_buf_pin(bp) 373#define xfs_bpin(bp) xfs_buf_pin(bp)
372#define xfs_bunpin(bp) xfs_buf_unpin(bp) 374#define xfs_bunpin(bp) xfs_buf_unpin(bp)
373
374#define xfs_buftrace(id, bp) \
375 xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
376
377#define xfs_biodone(bp) xfs_buf_ioend(bp, 0) 375#define xfs_biodone(bp) xfs_buf_ioend(bp, 0)
378 376
379#define xfs_biomove(bp, off, len, data, rw) \ 377#define xfs_biomove(bp, off, len, data, rw) \
380 xfs_buf_iomove((bp), (off), (len), (data), \ 378 xfs_buf_iomove((bp), (off), (len), (data), \
381 ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ) 379 ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ)
382 380
383#define xfs_biozero(bp, off, len) \ 381#define xfs_biozero(bp, off, len) \
384 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) 382 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
385 383
386
387static inline int XFS_bwrite(xfs_buf_t *bp)
388{
389 int iowait = (bp->b_flags & XBF_ASYNC) == 0;
390 int error = 0;
391
392 if (!iowait)
393 bp->b_flags |= _XBF_RUN_QUEUES;
394
395 xfs_buf_delwri_dequeue(bp);
396 xfs_buf_iostrategy(bp);
397 if (iowait) {
398 error = xfs_buf_iowait(bp);
399 xfs_buf_relse(bp);
400 }
401 return error;
402}
403
404#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
405
406#define xfs_iowait(bp) xfs_buf_iowait(bp) 384#define xfs_iowait(bp) xfs_buf_iowait(bp)
407 385
408#define xfs_baread(target, rablkno, ralen) \ 386#define xfs_baread(target, rablkno, ralen) \
@@ -417,6 +395,7 @@ extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
417extern void xfs_wait_buftarg(xfs_buftarg_t *); 395extern void xfs_wait_buftarg(xfs_buftarg_t *);
418extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); 396extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
419extern int xfs_flush_buftarg(xfs_buftarg_t *, int); 397extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
398
420#ifdef CONFIG_KDB_MODULES 399#ifdef CONFIG_KDB_MODULES
421extern struct list_head *xfs_get_buftarg_list(void); 400extern struct list_head *xfs_get_buftarg_list(void);
422#endif 401#endif
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 87b8cbd23d4b..846b75aeb2ab 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -29,6 +29,7 @@
29#include "xfs_vnodeops.h" 29#include "xfs_vnodeops.h"
30#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
31#include "xfs_inode.h" 31#include "xfs_inode.h"
32#include "xfs_inode_item.h"
32 33
33/* 34/*
34 * Note that we only accept fileids which are long enough rather than allow 35 * Note that we only accept fileids which are long enough rather than allow
@@ -215,9 +216,28 @@ xfs_fs_get_parent(
215 return d_obtain_alias(VFS_I(cip)); 216 return d_obtain_alias(VFS_I(cip));
216} 217}
217 218
219STATIC int
220xfs_fs_nfs_commit_metadata(
221 struct inode *inode)
222{
223 struct xfs_inode *ip = XFS_I(inode);
224 struct xfs_mount *mp = ip->i_mount;
225 int error = 0;
226
227 xfs_ilock(ip, XFS_ILOCK_SHARED);
228 if (xfs_ipincount(ip)) {
229 error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn,
230 XFS_LOG_SYNC, NULL);
231 }
232 xfs_iunlock(ip, XFS_ILOCK_SHARED);
233
234 return error;
235}
236
218const struct export_operations xfs_export_operations = { 237const struct export_operations xfs_export_operations = {
219 .encode_fh = xfs_fs_encode_fh, 238 .encode_fh = xfs_fs_encode_fh,
220 .fh_to_dentry = xfs_fs_fh_to_dentry, 239 .fh_to_dentry = xfs_fs_fh_to_dentry,
221 .fh_to_parent = xfs_fs_fh_to_parent, 240 .fh_to_parent = xfs_fs_fh_to_parent,
222 .get_parent = xfs_fs_get_parent, 241 .get_parent = xfs_fs_get_parent,
242 .commit_metadata = xfs_fs_nfs_commit_metadata,
223}; 243};
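For context, ->commit_metadata is the export hook the NFS server calls after a metadata-modifying operation, so the change is stable before the reply is sent. A rough sketch of the caller side (assumed shape only; the real caller lives in fs/nfsd, and nfsd_sync_inode below is a placeholder for its generic fallback):

	struct inode *inode = dentry->d_inode;
	const struct export_operations *ops = inode->i_sb->s_export_op;
	int error;

	if (ops->commit_metadata)
		error = ops->commit_metadata(inode);
	else
		error = nfsd_sync_inode(inode);		/* placeholder */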
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index eff61e2732af..42dd3bcfba6b 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -16,6 +16,7 @@
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_fs.h"
19#include "xfs_bit.h" 20#include "xfs_bit.h"
20#include "xfs_log.h" 21#include "xfs_log.h"
21#include "xfs_inum.h" 22#include "xfs_inum.h"
@@ -34,52 +35,279 @@
34#include "xfs_dir2_sf.h" 35#include "xfs_dir2_sf.h"
35#include "xfs_dinode.h" 36#include "xfs_dinode.h"
36#include "xfs_inode.h" 37#include "xfs_inode.h"
38#include "xfs_inode_item.h"
39#include "xfs_bmap.h"
37#include "xfs_error.h" 40#include "xfs_error.h"
38#include "xfs_rw.h" 41#include "xfs_rw.h"
39#include "xfs_vnodeops.h" 42#include "xfs_vnodeops.h"
40#include "xfs_da_btree.h" 43#include "xfs_da_btree.h"
41#include "xfs_ioctl.h" 44#include "xfs_ioctl.h"
45#include "xfs_trace.h"
42 46
43#include <linux/dcache.h> 47#include <linux/dcache.h>
44 48
45static const struct vm_operations_struct xfs_file_vm_ops; 49static const struct vm_operations_struct xfs_file_vm_ops;
46 50
47STATIC ssize_t 51/*
48xfs_file_aio_read( 52 * xfs_iozero
49 struct kiocb *iocb, 53 *
 50 const struct iovec *iov, 54 * xfs_iozero clears the specified range of the supplied buffer,
51 unsigned long nr_segs, 55 * and marks all the affected blocks as valid and modified. If
52 loff_t pos) 56 * an affected block is not allocated, it will be allocated. If
57 * an affected block is not completely overwritten, and is not
58 * valid before the operation, it will be read from disk before
59 * being partially zeroed.
60 */
61STATIC int
62xfs_iozero(
63 struct xfs_inode *ip, /* inode */
64 loff_t pos, /* offset in file */
65 size_t count) /* size of data to zero */
53{ 66{
54 struct file *file = iocb->ki_filp; 67 struct page *page;
55 int ioflags = IO_ISAIO; 68 struct address_space *mapping;
69 int status;
56 70
57 BUG_ON(iocb->ki_pos != pos); 71 mapping = VFS_I(ip)->i_mapping;
58 if (unlikely(file->f_flags & O_DIRECT)) 72 do {
59 ioflags |= IO_ISDIRECT; 73 unsigned offset, bytes;
60 if (file->f_mode & FMODE_NOCMTIME) 74 void *fsdata;
61 ioflags |= IO_INVIS; 75
62 return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov, 76 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
63 nr_segs, &iocb->ki_pos, ioflags); 77 bytes = PAGE_CACHE_SIZE - offset;
78 if (bytes > count)
79 bytes = count;
80
81 status = pagecache_write_begin(NULL, mapping, pos, bytes,
82 AOP_FLAG_UNINTERRUPTIBLE,
83 &page, &fsdata);
84 if (status)
85 break;
86
87 zero_user(page, offset, bytes);
88
89 status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
90 page, fsdata);
91 WARN_ON(status <= 0); /* can't return less than zero! */
92 pos += bytes;
93 count -= bytes;
94 status = 0;
95 } while (count);
96
97 return (-status);
98}
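A worked pass through the loop (with PAGE_CACHE_SIZE == 4096), for pos = 10000 and count = 6000:

	/*
	 * pass 1: offset = 10000 & 4095 = 1808, bytes = 2288
	 *	-> zeroes the tail of the first page
	 * pass 2: offset = 0, bytes = min(4096, 3712) = 3712
	 *	-> zeroes the remainder
	 */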
99
100STATIC int
101xfs_file_fsync(
102 struct file *file,
103 struct dentry *dentry,
104 int datasync)
105{
106 struct xfs_inode *ip = XFS_I(dentry->d_inode);
107 struct xfs_trans *tp;
108 int error = 0;
109 int log_flushed = 0;
110
111 xfs_itrace_entry(ip);
112
113 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
114 return -XFS_ERROR(EIO);
115
116 xfs_iflags_clear(ip, XFS_ITRUNCATED);
117
118 /*
119 * We always need to make sure that the required inode state is safe on
120 * disk. The inode might be clean but we still might need to force the
121 * log because of committed transactions that haven't hit the disk yet.
122 * Likewise, there could be unflushed non-transactional changes to the
123 * inode core that have to go to disk and this requires us to issue
124 * a synchronous transaction to capture these changes correctly.
125 *
126 * This code relies on the assumption that if the i_update_core field
127 * of the inode is clear and the inode is unpinned then it is clean
128 * and no action is required.
129 */
130 xfs_ilock(ip, XFS_ILOCK_SHARED);
131
132 /*
133 * First check if the VFS inode is marked dirty. All the dirtying
 134 * of non-transactional updates now goes through mark_inode_dirty*,
 135 * which allows us to distinguish between pure timestamp updates
 136 * and i_size updates which need to be caught for fdatasync.
 137 * After that, also check for the dirty state in the XFS inode, which
 138 * might get cleared when the inode gets written out via the AIL
139 * or xfs_iflush_cluster.
140 */
141 if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) ||
142 ((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
143 ip->i_update_core) {
144 /*
145 * Kick off a transaction to log the inode core to get the
146 * updates. The sync transaction will also force the log.
147 */
148 xfs_iunlock(ip, XFS_ILOCK_SHARED);
149 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
150 error = xfs_trans_reserve(tp, 0,
151 XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
152 if (error) {
153 xfs_trans_cancel(tp, 0);
154 return -error;
155 }
156 xfs_ilock(ip, XFS_ILOCK_EXCL);
157
158 /*
159 * Note - it's possible that we might have pushed ourselves out
160 * of the way during trans_reserve which would flush the inode.
161 * But there's no guarantee that the inode buffer has actually
162 * gone out yet (it's delwri). Plus the buffer could be pinned
163 * anyway if it's part of an inode in another recent
164 * transaction. So we play it safe and fire off the
165 * transaction anyway.
166 */
167 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
168 xfs_trans_ihold(tp, ip);
169 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
170 xfs_trans_set_sync(tp);
171 error = _xfs_trans_commit(tp, 0, &log_flushed);
172
173 xfs_iunlock(ip, XFS_ILOCK_EXCL);
174 } else {
175 /*
176 * Timestamps/size haven't changed since last inode flush or
177 * inode transaction commit. That means either nothing got
178 * written or a transaction committed which caught the updates.
179 * If the latter happened and the transaction hasn't hit the
180 * disk yet, the inode will be still be pinned. If it is,
181 * force the log.
182 */
183 if (xfs_ipincount(ip)) {
184 error = _xfs_log_force_lsn(ip->i_mount,
185 ip->i_itemp->ili_last_lsn,
186 XFS_LOG_SYNC, &log_flushed);
187 }
188 xfs_iunlock(ip, XFS_ILOCK_SHARED);
189 }
190
191 if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
192 /*
193 * If the log write didn't issue an ordered tag we need
194 * to flush the disk cache for the data device now.
195 */
196 if (!log_flushed)
197 xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
198
199 /*
200 * If this inode is on the RT dev we need to flush that
201 * cache as well.
202 */
203 if (XFS_IS_REALTIME_INODE(ip))
204 xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
205 }
206
207 return -error;
64} 208}
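Condensed, the new fsync resolves to one of two flush paths plus an optional cache flush (a restatement of the code above, not additional behaviour):

	/*
	 * dirty-for-(data)sync VFS state && ip->i_update_core
	 *	-> log the inode core in a synchronous transaction
	 * else if xfs_ipincount(ip)
	 *	-> force the log up to ili_last_lsn
	 * then, on XFS_MOUNT_BARRIER mounts, if the log force did
	 * not already flush the write cache
	 *	-> xfs_blkdev_issue_flush() on the data (and RT) devices
	 */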
65 209
66STATIC ssize_t 210STATIC ssize_t
67xfs_file_aio_write( 211xfs_file_aio_read(
68 struct kiocb *iocb, 212 struct kiocb *iocb,
69 const struct iovec *iov, 213 const struct iovec *iovp,
70 unsigned long nr_segs, 214 unsigned long nr_segs,
71 loff_t pos) 215 loff_t pos)
72{ 216{
73 struct file *file = iocb->ki_filp; 217 struct file *file = iocb->ki_filp;
74 int ioflags = IO_ISAIO; 218 struct inode *inode = file->f_mapping->host;
219 struct xfs_inode *ip = XFS_I(inode);
220 struct xfs_mount *mp = ip->i_mount;
221 size_t size = 0;
222 ssize_t ret = 0;
223 int ioflags = 0;
224 xfs_fsize_t n;
225 unsigned long seg;
226
227 XFS_STATS_INC(xs_read_calls);
75 228
76 BUG_ON(iocb->ki_pos != pos); 229 BUG_ON(iocb->ki_pos != pos);
230
77 if (unlikely(file->f_flags & O_DIRECT)) 231 if (unlikely(file->f_flags & O_DIRECT))
78 ioflags |= IO_ISDIRECT; 232 ioflags |= IO_ISDIRECT;
79 if (file->f_mode & FMODE_NOCMTIME) 233 if (file->f_mode & FMODE_NOCMTIME)
80 ioflags |= IO_INVIS; 234 ioflags |= IO_INVIS;
81 return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs, 235
82 &iocb->ki_pos, ioflags); 236 /* START copy & waste from filemap.c */
237 for (seg = 0; seg < nr_segs; seg++) {
238 const struct iovec *iv = &iovp[seg];
239
240 /*
241 * If any segment has a negative length, or the cumulative
242 * length ever wraps negative then return -EINVAL.
243 */
244 size += iv->iov_len;
245 if (unlikely((ssize_t)(size|iv->iov_len) < 0))
246 return XFS_ERROR(-EINVAL);
247 }
248 /* END copy & waste from filemap.c */
249
250 if (unlikely(ioflags & IO_ISDIRECT)) {
251 xfs_buftarg_t *target =
252 XFS_IS_REALTIME_INODE(ip) ?
253 mp->m_rtdev_targp : mp->m_ddev_targp;
254 if ((iocb->ki_pos & target->bt_smask) ||
255 (size & target->bt_smask)) {
256 if (iocb->ki_pos == ip->i_size)
257 return 0;
258 return -XFS_ERROR(EINVAL);
259 }
260 }
261
262 n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
263 if (n <= 0 || size == 0)
264 return 0;
265
266 if (n < size)
267 size = n;
268
269 if (XFS_FORCED_SHUTDOWN(mp))
270 return -EIO;
271
272 if (unlikely(ioflags & IO_ISDIRECT))
273 mutex_lock(&inode->i_mutex);
274 xfs_ilock(ip, XFS_IOLOCK_SHARED);
275
276 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
277 int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
278 int iolock = XFS_IOLOCK_SHARED;
279
280 ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, iocb->ki_pos, size,
281 dmflags, &iolock);
282 if (ret) {
283 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
284 if (unlikely(ioflags & IO_ISDIRECT))
285 mutex_unlock(&inode->i_mutex);
286 return ret;
287 }
288 }
289
290 if (unlikely(ioflags & IO_ISDIRECT)) {
291 if (inode->i_mapping->nrpages) {
292 ret = -xfs_flushinval_pages(ip,
293 (iocb->ki_pos & PAGE_CACHE_MASK),
294 -1, FI_REMAPF_LOCKED);
295 }
296 mutex_unlock(&inode->i_mutex);
297 if (ret) {
298 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
299 return ret;
300 }
301 }
302
303 trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
304
305 ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
306 if (ret > 0)
307 XFS_STATS_ADD(xs_read_bytes, ret);
308
309 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
310 return ret;
83} 311}
84 312
85STATIC ssize_t 313STATIC ssize_t
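The bt_smask tests above enforce sector alignment for O_DIRECT reads; assuming a target with 512-byte sectors (bt_smask == 0x1ff):

	/*
	 * pos = 4096, size = 4096 -> both masks clear, read proceeds
	 * pos = 4096, size = 300  -> size & 0x1ff != 0 -> -EINVAL
	 * misaligned pos that sits exactly at ip->i_size -> returns 0
	 */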
@@ -87,16 +315,44 @@ xfs_file_splice_read(
87 struct file *infilp, 315 struct file *infilp,
88 loff_t *ppos, 316 loff_t *ppos,
89 struct pipe_inode_info *pipe, 317 struct pipe_inode_info *pipe,
90 size_t len, 318 size_t count,
91 unsigned int flags) 319 unsigned int flags)
92{ 320{
321 struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
322 struct xfs_mount *mp = ip->i_mount;
93 int ioflags = 0; 323 int ioflags = 0;
324 ssize_t ret;
325
326 XFS_STATS_INC(xs_read_calls);
94 327
95 if (infilp->f_mode & FMODE_NOCMTIME) 328 if (infilp->f_mode & FMODE_NOCMTIME)
96 ioflags |= IO_INVIS; 329 ioflags |= IO_INVIS;
97 330
98 return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode), 331 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
99 infilp, ppos, pipe, len, flags, ioflags); 332 return -EIO;
333
334 xfs_ilock(ip, XFS_IOLOCK_SHARED);
335
336 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
337 int iolock = XFS_IOLOCK_SHARED;
338 int error;
339
340 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
341 FILP_DELAY_FLAG(infilp), &iolock);
342 if (error) {
343 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
344 return -error;
345 }
346 }
347
348 trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
349
350 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
351 if (ret > 0)
352 XFS_STATS_ADD(xs_read_bytes, ret);
353
354 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
355 return ret;
100} 356}
101 357
102STATIC ssize_t 358STATIC ssize_t
@@ -104,16 +360,538 @@ xfs_file_splice_write(
104 struct pipe_inode_info *pipe, 360 struct pipe_inode_info *pipe,
105 struct file *outfilp, 361 struct file *outfilp,
106 loff_t *ppos, 362 loff_t *ppos,
107 size_t len, 363 size_t count,
108 unsigned int flags) 364 unsigned int flags)
109{ 365{
366 struct inode *inode = outfilp->f_mapping->host;
367 struct xfs_inode *ip = XFS_I(inode);
368 struct xfs_mount *mp = ip->i_mount;
369 xfs_fsize_t isize, new_size;
110 int ioflags = 0; 370 int ioflags = 0;
371 ssize_t ret;
372
373 XFS_STATS_INC(xs_write_calls);
111 374
112 if (outfilp->f_mode & FMODE_NOCMTIME) 375 if (outfilp->f_mode & FMODE_NOCMTIME)
113 ioflags |= IO_INVIS; 376 ioflags |= IO_INVIS;
114 377
115 return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode), 378 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
116 pipe, outfilp, ppos, len, flags, ioflags); 379 return -EIO;
380
381 xfs_ilock(ip, XFS_IOLOCK_EXCL);
382
383 if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
384 int iolock = XFS_IOLOCK_EXCL;
385 int error;
386
387 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
388 FILP_DELAY_FLAG(outfilp), &iolock);
389 if (error) {
390 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
391 return -error;
392 }
393 }
394
395 new_size = *ppos + count;
396
397 xfs_ilock(ip, XFS_ILOCK_EXCL);
398 if (new_size > ip->i_size)
399 ip->i_new_size = new_size;
400 xfs_iunlock(ip, XFS_ILOCK_EXCL);
401
402 trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
403
404 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
405 if (ret > 0)
406 XFS_STATS_ADD(xs_write_bytes, ret);
407
408 isize = i_size_read(inode);
409 if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
410 *ppos = isize;
411
412 if (*ppos > ip->i_size) {
413 xfs_ilock(ip, XFS_ILOCK_EXCL);
414 if (*ppos > ip->i_size)
415 ip->i_size = *ppos;
416 xfs_iunlock(ip, XFS_ILOCK_EXCL);
417 }
418
419 if (ip->i_new_size) {
420 xfs_ilock(ip, XFS_ILOCK_EXCL);
421 ip->i_new_size = 0;
422 if (ip->i_d.di_size > ip->i_size)
423 ip->i_d.di_size = ip->i_size;
424 xfs_iunlock(ip, XFS_ILOCK_EXCL);
425 }
426 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
427 return ret;
428}
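The i_new_size handling here follows a publish-then-clamp protocol that xfs_file_aio_write below repeats:

	/*
	 * 1. publish ip->i_new_size = *ppos + count before the I/O,
	 *    so concurrent writeback can see the in-flight size
	 * 2. perform the splice write
	 * 3. grow ip->i_size if the write extended the file
	 * 4. clear i_new_size and clamp di_size back to i_size,
	 *    undoing the optimistic on-disk size after a short or
	 *    failed write
	 */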
429
430/*
431 * This routine is called to handle zeroing any space in the last
432 * block of the file that is beyond the EOF. We do this since the
433 * size is being increased without writing anything to that block
434 * and we don't want anyone to read the garbage on the disk.
435 */
436STATIC int /* error (positive) */
437xfs_zero_last_block(
438 xfs_inode_t *ip,
439 xfs_fsize_t offset,
440 xfs_fsize_t isize)
441{
442 xfs_fileoff_t last_fsb;
443 xfs_mount_t *mp = ip->i_mount;
444 int nimaps;
445 int zero_offset;
446 int zero_len;
447 int error = 0;
448 xfs_bmbt_irec_t imap;
449
450 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
451
452 zero_offset = XFS_B_FSB_OFFSET(mp, isize);
453 if (zero_offset == 0) {
454 /*
455 * There are no extra bytes in the last block on disk to
456 * zero, so return.
457 */
458 return 0;
459 }
460
461 last_fsb = XFS_B_TO_FSBT(mp, isize);
462 nimaps = 1;
463 error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
464 &nimaps, NULL, NULL);
465 if (error) {
466 return error;
467 }
468 ASSERT(nimaps > 0);
469 /*
470 * If the block underlying isize is just a hole, then there
471 * is nothing to zero.
472 */
473 if (imap.br_startblock == HOLESTARTBLOCK) {
474 return 0;
475 }
476 /*
477 * Zero the part of the last block beyond the EOF, and write it
478 * out sync. We need to drop the ilock while we do this so we
479 * don't deadlock when the buffer cache calls back to us.
480 */
481 xfs_iunlock(ip, XFS_ILOCK_EXCL);
482
483 zero_len = mp->m_sb.sb_blocksize - zero_offset;
484 if (isize + zero_len > offset)
485 zero_len = offset - isize;
486 error = xfs_iozero(ip, isize, zero_len);
487
488 xfs_ilock(ip, XFS_ILOCK_EXCL);
489 ASSERT(error >= 0);
490 return error;
491}
492
493/*
494 * Zero any on disk space between the current EOF and the new,
495 * larger EOF. This handles the normal case of zeroing the remainder
496 * of the last block in the file and the unusual case of zeroing blocks
497 * out beyond the size of the file. This second case only happens
498 * with fixed size extents and when the system crashes before the inode
 499 * size was updated but after blocks were allocated. Holes and
 500 * unwritten extents in the range are skipped and left as holes;
 501 * only allocated, written blocks are zeroed.
502 */
503
504int /* error (positive) */
505xfs_zero_eof(
506 xfs_inode_t *ip,
507 xfs_off_t offset, /* starting I/O offset */
508 xfs_fsize_t isize) /* current inode size */
509{
510 xfs_mount_t *mp = ip->i_mount;
511 xfs_fileoff_t start_zero_fsb;
512 xfs_fileoff_t end_zero_fsb;
513 xfs_fileoff_t zero_count_fsb;
514 xfs_fileoff_t last_fsb;
515 xfs_fileoff_t zero_off;
516 xfs_fsize_t zero_len;
517 int nimaps;
518 int error = 0;
519 xfs_bmbt_irec_t imap;
520
521 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
522 ASSERT(offset > isize);
523
524 /*
525 * First handle zeroing the block on which isize resides.
526 * We only zero a part of that block so it is handled specially.
527 */
528 error = xfs_zero_last_block(ip, offset, isize);
529 if (error) {
530 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
531 return error;
532 }
533
534 /*
535 * Calculate the range between the new size and the old
536 * where blocks needing to be zeroed may exist. To get the
537 * block where the last byte in the file currently resides,
538 * we need to subtract one from the size and truncate back
539 * to a block boundary. We subtract 1 in case the size is
540 * exactly on a block boundary.
541 */
542 last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
543 start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
544 end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
545 ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
546 if (last_fsb == end_zero_fsb) {
547 /*
548 * The size was only incremented on its last block.
549 * We took care of that above, so just return.
550 */
551 return 0;
552 }
553
554 ASSERT(start_zero_fsb <= end_zero_fsb);
555 while (start_zero_fsb <= end_zero_fsb) {
556 nimaps = 1;
557 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
558 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
559 0, NULL, 0, &imap, &nimaps, NULL, NULL);
560 if (error) {
561 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
562 return error;
563 }
564 ASSERT(nimaps > 0);
565
566 if (imap.br_state == XFS_EXT_UNWRITTEN ||
567 imap.br_startblock == HOLESTARTBLOCK) {
568 /*
 569 * The mapping here is a hole or an unwritten
 570 * extent, so there is nothing on disk that needs
 571 * zeroing. Skip ahead to the first block after
 572 * this mapping and let the next iteration map
 573 * the following extent.
574 */
575 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
576 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
577 continue;
578 }
579
580 /*
581 * There are blocks we need to zero.
582 * Drop the inode lock while we're doing the I/O.
583 * We'll still have the iolock to protect us.
584 */
585 xfs_iunlock(ip, XFS_ILOCK_EXCL);
586
587 zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
588 zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
589
590 if ((zero_off + zero_len) > offset)
591 zero_len = offset - zero_off;
592
593 error = xfs_iozero(ip, zero_off, zero_len);
594 if (error) {
595 goto out_lock;
596 }
597
598 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
599 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
600
601 xfs_ilock(ip, XFS_ILOCK_EXCL);
602 }
603
604 return 0;
605
606out_lock:
607 xfs_ilock(ip, XFS_ILOCK_EXCL);
608 ASSERT(error >= 0);
609 return error;
610}
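To make the block arithmetic concrete, assume 4096-byte blocks, isize = 6000 and offset = 20000:

	/*
	 * xfs_zero_last_block: zero_offset = 1904, zero_len = 2192
	 *	-> zeroes bytes 6000..8191 of block 1
	 * here: last_fsb = 1, start_zero_fsb = 2, end_zero_fsb = 4
	 *	-> the loop zeroes bytes 8192..19999 (zero_len is
	 *	   clamped to offset), skipping any holes or unwritten
	 *	   extents it maps along the way
	 */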
611
612STATIC ssize_t
613xfs_file_aio_write(
614 struct kiocb *iocb,
615 const struct iovec *iovp,
616 unsigned long nr_segs,
617 loff_t pos)
618{
619 struct file *file = iocb->ki_filp;
620 struct address_space *mapping = file->f_mapping;
621 struct inode *inode = mapping->host;
622 struct xfs_inode *ip = XFS_I(inode);
623 struct xfs_mount *mp = ip->i_mount;
624 ssize_t ret = 0, error = 0;
625 int ioflags = 0;
626 xfs_fsize_t isize, new_size;
627 int iolock;
628 int eventsent = 0;
629 size_t ocount = 0, count;
630 int need_i_mutex;
631
632 XFS_STATS_INC(xs_write_calls);
633
634 BUG_ON(iocb->ki_pos != pos);
635
636 if (unlikely(file->f_flags & O_DIRECT))
637 ioflags |= IO_ISDIRECT;
638 if (file->f_mode & FMODE_NOCMTIME)
639 ioflags |= IO_INVIS;
640
641 error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
642 if (error)
643 return error;
644
645 count = ocount;
646 if (count == 0)
647 return 0;
648
649 xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);
650
651 if (XFS_FORCED_SHUTDOWN(mp))
652 return -EIO;
653
654relock:
655 if (ioflags & IO_ISDIRECT) {
656 iolock = XFS_IOLOCK_SHARED;
657 need_i_mutex = 0;
658 } else {
659 iolock = XFS_IOLOCK_EXCL;
660 need_i_mutex = 1;
661 mutex_lock(&inode->i_mutex);
662 }
663
664 xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
665
666start:
667 error = -generic_write_checks(file, &pos, &count,
668 S_ISBLK(inode->i_mode));
669 if (error) {
670 xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
671 goto out_unlock_mutex;
672 }
673
674 if ((DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) &&
675 !(ioflags & IO_INVIS) && !eventsent)) {
676 int dmflags = FILP_DELAY_FLAG(file);
677
678 if (need_i_mutex)
679 dmflags |= DM_FLAGS_IMUX;
680
681 xfs_iunlock(ip, XFS_ILOCK_EXCL);
682 error = XFS_SEND_DATA(ip->i_mount, DM_EVENT_WRITE, ip,
683 pos, count, dmflags, &iolock);
684 if (error) {
685 goto out_unlock_internal;
686 }
687 xfs_ilock(ip, XFS_ILOCK_EXCL);
688 eventsent = 1;
689
690 /*
691 * The iolock was dropped and reacquired in XFS_SEND_DATA
692 * so we have to recheck the size when appending.
693 * We will only "goto start;" once, since having sent the
694 * event prevents another call to XFS_SEND_DATA, which is
695 * what allows the size to change in the first place.
696 */
697 if ((file->f_flags & O_APPEND) && pos != ip->i_size)
698 goto start;
699 }
700
701 if (ioflags & IO_ISDIRECT) {
702 xfs_buftarg_t *target =
703 XFS_IS_REALTIME_INODE(ip) ?
704 mp->m_rtdev_targp : mp->m_ddev_targp;
705
706 if ((pos & target->bt_smask) || (count & target->bt_smask)) {
707 xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
708 return XFS_ERROR(-EINVAL);
709 }
710
711 if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) {
712 xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
713 iolock = XFS_IOLOCK_EXCL;
714 need_i_mutex = 1;
715 mutex_lock(&inode->i_mutex);
716 xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
717 goto start;
718 }
719 }
720
721 new_size = pos + count;
722 if (new_size > ip->i_size)
723 ip->i_new_size = new_size;
724
725 if (likely(!(ioflags & IO_INVIS)))
726 file_update_time(file);
727
728 /*
729 * If the offset is beyond the size of the file, we have a couple
730 * of things to do. First, if there is already space allocated
731 * we need to either create holes or zero the disk or ...
732 *
733 * If there is a page where the previous size lands, we need
734 * to zero it out up to the new size.
735 */
736
737 if (pos > ip->i_size) {
738 error = xfs_zero_eof(ip, pos, ip->i_size);
739 if (error) {
740 xfs_iunlock(ip, XFS_ILOCK_EXCL);
741 goto out_unlock_internal;
742 }
743 }
744 xfs_iunlock(ip, XFS_ILOCK_EXCL);
745
746 /*
747 * If we're writing the file then make sure to clear the
748 * setuid and setgid bits if the process is not being run
749 * by root. This keeps people from modifying setuid and
750 * setgid binaries.
751 */
752 error = -file_remove_suid(file);
753 if (unlikely(error))
754 goto out_unlock_internal;
755
756 /* We can write back this queue in page reclaim */
757 current->backing_dev_info = mapping->backing_dev_info;
758
759 if ((ioflags & IO_ISDIRECT)) {
760 if (mapping->nrpages) {
761 WARN_ON(need_i_mutex == 0);
762 error = xfs_flushinval_pages(ip,
763 (pos & PAGE_CACHE_MASK),
764 -1, FI_REMAPF_LOCKED);
765 if (error)
766 goto out_unlock_internal;
767 }
768
769 if (need_i_mutex) {
770 /* demote the lock now the cached pages are gone */
771 xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
772 mutex_unlock(&inode->i_mutex);
773
774 iolock = XFS_IOLOCK_SHARED;
775 need_i_mutex = 0;
776 }
777
778 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
779 ret = generic_file_direct_write(iocb, iovp,
780 &nr_segs, pos, &iocb->ki_pos, count, ocount);
781
782 /*
783 * direct-io write to a hole: fall through to buffered I/O
784 * for completing the rest of the request.
785 */
786 if (ret >= 0 && ret != count) {
787 XFS_STATS_ADD(xs_write_bytes, ret);
788
789 pos += ret;
790 count -= ret;
791
792 ioflags &= ~IO_ISDIRECT;
793 xfs_iunlock(ip, iolock);
794 goto relock;
795 }
796 } else {
797 int enospc = 0;
798 ssize_t ret2 = 0;
799
800write_retry:
801 trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags);
802 ret2 = generic_file_buffered_write(iocb, iovp, nr_segs,
803 pos, &iocb->ki_pos, count, ret);
804 /*
805 * if we just got an ENOSPC, flush the inode now we
806 * aren't holding any page locks and retry *once*
807 */
808 if (ret2 == -ENOSPC && !enospc) {
809 error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
810 if (error)
811 goto out_unlock_internal;
812 enospc = 1;
813 goto write_retry;
814 }
815 ret = ret2;
816 }
817
818 current->backing_dev_info = NULL;
819
820 isize = i_size_read(inode);
821 if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize))
822 iocb->ki_pos = isize;
823
824 if (iocb->ki_pos > ip->i_size) {
825 xfs_ilock(ip, XFS_ILOCK_EXCL);
826 if (iocb->ki_pos > ip->i_size)
827 ip->i_size = iocb->ki_pos;
828 xfs_iunlock(ip, XFS_ILOCK_EXCL);
829 }
830
831 if (ret == -ENOSPC &&
832 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
833 xfs_iunlock(ip, iolock);
834 if (need_i_mutex)
835 mutex_unlock(&inode->i_mutex);
836 error = XFS_SEND_NAMESP(ip->i_mount, DM_EVENT_NOSPACE, ip,
837 DM_RIGHT_NULL, ip, DM_RIGHT_NULL, NULL, NULL,
838 0, 0, 0); /* Delay flag intentionally unused */
839 if (need_i_mutex)
840 mutex_lock(&inode->i_mutex);
841 xfs_ilock(ip, iolock);
842 if (error)
843 goto out_unlock_internal;
844 goto start;
845 }
846
847 error = -ret;
848 if (ret <= 0)
849 goto out_unlock_internal;
850
851 XFS_STATS_ADD(xs_write_bytes, ret);
852
853 /* Handle various SYNC-type writes */
854 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
855 loff_t end = pos + ret - 1;
856 int error2;
857
858 xfs_iunlock(ip, iolock);
859 if (need_i_mutex)
860 mutex_unlock(&inode->i_mutex);
861
862 error2 = filemap_write_and_wait_range(mapping, pos, end);
863 if (!error)
864 error = error2;
865 if (need_i_mutex)
866 mutex_lock(&inode->i_mutex);
867 xfs_ilock(ip, iolock);
868
869 error2 = -xfs_file_fsync(file, file->f_path.dentry,
870 (file->f_flags & __O_SYNC) ? 0 : 1);
871 if (!error)
872 error = error2;
873 }
874
875 out_unlock_internal:
876 if (ip->i_new_size) {
877 xfs_ilock(ip, XFS_ILOCK_EXCL);
878 ip->i_new_size = 0;
879 /*
880 * If this was a direct or synchronous I/O that failed (such
881 * as ENOSPC) then part of the I/O may have been written to
 882 * disk before the error occurred. In this case the on-disk
883 * file size may have been adjusted beyond the in-memory file
884 * size and now needs to be truncated back.
885 */
886 if (ip->i_d.di_size > ip->i_size)
887 ip->i_d.di_size = ip->i_size;
888 xfs_iunlock(ip, XFS_ILOCK_EXCL);
889 }
890 xfs_iunlock(ip, iolock);
891 out_unlock_mutex:
892 if (need_i_mutex)
893 mutex_unlock(&inode->i_mutex);
894 return -error;
117} 895}
118 896
119STATIC int 897STATIC int
@@ -160,28 +938,6 @@ xfs_file_release(
160 return -xfs_release(XFS_I(inode)); 938 return -xfs_release(XFS_I(inode));
161} 939}
162 940
163/*
164 * We ignore the datasync flag here because a datasync is effectively
165 * identical to an fsync. That is, datasync implies that we need to write
166 * only the metadata needed to be able to access the data that is written
167 * if we crash after the call completes. Hence if we are writing beyond
168 * EOF we have to log the inode size change as well, which makes it a
169 * full fsync. If we don't write beyond EOF, the inode core will be
170 * clean in memory and so we don't need to log the inode, just like
171 * fsync.
172 */
173STATIC int
174xfs_file_fsync(
175 struct file *file,
176 struct dentry *dentry,
177 int datasync)
178{
179 struct xfs_inode *ip = XFS_I(dentry->d_inode);
180
181 xfs_iflags_clear(ip, XFS_ITRUNCATED);
182 return -xfs_fsync(ip);
183}
184
185STATIC int 941STATIC int
186xfs_file_readdir( 942xfs_file_readdir(
187 struct file *filp, 943 struct file *filp,
@@ -203,9 +959,9 @@ xfs_file_readdir(
203 * 959 *
204 * Try to give it an estimate that's good enough, maybe at some 960 * Try to give it an estimate that's good enough, maybe at some
205 * point we can change the ->readdir prototype to include the 961 * point we can change the ->readdir prototype to include the
206 * buffer size. 962 * buffer size. For now we use the current glibc buffer size.
207 */ 963 */
208 bufsize = (size_t)min_t(loff_t, PAGE_SIZE, ip->i_d.di_size); 964 bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
209 965
210 error = xfs_readdir(ip, dirent, bufsize, 966 error = xfs_readdir(ip, dirent, bufsize,
211 (xfs_off_t *)&filp->f_pos, filldir); 967 (xfs_off_t *)&filp->f_pos, filldir);
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 08be36d7326c..b6918d76bc7b 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -19,6 +19,7 @@
19#include "xfs_vnodeops.h" 19#include "xfs_vnodeops.h"
20#include "xfs_bmap_btree.h" 20#include "xfs_bmap_btree.h"
21#include "xfs_inode.h" 21#include "xfs_inode.h"
22#include "xfs_trace.h"
22 23
23int fs_noerr(void) { return 0; } 24int fs_noerr(void) { return 0; }
24int fs_nosys(void) { return ENOSYS; } 25int fs_nosys(void) { return ENOSYS; }
@@ -51,6 +52,8 @@ xfs_flushinval_pages(
51 struct address_space *mapping = VFS_I(ip)->i_mapping; 52 struct address_space *mapping = VFS_I(ip)->i_mapping;
52 int ret = 0; 53 int ret = 0;
53 54
55 trace_xfs_pagecache_inval(ip, first, last);
56
54 if (mapping->nrpages) { 57 if (mapping->nrpages) {
55 xfs_iflags_clear(ip, XFS_ITRUNCATED); 58 xfs_iflags_clear(ip, XFS_ITRUNCATED);
56 ret = filemap_write_and_wait(mapping); 59 ret = filemap_write_and_wait(mapping);
@@ -76,7 +79,7 @@ xfs_flush_pages(
76 xfs_iflags_clear(ip, XFS_ITRUNCATED); 79 xfs_iflags_clear(ip, XFS_ITRUNCATED);
77 ret = -filemap_fdatawrite(mapping); 80 ret = -filemap_fdatawrite(mapping);
78 } 81 }
79 if (flags & XFS_B_ASYNC) 82 if (flags & XBF_ASYNC)
80 return ret; 83 return ret;
81 ret2 = xfs_wait_on_pages(ip, first, last); 84 ret2 = xfs_wait_on_pages(ip, first, last);
82 if (!ret) 85 if (!ret)
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 5bb523d7f37e..7b26cc2fd284 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -51,12 +51,14 @@
51#include "xfs_quota.h" 51#include "xfs_quota.h"
52#include "xfs_inode_item.h" 52#include "xfs_inode_item.h"
53#include "xfs_export.h" 53#include "xfs_export.h"
54#include "xfs_trace.h"
54 55
55#include <linux/capability.h> 56#include <linux/capability.h>
56#include <linux/dcache.h> 57#include <linux/dcache.h>
57#include <linux/mount.h> 58#include <linux/mount.h>
58#include <linux/namei.h> 59#include <linux/namei.h>
59#include <linux/pagemap.h> 60#include <linux/pagemap.h>
61#include <linux/slab.h>
60#include <linux/exportfs.h> 62#include <linux/exportfs.h>
61 63
62/* 64/*
@@ -446,12 +448,12 @@ xfs_attrlist_by_handle(
446int 448int
447xfs_attrmulti_attr_get( 449xfs_attrmulti_attr_get(
448 struct inode *inode, 450 struct inode *inode,
449 char *name, 451 unsigned char *name,
450 char __user *ubuf, 452 unsigned char __user *ubuf,
451 __uint32_t *len, 453 __uint32_t *len,
452 __uint32_t flags) 454 __uint32_t flags)
453{ 455{
454 char *kbuf; 456 unsigned char *kbuf;
455 int error = EFAULT; 457 int error = EFAULT;
456 458
457 if (*len > XATTR_SIZE_MAX) 459 if (*len > XATTR_SIZE_MAX)
@@ -475,12 +477,12 @@ xfs_attrmulti_attr_get(
475int 477int
476xfs_attrmulti_attr_set( 478xfs_attrmulti_attr_set(
477 struct inode *inode, 479 struct inode *inode,
478 char *name, 480 unsigned char *name,
479 const char __user *ubuf, 481 const unsigned char __user *ubuf,
480 __uint32_t len, 482 __uint32_t len,
481 __uint32_t flags) 483 __uint32_t flags)
482{ 484{
483 char *kbuf; 485 unsigned char *kbuf;
484 int error = EFAULT; 486 int error = EFAULT;
485 487
486 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 488 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
@@ -500,7 +502,7 @@ xfs_attrmulti_attr_set(
500int 502int
501xfs_attrmulti_attr_remove( 503xfs_attrmulti_attr_remove(
502 struct inode *inode, 504 struct inode *inode,
503 char *name, 505 unsigned char *name,
504 __uint32_t flags) 506 __uint32_t flags)
505{ 507{
506 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 508 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
@@ -518,7 +520,7 @@ xfs_attrmulti_by_handle(
518 xfs_fsop_attrmulti_handlereq_t am_hreq; 520 xfs_fsop_attrmulti_handlereq_t am_hreq;
519 struct dentry *dentry; 521 struct dentry *dentry;
520 unsigned int i, size; 522 unsigned int i, size;
521 char *attr_name; 523 unsigned char *attr_name;
522 524
523 if (!capable(CAP_SYS_ADMIN)) 525 if (!capable(CAP_SYS_ADMIN))
524 return -XFS_ERROR(EPERM); 526 return -XFS_ERROR(EPERM);
@@ -546,7 +548,7 @@ xfs_attrmulti_by_handle(
546 548
547 error = 0; 549 error = 0;
548 for (i = 0; i < am_hreq.opcount; i++) { 550 for (i = 0; i < am_hreq.opcount; i++) {
549 ops[i].am_error = strncpy_from_user(attr_name, 551 ops[i].am_error = strncpy_from_user((char *)attr_name,
550 ops[i].am_attrname, MAXNAMELEN); 552 ops[i].am_attrname, MAXNAMELEN);
551 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) 553 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
552 error = -ERANGE; 554 error = -ERANGE;
@@ -1430,6 +1432,9 @@ xfs_file_ioctl(
1430 if (!capable(CAP_SYS_ADMIN)) 1432 if (!capable(CAP_SYS_ADMIN))
1431 return -EPERM; 1433 return -EPERM;
1432 1434
1435 if (mp->m_flags & XFS_MOUNT_RDONLY)
1436 return -XFS_ERROR(EROFS);
1437
1433 if (copy_from_user(&inout, arg, sizeof(inout))) 1438 if (copy_from_user(&inout, arg, sizeof(inout)))
1434 return -XFS_ERROR(EFAULT); 1439 return -XFS_ERROR(EFAULT);
1435 1440
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h
index 7bd7c6afc1eb..d56173b34a2a 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl.h
@@ -45,23 +45,23 @@ xfs_readlink_by_handle(
45extern int 45extern int
46xfs_attrmulti_attr_get( 46xfs_attrmulti_attr_get(
47 struct inode *inode, 47 struct inode *inode,
48 char *name, 48 unsigned char *name,
49 char __user *ubuf, 49 unsigned char __user *ubuf,
50 __uint32_t *len, 50 __uint32_t *len,
51 __uint32_t flags); 51 __uint32_t flags);
52 52
53extern int 53extern int
54 xfs_attrmulti_attr_set( 54xfs_attrmulti_attr_set(
55 struct inode *inode, 55 struct inode *inode,
56 char *name, 56 unsigned char *name,
57 const char __user *ubuf, 57 const unsigned char __user *ubuf,
58 __uint32_t len, 58 __uint32_t len,
59 __uint32_t flags); 59 __uint32_t flags);
60 60
61extern int 61extern int
62xfs_attrmulti_attr_remove( 62xfs_attrmulti_attr_remove(
63 struct inode *inode, 63 struct inode *inode,
64 char *name, 64 unsigned char *name,
65 __uint32_t flags); 65 __uint32_t flags);
66 66
67extern struct dentry * 67extern struct dentry *
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index eafcc7c18706..593c05b4df8d 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -18,6 +18,7 @@
18#include <linux/compat.h> 18#include <linux/compat.h>
19#include <linux/ioctl.h> 19#include <linux/ioctl.h>
20#include <linux/mount.h> 20#include <linux/mount.h>
21#include <linux/slab.h>
21#include <asm/uaccess.h> 22#include <asm/uaccess.h>
22#include "xfs.h" 23#include "xfs.h"
23#include "xfs_fs.h" 24#include "xfs_fs.h"
@@ -46,6 +47,7 @@
46#include "xfs_attr.h" 47#include "xfs_attr.h"
47#include "xfs_ioctl.h" 48#include "xfs_ioctl.h"
48#include "xfs_ioctl32.h" 49#include "xfs_ioctl32.h"
50#include "xfs_trace.h"
49 51
50#define _NATIVE_IOC(cmd, type) \ 52#define _NATIVE_IOC(cmd, type) \
51 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) 53 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
@@ -410,7 +412,7 @@ xfs_compat_attrmulti_by_handle(
410 compat_xfs_fsop_attrmulti_handlereq_t am_hreq; 412 compat_xfs_fsop_attrmulti_handlereq_t am_hreq;
411 struct dentry *dentry; 413 struct dentry *dentry;
412 unsigned int i, size; 414 unsigned int i, size;
413 char *attr_name; 415 unsigned char *attr_name;
414 416
415 if (!capable(CAP_SYS_ADMIN)) 417 if (!capable(CAP_SYS_ADMIN))
416 return -XFS_ERROR(EPERM); 418 return -XFS_ERROR(EPERM);
@@ -439,7 +441,7 @@ xfs_compat_attrmulti_by_handle(
439 441
440 error = 0; 442 error = 0;
441 for (i = 0; i < am_hreq.opcount; i++) { 443 for (i = 0; i < am_hreq.opcount; i++) {
442 ops[i].am_error = strncpy_from_user(attr_name, 444 ops[i].am_error = strncpy_from_user((char *)attr_name,
443 compat_ptr(ops[i].am_attrname), 445 compat_ptr(ops[i].am_attrname),
444 MAXNAMELEN); 446 MAXNAMELEN);
445 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) 447 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index cd42ef78f6b5..e65a7937f3a4 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -47,6 +47,7 @@
47#include "xfs_buf_item.h" 47#include "xfs_buf_item.h"
48#include "xfs_utils.h" 48#include "xfs_utils.h"
49#include "xfs_vnodeops.h" 49#include "xfs_vnodeops.h"
50#include "xfs_trace.h"
50 51
51#include <linux/capability.h> 52#include <linux/capability.h>
52#include <linux/xattr.h> 53#include <linux/xattr.h>
@@ -55,6 +56,7 @@
55#include <linux/security.h> 56#include <linux/security.h>
56#include <linux/falloc.h> 57#include <linux/falloc.h>
57#include <linux/fiemap.h> 58#include <linux/fiemap.h>
59#include <linux/slab.h>
58 60
59/* 61/*
60 * Bring the timestamps in the XFS inode uptodate. 62 * Bring the timestamps in the XFS inode uptodate.
@@ -90,6 +92,16 @@ xfs_mark_inode_dirty_sync(
90 mark_inode_dirty_sync(inode); 92 mark_inode_dirty_sync(inode);
91} 93}
92 94
95void
96xfs_mark_inode_dirty(
97 xfs_inode_t *ip)
98{
99 struct inode *inode = VFS_I(ip);
100
101 if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
102 mark_inode_dirty(inode);
103}
104
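A note on the state guard in this new helper (my reading; the patch does not spell it out): calling mark_inode_dirty() on an inode the VFS has already begun freeing would re-add it to the superblock dirty list mid-teardown, so the helper filters the I_WILL_FREE, I_FREEING and I_CLEAR states first. Call sites can then dirty unconditionally:

	/* hypothetical caller, after updating in-core inode fields */
	xfs_mark_inode_dirty(ip);	/* no-op if the inode is being torn down */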
93/* 105/*
94 * Change the requested timestamp in the given inode. 106 * Change the requested timestamp in the given inode.
95 * We don't lock across timestamp updates, and we don't log them but 107 * We don't lock across timestamp updates, and we don't log them but
@@ -139,10 +151,10 @@ xfs_init_security(
139 struct xfs_inode *ip = XFS_I(inode); 151 struct xfs_inode *ip = XFS_I(inode);
140 size_t length; 152 size_t length;
141 void *value; 153 void *value;
142 char *name; 154 unsigned char *name;
143 int error; 155 int error;
144 156
145 error = security_inode_init_security(inode, dir, &name, 157 error = security_inode_init_security(inode, dir, (char **)&name,
146 &value, &length); 158 &value, &length);
147 if (error) { 159 if (error) {
148 if (error == -EOPNOTSUPP) 160 if (error == -EOPNOTSUPP)
@@ -573,8 +585,8 @@ xfs_vn_fallocate(
573 bf.l_len = len; 585 bf.l_len = len;
574 586
575 xfs_ilock(ip, XFS_IOLOCK_EXCL); 587 xfs_ilock(ip, XFS_IOLOCK_EXCL);
576 error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, 588 error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
577 0, XFS_ATTR_NOLOCK); 589 0, XFS_ATTR_NOLOCK);
578 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && 590 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
579 offset + len > i_size_read(inode)) 591 offset + len > i_size_read(inode))
580 new_size = offset + len; 592 new_size = offset + len;
@@ -585,7 +597,7 @@ xfs_vn_fallocate(
585 597
586 iattr.ia_valid = ATTR_SIZE; 598 iattr.ia_valid = ATTR_SIZE;
587 iattr.ia_size = new_size; 599 iattr.ia_size = new_size;
588 error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); 600 error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
589 } 601 }
590 602
591 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 603 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -793,7 +805,7 @@ xfs_setup_inode(
793 struct inode *inode = &ip->i_vnode; 805 struct inode *inode = &ip->i_vnode;
794 806
795 inode->i_ino = ip->i_ino; 807 inode->i_ino = ip->i_ino;
796 inode->i_state = I_NEW|I_LOCK; 808 inode->i_state = I_NEW;
797 inode_add_to_lists(ip->i_mount->m_super, inode); 809 inode_add_to_lists(ip->i_mount->m_super, inode);
798 810
799 inode->i_mode = ip->i_d.di_mode; 811 inode->i_mode = ip->i_d.di_mode;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 6127e24062d0..facfb323a706 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -40,7 +40,6 @@
40#include <sv.h> 40#include <sv.h>
41#include <time.h> 41#include <time.h>
42 42
43#include <support/ktrace.h>
44#include <support/debug.h> 43#include <support/debug.h>
45#include <support/uuid.h> 44#include <support/uuid.h>
46 45
@@ -89,7 +88,6 @@
89#include <xfs_super.h> 88#include <xfs_super.h>
90#include <xfs_globals.h> 89#include <xfs_globals.h>
91#include <xfs_fs_subr.h> 90#include <xfs_fs_subr.h>
92#include <xfs_lrw.h>
93#include <xfs_buf.h> 91#include <xfs_buf.h>
94 92
95/* 93/*
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
deleted file mode 100644
index 072050f8d346..000000000000
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ /dev/null
@@ -1,922 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_bit.h"
21#include "xfs_log.h"
22#include "xfs_inum.h"
23#include "xfs_trans.h"
24#include "xfs_sb.h"
25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_alloc.h"
28#include "xfs_dmapi.h"
29#include "xfs_quota.h"
30#include "xfs_mount.h"
31#include "xfs_bmap_btree.h"
32#include "xfs_alloc_btree.h"
33#include "xfs_ialloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h"
38#include "xfs_bmap.h"
39#include "xfs_btree.h"
40#include "xfs_ialloc.h"
41#include "xfs_rtalloc.h"
42#include "xfs_error.h"
43#include "xfs_itable.h"
44#include "xfs_rw.h"
45#include "xfs_attr.h"
46#include "xfs_inode_item.h"
47#include "xfs_buf_item.h"
48#include "xfs_utils.h"
49#include "xfs_iomap.h"
50#include "xfs_vnodeops.h"
51
52#include <linux/capability.h>
53#include <linux/writeback.h>
54
55
56#if defined(XFS_RW_TRACE)
57void
58xfs_rw_enter_trace(
59 int tag,
60 xfs_inode_t *ip,
61 void *data,
62 size_t segs,
63 loff_t offset,
64 int ioflags)
65{
66 if (ip->i_rwtrace == NULL)
67 return;
68 ktrace_enter(ip->i_rwtrace,
69 (void *)(unsigned long)tag,
70 (void *)ip,
71 (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
72 (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
73 (void *)data,
74 (void *)((unsigned long)segs),
75 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
76 (void *)((unsigned long)(offset & 0xffffffff)),
77 (void *)((unsigned long)ioflags),
78 (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)),
79 (void *)((unsigned long)(ip->i_new_size & 0xffffffff)),
80 (void *)((unsigned long)current_pid()),
81 (void *)NULL,
82 (void *)NULL,
83 (void *)NULL,
84 (void *)NULL);
85}
86
87void
88xfs_inval_cached_trace(
89 xfs_inode_t *ip,
90 xfs_off_t offset,
91 xfs_off_t len,
92 xfs_off_t first,
93 xfs_off_t last)
94{
95
96 if (ip->i_rwtrace == NULL)
97 return;
98 ktrace_enter(ip->i_rwtrace,
99 (void *)(__psint_t)XFS_INVAL_CACHED,
100 (void *)ip,
101 (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
102 (void *)((unsigned long)(offset & 0xffffffff)),
103 (void *)((unsigned long)((len >> 32) & 0xffffffff)),
104 (void *)((unsigned long)(len & 0xffffffff)),
105 (void *)((unsigned long)((first >> 32) & 0xffffffff)),
106 (void *)((unsigned long)(first & 0xffffffff)),
107 (void *)((unsigned long)((last >> 32) & 0xffffffff)),
108 (void *)((unsigned long)(last & 0xffffffff)),
109 (void *)((unsigned long)current_pid()),
110 (void *)NULL,
111 (void *)NULL,
112 (void *)NULL,
113 (void *)NULL,
114 (void *)NULL);
115}
116#endif
117
118/*
119 * xfs_iozero
120 *
121 * xfs_iozero clears the specified range of buffer supplied,
122 * and marks all the affected blocks as valid and modified. If
123 * an affected block is not allocated, it will be allocated. If
124 * an affected block is not completely overwritten, and is not
125 * valid before the operation, it will be read from disk before
126 * being partially zeroed.
127 */
128STATIC int
129xfs_iozero(
130 struct xfs_inode *ip, /* inode */
131 loff_t pos, /* offset in file */
132 size_t count) /* size of data to zero */
133{
134 struct page *page;
135 struct address_space *mapping;
136 int status;
137
138 mapping = VFS_I(ip)->i_mapping;
139 do {
140 unsigned offset, bytes;
141 void *fsdata;
142
143 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
144 bytes = PAGE_CACHE_SIZE - offset;
145 if (bytes > count)
146 bytes = count;
147
148 status = pagecache_write_begin(NULL, mapping, pos, bytes,
149 AOP_FLAG_UNINTERRUPTIBLE,
150 &page, &fsdata);
151 if (status)
152 break;
153
154 zero_user(page, offset, bytes);
155
156 status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
157 page, fsdata);
158 WARN_ON(status <= 0); /* can't return less than zero! */
159 pos += bytes;
160 count -= bytes;
161 status = 0;
162 } while (count);
163
164 return (-status);
165}
166
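A worked example of the per-page split in xfs_iozero's loop, assuming PAGE_CACHE_SIZE == 4096 (the actual value is configuration-dependent):

/*
 * For pos = 6000 and count = 5000:
 *
 *   pass 1: offset = 6000 & 4095 = 1904,  bytes = 4096 - 1904 = 2192
 *   pass 2: offset = 8192 & 4095 = 0,     bytes = min(4096, 2808) = 2808
 *
 * The range is zeroed in two chunks, neither crossing a page
 * boundary, so each pagecache_write_begin/_end pair operates on
 * exactly one page.
 */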
167ssize_t /* bytes read, or (-) error */
168xfs_read(
169 xfs_inode_t *ip,
170 struct kiocb *iocb,
171 const struct iovec *iovp,
172 unsigned int segs,
173 loff_t *offset,
174 int ioflags)
175{
176 struct file *file = iocb->ki_filp;
177 struct inode *inode = file->f_mapping->host;
178 xfs_mount_t *mp = ip->i_mount;
179 size_t size = 0;
180 ssize_t ret = 0;
181 xfs_fsize_t n;
182 unsigned long seg;
183
184
185 XFS_STATS_INC(xs_read_calls);
186
187 /* START copy & waste from filemap.c */
188 for (seg = 0; seg < segs; seg++) {
189 const struct iovec *iv = &iovp[seg];
190
191 /*
192 * If any segment has a negative length, or the cumulative
193 * length ever wraps negative then return -EINVAL.
194 */
195 size += iv->iov_len;
196 if (unlikely((ssize_t)(size|iv->iov_len) < 0))
197 return XFS_ERROR(-EINVAL);
198 }
199 /* END copy & waste from filemap.c */
200
201 if (unlikely(ioflags & IO_ISDIRECT)) {
202 xfs_buftarg_t *target =
203 XFS_IS_REALTIME_INODE(ip) ?
204 mp->m_rtdev_targp : mp->m_ddev_targp;
205 if ((*offset & target->bt_smask) ||
206 (size & target->bt_smask)) {
207 if (*offset == ip->i_size) {
208 return (0);
209 }
210 return -XFS_ERROR(EINVAL);
211 }
212 }
213
214 n = XFS_MAXIOFFSET(mp) - *offset;
215 if ((n <= 0) || (size == 0))
216 return 0;
217
218 if (n < size)
219 size = n;
220
221 if (XFS_FORCED_SHUTDOWN(mp))
222 return -EIO;
223
224 if (unlikely(ioflags & IO_ISDIRECT))
225 mutex_lock(&inode->i_mutex);
226 xfs_ilock(ip, XFS_IOLOCK_SHARED);
227
228 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
229 int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
230 int iolock = XFS_IOLOCK_SHARED;
231
232 ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size,
233 dmflags, &iolock);
234 if (ret) {
235 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
236 if (unlikely(ioflags & IO_ISDIRECT))
237 mutex_unlock(&inode->i_mutex);
238 return ret;
239 }
240 }
241
242 if (unlikely(ioflags & IO_ISDIRECT)) {
243 if (inode->i_mapping->nrpages)
244 ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
245 -1, FI_REMAPF_LOCKED);
246 mutex_unlock(&inode->i_mutex);
247 if (ret) {
248 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
249 return ret;
250 }
251 }
252
253 xfs_rw_enter_trace(XFS_READ_ENTER, ip,
254 (void *)iovp, segs, *offset, ioflags);
255
256 iocb->ki_pos = *offset;
257 ret = generic_file_aio_read(iocb, iovp, segs, *offset);
258 if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
259 ret = wait_on_sync_kiocb(iocb);
260 if (ret > 0)
261 XFS_STATS_ADD(xs_read_bytes, ret);
262
263 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
264 return ret;
265}
266
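The segment loop marked "copy & waste from filemap.c" relies on a compact overflow test. A standalone sketch of the same check (helper name illustrative, not from this code):

/*
 * Returns 0 if any segment length is negative or the cumulative
 * length wraps past SSIZE_MAX; (ssize_t)(size | iov_len) < 0 works
 * because either condition leaves the sign bit set.
 */
static int
iov_total_ok(
	const struct iovec	*iovp,
	unsigned int		segs,
	size_t			*total)
{
	size_t			size = 0;
	unsigned int		seg;

	for (seg = 0; seg < segs; seg++) {
		size += iovp[seg].iov_len;
		if ((ssize_t)(size | iovp[seg].iov_len) < 0)
			return 0;
	}
	*total = size;
	return 1;
}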
267ssize_t
268xfs_splice_read(
269 xfs_inode_t *ip,
270 struct file *infilp,
271 loff_t *ppos,
272 struct pipe_inode_info *pipe,
273 size_t count,
274 int flags,
275 int ioflags)
276{
277 xfs_mount_t *mp = ip->i_mount;
278 ssize_t ret;
279
280 XFS_STATS_INC(xs_read_calls);
281 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
282 return -EIO;
283
284 xfs_ilock(ip, XFS_IOLOCK_SHARED);
285
286 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
287 int iolock = XFS_IOLOCK_SHARED;
288 int error;
289
290 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
291 FILP_DELAY_FLAG(infilp), &iolock);
292 if (error) {
293 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
294 return -error;
295 }
296 }
297 xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, ip,
298 pipe, count, *ppos, ioflags);
299 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
300 if (ret > 0)
301 XFS_STATS_ADD(xs_read_bytes, ret);
302
303 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
304 return ret;
305}
306
307ssize_t
308xfs_splice_write(
309 xfs_inode_t *ip,
310 struct pipe_inode_info *pipe,
311 struct file *outfilp,
312 loff_t *ppos,
313 size_t count,
314 int flags,
315 int ioflags)
316{
317 xfs_mount_t *mp = ip->i_mount;
318 ssize_t ret;
319 struct inode *inode = outfilp->f_mapping->host;
320 xfs_fsize_t isize, new_size;
321
322 XFS_STATS_INC(xs_write_calls);
323 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
324 return -EIO;
325
326 xfs_ilock(ip, XFS_IOLOCK_EXCL);
327
328 if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
329 int iolock = XFS_IOLOCK_EXCL;
330 int error;
331
332 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
333 FILP_DELAY_FLAG(outfilp), &iolock);
334 if (error) {
335 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
336 return -error;
337 }
338 }
339
340 new_size = *ppos + count;
341
342 xfs_ilock(ip, XFS_ILOCK_EXCL);
343 if (new_size > ip->i_size)
344 ip->i_new_size = new_size;
345 xfs_iunlock(ip, XFS_ILOCK_EXCL);
346
347 xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, ip,
348 pipe, count, *ppos, ioflags);
349 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
350 if (ret > 0)
351 XFS_STATS_ADD(xs_write_bytes, ret);
352
353 isize = i_size_read(inode);
354 if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
355 *ppos = isize;
356
357 if (*ppos > ip->i_size) {
358 xfs_ilock(ip, XFS_ILOCK_EXCL);
359 if (*ppos > ip->i_size)
360 ip->i_size = *ppos;
361 xfs_iunlock(ip, XFS_ILOCK_EXCL);
362 }
363
364 if (ip->i_new_size) {
365 xfs_ilock(ip, XFS_ILOCK_EXCL);
366 ip->i_new_size = 0;
367 if (ip->i_d.di_size > ip->i_size)
368 ip->i_d.di_size = ip->i_size;
369 xfs_iunlock(ip, XFS_ILOCK_EXCL);
370 }
371 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
372 return ret;
373}
374
375/*
376 * This routine is called to handle zeroing any space in the last
377 * block of the file that is beyond the EOF. We do this since the
378 * size is being increased without writing anything to that block
379 * and we don't want anyone to read the garbage on the disk.
380 */
381STATIC int /* error (positive) */
382xfs_zero_last_block(
383 xfs_inode_t *ip,
384 xfs_fsize_t offset,
385 xfs_fsize_t isize)
386{
387 xfs_fileoff_t last_fsb;
388 xfs_mount_t *mp = ip->i_mount;
389 int nimaps;
390 int zero_offset;
391 int zero_len;
392 int error = 0;
393 xfs_bmbt_irec_t imap;
394
395 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
396
397 zero_offset = XFS_B_FSB_OFFSET(mp, isize);
398 if (zero_offset == 0) {
399 /*
400 * There are no extra bytes in the last block on disk to
401 * zero, so return.
402 */
403 return 0;
404 }
405
406 last_fsb = XFS_B_TO_FSBT(mp, isize);
407 nimaps = 1;
408 error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
409 &nimaps, NULL, NULL);
410 if (error) {
411 return error;
412 }
413 ASSERT(nimaps > 0);
414 /*
415 * If the block underlying isize is just a hole, then there
416 * is nothing to zero.
417 */
418 if (imap.br_startblock == HOLESTARTBLOCK) {
419 return 0;
420 }
421 /*
422 * Zero the part of the last block beyond the EOF, and write it
423 * out sync. We need to drop the ilock while we do this so we
424 * don't deadlock when the buffer cache calls back to us.
425 */
426 xfs_iunlock(ip, XFS_ILOCK_EXCL);
427
428 zero_len = mp->m_sb.sb_blocksize - zero_offset;
429 if (isize + zero_len > offset)
430 zero_len = offset - isize;
431 error = xfs_iozero(ip, isize, zero_len);
432
433 xfs_ilock(ip, XFS_ILOCK_EXCL);
434 ASSERT(error >= 0);
435 return error;
436}
437
438/*
439 * Zero any on disk space between the current EOF and the new,
440 * larger EOF. This handles the normal case of zeroing the remainder
441 * of the last block in the file and the unusual case of zeroing blocks
442 * out beyond the size of the file. This second case only happens
443 * with fixed size extents and when the system crashes before the inode
444 * size was updated but after blocks were allocated. If fill is set,
445 * then any holes in the range are filled and zeroed. If not, the holes
446 * are left alone as holes.
447 */
448
449int /* error (positive) */
450xfs_zero_eof(
451 xfs_inode_t *ip,
452 xfs_off_t offset, /* starting I/O offset */
453 xfs_fsize_t isize) /* current inode size */
454{
455 xfs_mount_t *mp = ip->i_mount;
456 xfs_fileoff_t start_zero_fsb;
457 xfs_fileoff_t end_zero_fsb;
458 xfs_fileoff_t zero_count_fsb;
459 xfs_fileoff_t last_fsb;
460 xfs_fileoff_t zero_off;
461 xfs_fsize_t zero_len;
462 int nimaps;
463 int error = 0;
464 xfs_bmbt_irec_t imap;
465
466 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
467 ASSERT(offset > isize);
468
469 /*
470 * First handle zeroing the block on which isize resides.
471 * We only zero a part of that block so it is handled specially.
472 */
473 error = xfs_zero_last_block(ip, offset, isize);
474 if (error) {
475 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
476 return error;
477 }
478
479 /*
480 * Calculate the range between the new size and the old
481 * where blocks needing to be zeroed may exist. To get the
482 * block where the last byte in the file currently resides,
483 * we need to subtract one from the size and truncate back
484 * to a block boundary. We subtract 1 in case the size is
485 * exactly on a block boundary.
486 */
487 last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
488 start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
489 end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
490 ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
491 if (last_fsb == end_zero_fsb) {
492 /*
493 * The size was only incremented on its last block.
494 * We took care of that above, so just return.
495 */
496 return 0;
497 }
498
499 ASSERT(start_zero_fsb <= end_zero_fsb);
500 while (start_zero_fsb <= end_zero_fsb) {
501 nimaps = 1;
502 zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
503 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
504 0, NULL, 0, &imap, &nimaps, NULL, NULL);
505 if (error) {
506 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
507 return error;
508 }
509 ASSERT(nimaps > 0);
510
511 if (imap.br_state == XFS_EXT_UNWRITTEN ||
512 imap.br_startblock == HOLESTARTBLOCK) {
513 /*
514 * This loop handles initializing pages that were
515 * partially initialized by the code below this
516 * loop. It basically zeroes the part of the page
517 * that sits on a hole and sets the page as P_HOLE
518 * and calls remapf if it is a mapped file.
519 */
520 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
521 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
522 continue;
523 }
524
525 /*
526 * There are blocks we need to zero.
527 * Drop the inode lock while we're doing the I/O.
528 * We'll still have the iolock to protect us.
529 */
530 xfs_iunlock(ip, XFS_ILOCK_EXCL);
531
532 zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
533 zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
534
535 if ((zero_off + zero_len) > offset)
536 zero_len = offset - zero_off;
537
538 error = xfs_iozero(ip, zero_off, zero_len);
539 if (error) {
540 goto out_lock;
541 }
542
543 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
544 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
545
546 xfs_ilock(ip, XFS_ILOCK_EXCL);
547 }
548
549 return 0;
550
551out_lock:
552 xfs_ilock(ip, XFS_ILOCK_EXCL);
553 ASSERT(error >= 0);
554 return error;
555}
556
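A worked example of the block-range set-up in xfs_zero_eof, assuming a 4096-byte filesystem block (XFS_B_TO_FSB rounds up to a block boundary, XFS_B_TO_FSBT truncates):

/*
 * With isize = 10000 and offset = 20000:
 *
 *   last_fsb       = XFS_B_TO_FSBT(mp, 9999)  = 2   (block holding the
 *                                                    current last byte)
 *   start_zero_fsb = XFS_B_TO_FSB(mp, 10000)  = 3
 *   end_zero_fsb   = XFS_B_TO_FSBT(mp, 19999) = 4
 *
 * Blocks 3 and 4 are the zeroing candidates; the partial tail of
 * block 2 was already handled by xfs_zero_last_block().  Had offset
 * been 11000 instead, end_zero_fsb would also be 2 == last_fsb and
 * the function would return early.
 */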
557ssize_t /* bytes written, or (-) error */
558xfs_write(
559 struct xfs_inode *xip,
560 struct kiocb *iocb,
561 const struct iovec *iovp,
562 unsigned int nsegs,
563 loff_t *offset,
564 int ioflags)
565{
566 struct file *file = iocb->ki_filp;
567 struct address_space *mapping = file->f_mapping;
568 struct inode *inode = mapping->host;
569 unsigned long segs = nsegs;
570 xfs_mount_t *mp;
571 ssize_t ret = 0, error = 0;
572 xfs_fsize_t isize, new_size;
573 int iolock;
574 int eventsent = 0;
575 size_t ocount = 0, count;
576 loff_t pos;
577 int need_i_mutex;
578
579 XFS_STATS_INC(xs_write_calls);
580
581 error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ);
582 if (error)
583 return error;
584
585 count = ocount;
586 pos = *offset;
587
588 if (count == 0)
589 return 0;
590
591 mp = xip->i_mount;
592
593 xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);
594
595 if (XFS_FORCED_SHUTDOWN(mp))
596 return -EIO;
597
598relock:
599 if (ioflags & IO_ISDIRECT) {
600 iolock = XFS_IOLOCK_SHARED;
601 need_i_mutex = 0;
602 } else {
603 iolock = XFS_IOLOCK_EXCL;
604 need_i_mutex = 1;
605 mutex_lock(&inode->i_mutex);
606 }
607
608 xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
609
610start:
611 error = -generic_write_checks(file, &pos, &count,
612 S_ISBLK(inode->i_mode));
613 if (error) {
614 xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
615 goto out_unlock_mutex;
616 }
617
618 if ((DM_EVENT_ENABLED(xip, DM_EVENT_WRITE) &&
619 !(ioflags & IO_INVIS) && !eventsent)) {
620 int dmflags = FILP_DELAY_FLAG(file);
621
622 if (need_i_mutex)
623 dmflags |= DM_FLAGS_IMUX;
624
625 xfs_iunlock(xip, XFS_ILOCK_EXCL);
626 error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip,
627 pos, count, dmflags, &iolock);
628 if (error) {
629 goto out_unlock_internal;
630 }
631 xfs_ilock(xip, XFS_ILOCK_EXCL);
632 eventsent = 1;
633
634 /*
635 * The iolock was dropped and reacquired in XFS_SEND_DATA
636 * so we have to recheck the size when appending.
637 * We will only "goto start;" once, since having sent the
638 * event prevents another call to XFS_SEND_DATA, which is
639 * what allows the size to change in the first place.
640 */
641 if ((file->f_flags & O_APPEND) && pos != xip->i_size)
642 goto start;
643 }
644
645 if (ioflags & IO_ISDIRECT) {
646 xfs_buftarg_t *target =
647 XFS_IS_REALTIME_INODE(xip) ?
648 mp->m_rtdev_targp : mp->m_ddev_targp;
649
650 if ((pos & target->bt_smask) || (count & target->bt_smask)) {
651 xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
652 return XFS_ERROR(-EINVAL);
653 }
654
655 if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) {
656 xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
657 iolock = XFS_IOLOCK_EXCL;
658 need_i_mutex = 1;
659 mutex_lock(&inode->i_mutex);
660 xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
661 goto start;
662 }
663 }
664
665 new_size = pos + count;
666 if (new_size > xip->i_size)
667 xip->i_new_size = new_size;
668
669 if (likely(!(ioflags & IO_INVIS)))
670 file_update_time(file);
671
672 /*
673 * If the offset is beyond the size of the file, we have a couple
674 * of things to do. First, if there is already space allocated
675 * we need to either create holes or zero the disk or ...
676 *
677 * If there is a page where the previous size lands, we need
678 * to zero it out up to the new size.
679 */
680
681 if (pos > xip->i_size) {
682 error = xfs_zero_eof(xip, pos, xip->i_size);
683 if (error) {
684 xfs_iunlock(xip, XFS_ILOCK_EXCL);
685 goto out_unlock_internal;
686 }
687 }
688 xfs_iunlock(xip, XFS_ILOCK_EXCL);
689
690 /*
691 * If we're writing the file then make sure to clear the
692 * setuid and setgid bits if the process is not being run
693 * by root. This keeps people from modifying setuid and
694 * setgid binaries.
695 */
696
697 if (((xip->i_d.di_mode & S_ISUID) ||
698 ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) ==
699 (S_ISGID | S_IXGRP))) &&
700 !capable(CAP_FSETID)) {
701 error = xfs_write_clear_setuid(xip);
702 if (likely(!error))
703 error = -file_remove_suid(file);
704 if (unlikely(error)) {
705 goto out_unlock_internal;
706 }
707 }
708
709 /* We can write back this queue in page reclaim */
710 current->backing_dev_info = mapping->backing_dev_info;
711
712 if ((ioflags & IO_ISDIRECT)) {
713 if (mapping->nrpages) {
714 WARN_ON(need_i_mutex == 0);
715 xfs_inval_cached_trace(xip, pos, -1,
716 (pos & PAGE_CACHE_MASK), -1);
717 error = xfs_flushinval_pages(xip,
718 (pos & PAGE_CACHE_MASK),
719 -1, FI_REMAPF_LOCKED);
720 if (error)
721 goto out_unlock_internal;
722 }
723
724 if (need_i_mutex) {
725 /* demote the lock now the cached pages are gone */
726 xfs_ilock_demote(xip, XFS_IOLOCK_EXCL);
727 mutex_unlock(&inode->i_mutex);
728
729 iolock = XFS_IOLOCK_SHARED;
730 need_i_mutex = 0;
731 }
732
733 xfs_rw_enter_trace(XFS_DIOWR_ENTER, xip, (void *)iovp, segs,
734 *offset, ioflags);
735 ret = generic_file_direct_write(iocb, iovp,
736 &segs, pos, offset, count, ocount);
737
738 /*
739 * direct-io write to a hole: fall through to buffered I/O
740 * for completing the rest of the request.
741 */
742 if (ret >= 0 && ret != count) {
743 XFS_STATS_ADD(xs_write_bytes, ret);
744
745 pos += ret;
746 count -= ret;
747
748 ioflags &= ~IO_ISDIRECT;
749 xfs_iunlock(xip, iolock);
750 goto relock;
751 }
752 } else {
753 int enospc = 0;
754 ssize_t ret2 = 0;
755
756write_retry:
757 xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs,
758 *offset, ioflags);
759 ret2 = generic_file_buffered_write(iocb, iovp, segs,
760 pos, offset, count, ret);
761 /*
 762 * if we just got an ENOSPC, flush the inode now that we
 763 * aren't holding any page locks, and retry *once*
764 */
765 if (ret2 == -ENOSPC && !enospc) {
766 error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE);
767 if (error)
768 goto out_unlock_internal;
769 enospc = 1;
770 goto write_retry;
771 }
772 ret = ret2;
773 }
774
775 current->backing_dev_info = NULL;
776
777 if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
778 ret = wait_on_sync_kiocb(iocb);
779
780 isize = i_size_read(inode);
781 if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
782 *offset = isize;
783
784 if (*offset > xip->i_size) {
785 xfs_ilock(xip, XFS_ILOCK_EXCL);
786 if (*offset > xip->i_size)
787 xip->i_size = *offset;
788 xfs_iunlock(xip, XFS_ILOCK_EXCL);
789 }
790
791 if (ret == -ENOSPC &&
792 DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
793 xfs_iunlock(xip, iolock);
794 if (need_i_mutex)
795 mutex_unlock(&inode->i_mutex);
796 error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip,
797 DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL,
798 0, 0, 0); /* Delay flag intentionally unused */
799 if (need_i_mutex)
800 mutex_lock(&inode->i_mutex);
801 xfs_ilock(xip, iolock);
802 if (error)
803 goto out_unlock_internal;
804 goto start;
805 }
806
807 error = -ret;
808 if (ret <= 0)
809 goto out_unlock_internal;
810
811 XFS_STATS_ADD(xs_write_bytes, ret);
812
813 /* Handle various SYNC-type writes */
814 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
815 loff_t end = pos + ret - 1;
816 int error2;
817
818 xfs_iunlock(xip, iolock);
819 if (need_i_mutex)
820 mutex_unlock(&inode->i_mutex);
821
822 error2 = filemap_write_and_wait_range(mapping, pos, end);
823 if (!error)
824 error = error2;
825 if (need_i_mutex)
826 mutex_lock(&inode->i_mutex);
827 xfs_ilock(xip, iolock);
828
829 error2 = xfs_fsync(xip);
830 if (!error)
831 error = error2;
832 }
833
834 out_unlock_internal:
835 if (xip->i_new_size) {
836 xfs_ilock(xip, XFS_ILOCK_EXCL);
837 xip->i_new_size = 0;
838 /*
839 * If this was a direct or synchronous I/O that failed (such
840 * as ENOSPC) then part of the I/O may have been written to
 841 * disk before the error occurred. In this case the on-disk
842 * file size may have been adjusted beyond the in-memory file
843 * size and now needs to be truncated back.
844 */
845 if (xip->i_d.di_size > xip->i_size)
846 xip->i_d.di_size = xip->i_size;
847 xfs_iunlock(xip, XFS_ILOCK_EXCL);
848 }
849 xfs_iunlock(xip, iolock);
850 out_unlock_mutex:
851 if (need_i_mutex)
852 mutex_unlock(&inode->i_mutex);
853 return -error;
854}
855
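The buffered branch of xfs_write above retries exactly once after ENOSPC. A minimal sketch of that pattern in isolation, with do_write() and flush_all() as stand-ins for generic_file_buffered_write() and xfs_flush_pages(), and deliberately simplified error handling:

STATIC ssize_t
write_with_enospc_retry(
	struct xfs_inode	*ip)
{
	int			enospc = 0;
	ssize_t			ret;

retry:
	ret = do_write(ip);			/* stand-in */
	if (ret == -ENOSPC && !enospc) {
		/* no page locks are held here, so flushing is safe */
		if (flush_all(ip))		/* stand-in */
			return -EIO;
		enospc = 1;			/* permits a single retry */
		goto retry;
	}
	return ret;
}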
856/*
857 * All xfs metadata buffers except log state machine buffers
858 * get this attached as their b_bdstrat callback function.
859 * This is so that we can catch a buffer
860 * after prematurely unpinning it to forcibly shutdown the filesystem.
861 */
862int
863xfs_bdstrat_cb(struct xfs_buf *bp)
864{
865 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
866 xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
867 /*
868 * Metadata write that didn't get logged but
869 * written delayed anyway. These aren't associated
870 * with a transaction, and can be ignored.
871 */
872 if (XFS_BUF_IODONE_FUNC(bp) == NULL &&
873 (XFS_BUF_ISREAD(bp)) == 0)
874 return (xfs_bioerror_relse(bp));
875 else
876 return (xfs_bioerror(bp));
877 }
878
879 xfs_buf_iorequest(bp);
880 return 0;
881}
882
883/*
884 * Wrapper around bdstrat so that we can stop data from going to disk in case
885 * we are shutting down the filesystem. Typically user data goes thru this
886 * path; one of the exceptions is the superblock.
887 */
888void
889xfsbdstrat(
890 struct xfs_mount *mp,
891 struct xfs_buf *bp)
892{
893 ASSERT(mp);
894 if (!XFS_FORCED_SHUTDOWN(mp)) {
895 xfs_buf_iorequest(bp);
896 return;
897 }
898
899 xfs_buftrace("XFSBDSTRAT IOERROR", bp);
900 xfs_bioerror_relse(bp);
901}
902
903/*
904 * If the underlying (data/log/rt) device is readonly, there are some
905 * operations that cannot proceed.
906 */
907int
908xfs_dev_is_read_only(
909 xfs_mount_t *mp,
910 char *message)
911{
912 if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
913 xfs_readonly_buftarg(mp->m_logdev_targp) ||
914 (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
915 cmn_err(CE_NOTE,
916 "XFS: %s required on read-only device.", message);
917 cmn_err(CE_NOTE,
918 "XFS: write access unavailable, cannot proceed.");
919 return EROFS;
920 }
921 return 0;
922}
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
deleted file mode 100644
index e6be37dbd0e9..000000000000
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ /dev/null
@@ -1,77 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_LRW_H__
19#define __XFS_LRW_H__
20
21struct xfs_mount;
22struct xfs_inode;
23struct xfs_bmbt_irec;
24struct xfs_buf;
25struct xfs_iomap;
26
27#if defined(XFS_RW_TRACE)
28/*
29 * Defines for the trace mechanisms in xfs_lrw.c.
30 */
31#define XFS_RW_KTRACE_SIZE 128
32
33#define XFS_READ_ENTER 1
34#define XFS_WRITE_ENTER 2
35#define XFS_IOMAP_READ_ENTER 3
36#define XFS_IOMAP_WRITE_ENTER 4
37#define XFS_IOMAP_READ_MAP 5
38#define XFS_IOMAP_WRITE_MAP 6
39#define XFS_IOMAP_WRITE_NOSPACE 7
40#define XFS_ITRUNC_START 8
41#define XFS_ITRUNC_FINISH1 9
42#define XFS_ITRUNC_FINISH2 10
43#define XFS_CTRUNC1 11
44#define XFS_CTRUNC2 12
45#define XFS_CTRUNC3 13
46#define XFS_CTRUNC4 14
47#define XFS_CTRUNC5 15
48#define XFS_CTRUNC6 16
49#define XFS_BUNMAP 17
50#define XFS_INVAL_CACHED 18
51#define XFS_DIORD_ENTER 19
52#define XFS_DIOWR_ENTER 20
53#define XFS_WRITEPAGE_ENTER 22
54#define XFS_RELEASEPAGE_ENTER 23
55#define XFS_INVALIDPAGE_ENTER 24
56#define XFS_IOMAP_ALLOC_ENTER 25
57#define XFS_IOMAP_ALLOC_MAP 26
58#define XFS_IOMAP_UNWRITTEN 27
59#define XFS_SPLICE_READ_ENTER 28
60#define XFS_SPLICE_WRITE_ENTER 29
61extern void xfs_rw_enter_trace(int, struct xfs_inode *,
62 void *, size_t, loff_t, int);
63extern void xfs_inval_cached_trace(struct xfs_inode *,
64 xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t);
65#else
66#define xfs_rw_enter_trace(tag, ip, data, size, offset, ioflags)
67#define xfs_inval_cached_trace(ip, offset, len, first, last)
68#endif
69
70/* errors from xfsbdstrat() must be extracted from the buffer */
71extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
72extern int xfs_bdstrat_cb(struct xfs_buf *);
73extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
74
75extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
76
77#endif /* __XFS_LRW_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 3d4a0c84d634..1947514ce1ad 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -44,20 +44,6 @@ xfs_quota_type(int type)
44} 44}
45 45
46STATIC int 46STATIC int
47xfs_fs_quota_sync(
48 struct super_block *sb,
49 int type)
50{
51 struct xfs_mount *mp = XFS_M(sb);
52
53 if (sb->s_flags & MS_RDONLY)
54 return -EROFS;
55 if (!XFS_IS_QUOTA_RUNNING(mp))
56 return -ENOSYS;
57 return -xfs_sync_data(mp, 0);
58}
59
60STATIC int
61xfs_fs_get_xstate( 47xfs_fs_get_xstate(
62 struct super_block *sb, 48 struct super_block *sb,
63 struct fs_quota_stat *fqs) 49 struct fs_quota_stat *fqs)
@@ -82,8 +68,6 @@ xfs_fs_set_xstate(
82 return -EROFS; 68 return -EROFS;
83 if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) 69 if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
84 return -ENOSYS; 70 return -ENOSYS;
85 if (!capable(CAP_SYS_ADMIN))
86 return -EPERM;
87 71
88 if (uflags & XFS_QUOTA_UDQ_ACCT) 72 if (uflags & XFS_QUOTA_UDQ_ACCT)
89 flags |= XFS_UQUOTA_ACCT; 73 flags |= XFS_UQUOTA_ACCT;
@@ -144,14 +128,11 @@ xfs_fs_set_xquota(
144 return -ENOSYS; 128 return -ENOSYS;
145 if (!XFS_IS_QUOTA_ON(mp)) 129 if (!XFS_IS_QUOTA_ON(mp))
146 return -ESRCH; 130 return -ESRCH;
147 if (!capable(CAP_SYS_ADMIN))
148 return -EPERM;
149 131
150 return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); 132 return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
151} 133}
152 134
153const struct quotactl_ops xfs_quotactl_operations = { 135const struct quotactl_ops xfs_quotactl_operations = {
154 .quota_sync = xfs_fs_quota_sync,
155 .get_xstate = xfs_fs_get_xstate, 136 .get_xstate = xfs_fs_get_xstate,
156 .set_xstate = xfs_fs_set_xstate, 137 .set_xstate = xfs_fs_set_xstate,
157 .get_xquota = xfs_fs_get_xquota, 138 .get_xquota = xfs_fs_get_xquota,
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 18a4b8e11df2..29f1edca76de 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -15,6 +15,7 @@
15 * along with this program; if not, write the Free Software Foundation, 15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18
18#include "xfs.h" 19#include "xfs.h"
19#include "xfs_bit.h" 20#include "xfs_bit.h"
20#include "xfs_log.h" 21#include "xfs_log.h"
@@ -52,14 +53,15 @@
52#include "xfs_trans_priv.h" 53#include "xfs_trans_priv.h"
53#include "xfs_filestream.h" 54#include "xfs_filestream.h"
54#include "xfs_da_btree.h" 55#include "xfs_da_btree.h"
55#include "xfs_dir2_trace.h"
56#include "xfs_extfree_item.h" 56#include "xfs_extfree_item.h"
57#include "xfs_mru_cache.h" 57#include "xfs_mru_cache.h"
58#include "xfs_inode_item.h" 58#include "xfs_inode_item.h"
59#include "xfs_sync.h" 59#include "xfs_sync.h"
60#include "xfs_trace.h"
60 61
61#include <linux/namei.h> 62#include <linux/namei.h>
62#include <linux/init.h> 63#include <linux/init.h>
64#include <linux/slab.h>
63#include <linux/mount.h> 65#include <linux/mount.h>
64#include <linux/mempool.h> 66#include <linux/mempool.h>
65#include <linux/writeback.h> 67#include <linux/writeback.h>
@@ -876,12 +878,11 @@ xfsaild(
876{ 878{
877 struct xfs_ail *ailp = data; 879 struct xfs_ail *ailp = data;
878 xfs_lsn_t last_pushed_lsn = 0; 880 xfs_lsn_t last_pushed_lsn = 0;
879 long tout = 0; 881 long tout = 0; /* milliseconds */
880 882
881 while (!kthread_should_stop()) { 883 while (!kthread_should_stop()) {
882 if (tout) 884 schedule_timeout_interruptible(tout ?
883 schedule_timeout_interruptible(msecs_to_jiffies(tout)); 885 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
884 tout = 1000;
885 886
886 /* swsusp */ 887 /* swsusp */
887 try_to_freeze(); 888 try_to_freeze();
@@ -930,13 +931,37 @@ xfs_fs_alloc_inode(
930 */ 931 */
931STATIC void 932STATIC void
932xfs_fs_destroy_inode( 933xfs_fs_destroy_inode(
933 struct inode *inode) 934 struct inode *inode)
934{ 935{
935 xfs_inode_t *ip = XFS_I(inode); 936 struct xfs_inode *ip = XFS_I(inode);
937
938 xfs_itrace_entry(ip);
936 939
937 XFS_STATS_INC(vn_reclaim); 940 XFS_STATS_INC(vn_reclaim);
938 if (xfs_reclaim(ip)) 941
939 panic("%s: cannot reclaim 0x%p\n", __func__, inode); 942 /* bad inode, get out here ASAP */
943 if (is_bad_inode(inode))
944 goto out_reclaim;
945
946 xfs_ioend_wait(ip);
947
948 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
949
950 /*
951 * We should never get here with one of the reclaim flags already set.
952 */
953 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
954 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
955
956 /*
957 * We always use background reclaim here because even if the
958 * inode is clean, it still may be under IO and hence we have
959 * to take the flush lock. The background reclaim path handles
960 * this more efficiently than we can here, so simply let background
961 * reclaim tear down all inodes.
962 */
963out_reclaim:
964 xfs_inode_set_reclaim_tag(ip);
940} 965}
941 966
942/* 967/*
@@ -973,7 +998,6 @@ xfs_fs_inode_init_once(
973 998
974 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, 999 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
975 "xfsino", ip->i_ino); 1000 "xfsino", ip->i_ino);
976 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
977} 1001}
978 1002
979/* 1003/*
@@ -998,59 +1022,108 @@ xfs_fs_dirty_inode(
998 XFS_I(inode)->i_update_core = 1; 1022 XFS_I(inode)->i_update_core = 1;
999} 1023}
1000 1024
1001/* 1025STATIC int
1002 * Attempt to flush the inode, this will actually fail 1026xfs_log_inode(
1003 * if the inode is pinned, but we dirty the inode again 1027 struct xfs_inode *ip)
1004 * at the point when it is unpinned after a log write, 1028{
1005 * since this is when the inode itself becomes flushable. 1029 struct xfs_mount *mp = ip->i_mount;
1006 */ 1030 struct xfs_trans *tp;
1031 int error;
1032
1033 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1034 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
1035 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
1036
1037 if (error) {
1038 xfs_trans_cancel(tp, 0);
 1039 /* we need to return with the lock held shared */
1040 xfs_ilock(ip, XFS_ILOCK_SHARED);
1041 return error;
1042 }
1043
1044 xfs_ilock(ip, XFS_ILOCK_EXCL);
1045
1046 /*
1047 * Note - it's possible that we might have pushed ourselves out of the
1048 * way during trans_reserve which would flush the inode. But there's
1049 * no guarantee that the inode buffer has actually gone out yet (it's
1050 * delwri). Plus the buffer could be pinned anyway if it's part of
1051 * an inode in another recent transaction. So we play it safe and
1052 * fire off the transaction anyway.
1053 */
1054 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1055 xfs_trans_ihold(tp, ip);
1056 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1057 xfs_trans_set_sync(tp);
1058 error = xfs_trans_commit(tp, 0);
1059 xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
1060
1061 return error;
1062}
1063
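A summary of the lock choreography in xfs_log_inode, since it is easy to misread: the function is entered and exited holding ILOCK_SHARED on every path.

/*
 *   enter:   caller holds XFS_ILOCK_SHARED
 *   drop:    xfs_iunlock(SHARED) before the potentially blocking
 *            log reservation
 *   error:   cancel the transaction, retake SHARED, return
 *   success: take XFS_ILOCK_EXCL for the transaction, commit it
 *            synchronously, then xfs_ilock_demote() converts the
 *            lock back down to SHARED
 */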
1007STATIC int 1064STATIC int
1008xfs_fs_write_inode( 1065xfs_fs_write_inode(
1009 struct inode *inode, 1066 struct inode *inode,
1010 int sync) 1067 struct writeback_control *wbc)
1011{ 1068{
1012 struct xfs_inode *ip = XFS_I(inode); 1069 struct xfs_inode *ip = XFS_I(inode);
1013 struct xfs_mount *mp = ip->i_mount; 1070 struct xfs_mount *mp = ip->i_mount;
1014 int error = 0; 1071 int error = EAGAIN;
1015 1072
1016 xfs_itrace_entry(ip); 1073 xfs_itrace_entry(ip);
1017 1074
1018 if (XFS_FORCED_SHUTDOWN(mp)) 1075 if (XFS_FORCED_SHUTDOWN(mp))
1019 return XFS_ERROR(EIO); 1076 return XFS_ERROR(EIO);
1020 1077
1021 if (sync) { 1078 if (wbc->sync_mode == WB_SYNC_ALL) {
1022 error = xfs_wait_on_pages(ip, 0, -1); 1079 /*
1023 if (error) 1080 * Make sure the inode has hit stable storage. By using the
1081 * log and the fsync transactions we reduce the IOs we have
1082 * to do here from two (log and inode) to just the log.
1083 *
1084 * Note: We still need to do a delwri write of the inode after
1085 * this to flush it to the backing buffer so that bulkstat
1086 * works properly if this is the first time the inode has been
1087 * written. Because we hold the ilock atomically over the
1088 * transaction commit and the inode flush we are guaranteed
1089 * that the inode is not pinned when it returns. If the flush
1090 * lock is already held, then the inode has already been
1091 * flushed once and we don't need to flush it again. Hence
1092 * the code will only flush the inode if it isn't already
1093 * being flushed.
1094 */
1095 xfs_ilock(ip, XFS_ILOCK_SHARED);
1096 if (ip->i_update_core) {
1097 error = xfs_log_inode(ip);
1098 if (error)
1099 goto out_unlock;
1100 }
1101 } else {
1102 /*
 1103 * We make this non-blocking if the inode is contended, returning
1104 * EAGAIN to indicate to the caller that they did not succeed.
1105 * This prevents the flush path from blocking on inodes inside
1106 * another operation right now, they get caught later by xfs_sync.
1107 */
1108 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
1024 goto out; 1109 goto out;
1025 } 1110 }
1026 1111
1027 /* 1112 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
1028 * Bypass inodes which have already been cleaned by 1113 goto out_unlock;
1029 * the inode flush clustering code inside xfs_iflush
1030 */
1031 if (xfs_inode_clean(ip))
1032 goto out;
1033 1114
1034 /* 1115 /*
1035 * We make this non-blocking if the inode is contended, return 1116 * Now we have the flush lock and the inode is not pinned, we can check
1036 * EAGAIN to indicate to the caller that they did not succeed. 1117 * if the inode is really clean as we know that there are no pending
1037 * This prevents the flush path from blocking on inodes inside 1118 * transaction completions, it is not waiting on the delayed write
1038 * another operation right now, they get caught later by xfs_sync. 1119 * queue and there is no IO in progress.
1039 */ 1120 */
1040 if (sync) { 1121 if (xfs_inode_clean(ip)) {
1041 xfs_ilock(ip, XFS_ILOCK_SHARED); 1122 xfs_ifunlock(ip);
1042 xfs_iflock(ip); 1123 error = 0;
1043 1124 goto out_unlock;
1044 error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
1045 } else {
1046 error = EAGAIN;
1047 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
1048 goto out;
1049 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
1050 goto out_unlock;
1051
1052 error = xfs_iflush(ip, XFS_IFLUSH_ASYNC_NOBLOCK);
1053 } 1125 }
1126 error = xfs_iflush(ip, 0);
1054 1127
1055 out_unlock: 1128 out_unlock:
1056 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1129 xfs_iunlock(ip, XFS_ILOCK_SHARED);
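A decision summary for the reworked xfs_fs_write_inode, as the two branches now share their tail:

/*
 *   WB_SYNC_ALL:  block on ILOCK_SHARED; if i_update_core is set,
 *                 force the inode core to the log via xfs_log_inode()
 *   otherwise:    trylock ILOCK_SHARED and return EAGAIN on contention
 *   both paths:   trylock the flush lock, bail if the inode is pinned,
 *                 short-circuit clean inodes, else xfs_iflush(ip, 0)
 */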
@@ -1075,6 +1148,20 @@ xfs_fs_clear_inode(
1075 XFS_STATS_INC(vn_remove); 1148 XFS_STATS_INC(vn_remove);
1076 XFS_STATS_DEC(vn_active); 1149 XFS_STATS_DEC(vn_active);
1077 1150
1151 /*
1152 * The iolock is used by the file system to coordinate reads,
1153 * writes, and block truncates. Up to this point the lock
1154 * protected concurrent accesses by users of the inode. But
1155 * from here forward we're doing some final processing of the
1156 * inode because we're done with it, and although we reuse the
 1157 * iolock for protection, it is really a distinct lock class
1158 * (in the lockdep sense) from before. To keep lockdep happy
1159 * (and basically indicate what we are doing), we explicitly
1160 * re-init the iolock here.
1161 */
1162 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
1163 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
1164
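Why re-initializing the mrlock gives it a new lockdep class (my understanding of the generic kernel mechanism, not something this patch states): init_rwsem() is a macro that declares a static struct lock_class_key at its call site, so each distinct init site yields a distinct class. A minimal sketch outside XFS:

struct obj {
	struct rw_semaphore	sem;
};

static void
obj_enter_teardown(
	struct obj		*o)
{
	/*
	 * Only safe while the semaphore is unlocked, which is why the
	 * hunk above asserts !rwsem_is_locked() first.  Re-initializing
	 * here re-keys the lock for lockdep, separating teardown-phase
	 * acquisitions from those recorded during normal use.
	 */
	init_rwsem(&o->sem);
}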
1078 xfs_inactive(ip); 1165 xfs_inactive(ip);
1079} 1166}
1080 1167
@@ -1092,8 +1179,6 @@ xfs_fs_put_super(
1092 struct super_block *sb) 1179 struct super_block *sb)
1093{ 1180{
1094 struct xfs_mount *mp = XFS_M(sb); 1181 struct xfs_mount *mp = XFS_M(sb);
1095 struct xfs_inode *rip = mp->m_rootip;
1096 int unmount_event_flags = 0;
1097 1182
1098 xfs_syncd_stop(mp); 1183 xfs_syncd_stop(mp);
1099 1184
@@ -1109,20 +1194,7 @@ xfs_fs_put_super(
1109 xfs_sync_attr(mp, 0); 1194 xfs_sync_attr(mp, 0);
1110 } 1195 }
1111 1196
1112#ifdef HAVE_DMAPI 1197 XFS_SEND_PREUNMOUNT(mp);
1113 if (mp->m_flags & XFS_MOUNT_DMAPI) {
1114 unmount_event_flags =
1115 (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ?
1116 0 : DM_FLAGS_UNWANTED;
1117 /*
1118 * Ignore error from dmapi here, first unmount is not allowed
1119 * to fail anyway, and second we wouldn't want to fail a
1120 * unmount because of dmapi.
1121 */
1122 XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
1123 NULL, NULL, 0, 0, unmount_event_flags);
1124 }
1125#endif
1126 1198
1127 /* 1199 /*
1128 * Blow away any referenced inode in the filestreams cache. 1200 * Blow away any referenced inode in the filestreams cache.
@@ -1133,13 +1205,11 @@ xfs_fs_put_super(
1133 1205
1134 XFS_bflush(mp->m_ddev_targp); 1206 XFS_bflush(mp->m_ddev_targp);
1135 1207
1136 if (mp->m_flags & XFS_MOUNT_DMAPI) { 1208 XFS_SEND_UNMOUNT(mp);
1137 XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
1138 unmount_event_flags);
1139 }
1140 1209
1141 xfs_unmountfs(mp); 1210 xfs_unmountfs(mp);
1142 xfs_freesb(mp); 1211 xfs_freesb(mp);
1212 xfs_inode_shrinker_unregister(mp);
1143 xfs_icsb_destroy_counters(mp); 1213 xfs_icsb_destroy_counters(mp);
1144 xfs_close_devices(mp); 1214 xfs_close_devices(mp);
1145 xfs_dmops_put(mp); 1215 xfs_dmops_put(mp);
@@ -1237,6 +1307,29 @@ xfs_fs_statfs(
1237 return 0; 1307 return 0;
1238} 1308}
1239 1309
1310STATIC void
1311xfs_save_resvblks(struct xfs_mount *mp)
1312{
1313 __uint64_t resblks = 0;
1314
1315 mp->m_resblks_save = mp->m_resblks;
1316 xfs_reserve_blocks(mp, &resblks, NULL);
1317}
1318
1319STATIC void
1320xfs_restore_resvblks(struct xfs_mount *mp)
1321{
1322 __uint64_t resblks;
1323
1324 if (mp->m_resblks_save) {
1325 resblks = mp->m_resblks_save;
1326 mp->m_resblks_save = 0;
1327 } else
1328 resblks = xfs_default_resblks(mp);
1329
1330 xfs_reserve_blocks(mp, &resblks, NULL);
1331}
1332
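How the two new helpers pair up across the rest of this patch:

/*
 *   freeze      -> xfs_save_resvblks()
 *   unfreeze    -> xfs_restore_resvblks()
 *   remount ro  -> xfs_save_resvblks()    (after xfs_quiesce_data)
 *   remount rw  -> xfs_restore_resvblks()
 *
 * Emptying the reserve pool before going read-only keeps the used
 * block count in the on-disk superblock accurate; the stashed size
 * is put back on the way to read-write.
 */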
1240STATIC int 1333STATIC int
1241xfs_fs_remount( 1334xfs_fs_remount(
1242 struct super_block *sb, 1335 struct super_block *sb,
@@ -1316,11 +1409,27 @@ xfs_fs_remount(
1316 } 1409 }
1317 mp->m_update_flags = 0; 1410 mp->m_update_flags = 0;
1318 } 1411 }
1412
1413 /*
1414 * Fill out the reserve pool if it is empty. Use the stashed
1415 * value if it is non-zero, otherwise go with the default.
1416 */
1417 xfs_restore_resvblks(mp);
1319 } 1418 }
1320 1419
1321 /* rw -> ro */ 1420 /* rw -> ro */
1322 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { 1421 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
1422 /*
1423 * After we have synced the data but before we sync the
1424 * metadata, we need to free up the reserve block pool so that
1425 * the used block count in the superblock on disk is correct at
1426 * the end of the remount. Stash the current reserve pool size
1427 * so that if we get remounted rw, we can return it to the same
1428 * size.
1429 */
1430
1323 xfs_quiesce_data(mp); 1431 xfs_quiesce_data(mp);
1432 xfs_save_resvblks(mp);
1324 xfs_quiesce_attr(mp); 1433 xfs_quiesce_attr(mp);
1325 mp->m_flags |= XFS_MOUNT_RDONLY; 1434 mp->m_flags |= XFS_MOUNT_RDONLY;
1326 } 1435 }
@@ -1339,11 +1448,22 @@ xfs_fs_freeze(
1339{ 1448{
1340 struct xfs_mount *mp = XFS_M(sb); 1449 struct xfs_mount *mp = XFS_M(sb);
1341 1450
1451 xfs_save_resvblks(mp);
1342 xfs_quiesce_attr(mp); 1452 xfs_quiesce_attr(mp);
1343 return -xfs_fs_log_dummy(mp); 1453 return -xfs_fs_log_dummy(mp);
1344} 1454}
1345 1455
1346STATIC int 1456STATIC int
1457xfs_fs_unfreeze(
1458 struct super_block *sb)
1459{
1460 struct xfs_mount *mp = XFS_M(sb);
1461
1462 xfs_restore_resvblks(mp);
1463 return 0;
1464}
1465
1466STATIC int
1347xfs_fs_show_options( 1467xfs_fs_show_options(
1348 struct seq_file *m, 1468 struct seq_file *m,
1349 struct vfsmount *mnt) 1469 struct vfsmount *mnt)
@@ -1503,9 +1623,9 @@ xfs_fs_fill_super(
1503 if (error) 1623 if (error)
1504 goto fail_vnrele; 1624 goto fail_vnrele;
1505 1625
1506 kfree(mtpt); 1626 xfs_inode_shrinker_register(mp);
1507 1627
1508 xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); 1628 kfree(mtpt);
1509 return 0; 1629 return 0;
1510 1630
1511 out_filestream_unmount: 1631 out_filestream_unmount:
@@ -1567,6 +1687,7 @@ static const struct super_operations xfs_super_operations = {
1567 .put_super = xfs_fs_put_super, 1687 .put_super = xfs_fs_put_super,
1568 .sync_fs = xfs_fs_sync_fs, 1688 .sync_fs = xfs_fs_sync_fs,
1569 .freeze_fs = xfs_fs_freeze, 1689 .freeze_fs = xfs_fs_freeze,
1690 .unfreeze_fs = xfs_fs_unfreeze,
1570 .statfs = xfs_fs_statfs, 1691 .statfs = xfs_fs_statfs,
1571 .remount_fs = xfs_fs_remount, 1692 .remount_fs = xfs_fs_remount,
1572 .show_options = xfs_fs_show_options, 1693 .show_options = xfs_fs_show_options,
@@ -1581,94 +1702,6 @@ static struct file_system_type xfs_fs_type = {
1581}; 1702};
1582 1703
1583STATIC int __init 1704STATIC int __init
1584xfs_alloc_trace_bufs(void)
1585{
1586#ifdef XFS_ALLOC_TRACE
1587 xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_MAYFAIL);
1588 if (!xfs_alloc_trace_buf)
1589 goto out;
1590#endif
1591#ifdef XFS_BMAP_TRACE
1592 xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_MAYFAIL);
1593 if (!xfs_bmap_trace_buf)
1594 goto out_free_alloc_trace;
1595#endif
1596#ifdef XFS_BTREE_TRACE
1597 xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE,
1598 KM_MAYFAIL);
1599 if (!xfs_allocbt_trace_buf)
1600 goto out_free_bmap_trace;
1601
1602 xfs_inobt_trace_buf = ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL);
1603 if (!xfs_inobt_trace_buf)
1604 goto out_free_allocbt_trace;
1605
1606 xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
1607 if (!xfs_bmbt_trace_buf)
1608 goto out_free_inobt_trace;
1609#endif
1610#ifdef XFS_ATTR_TRACE
1611 xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
1612 if (!xfs_attr_trace_buf)
1613 goto out_free_bmbt_trace;
1614#endif
1615#ifdef XFS_DIR2_TRACE
1616 xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_MAYFAIL);
1617 if (!xfs_dir2_trace_buf)
1618 goto out_free_attr_trace;
1619#endif
1620
1621 return 0;
1622
1623#ifdef XFS_DIR2_TRACE
1624 out_free_attr_trace:
1625#endif
1626#ifdef XFS_ATTR_TRACE
1627 ktrace_free(xfs_attr_trace_buf);
1628 out_free_bmbt_trace:
1629#endif
1630#ifdef XFS_BTREE_TRACE
1631 ktrace_free(xfs_bmbt_trace_buf);
1632 out_free_inobt_trace:
1633 ktrace_free(xfs_inobt_trace_buf);
1634 out_free_allocbt_trace:
1635 ktrace_free(xfs_allocbt_trace_buf);
1636 out_free_bmap_trace:
1637#endif
1638#ifdef XFS_BMAP_TRACE
1639 ktrace_free(xfs_bmap_trace_buf);
1640 out_free_alloc_trace:
1641#endif
1642#ifdef XFS_ALLOC_TRACE
1643 ktrace_free(xfs_alloc_trace_buf);
1644 out:
1645#endif
1646 return -ENOMEM;
1647}
1648
1649STATIC void
1650xfs_free_trace_bufs(void)
1651{
1652#ifdef XFS_DIR2_TRACE
1653 ktrace_free(xfs_dir2_trace_buf);
1654#endif
1655#ifdef XFS_ATTR_TRACE
1656 ktrace_free(xfs_attr_trace_buf);
1657#endif
1658#ifdef XFS_BTREE_TRACE
1659 ktrace_free(xfs_bmbt_trace_buf);
1660 ktrace_free(xfs_inobt_trace_buf);
1661 ktrace_free(xfs_allocbt_trace_buf);
1662#endif
1663#ifdef XFS_BMAP_TRACE
1664 ktrace_free(xfs_bmap_trace_buf);
1665#endif
1666#ifdef XFS_ALLOC_TRACE
1667 ktrace_free(xfs_alloc_trace_buf);
1668#endif
1669}
1670
1671STATIC int __init
1672xfs_init_zones(void) 1705xfs_init_zones(void)
1673{ 1706{
1674 1707
@@ -1809,7 +1842,6 @@ init_xfs_fs(void)
1809 printk(KERN_INFO XFS_VERSION_STRING " with " 1842 printk(KERN_INFO XFS_VERSION_STRING " with "
1810 XFS_BUILD_OPTIONS " enabled\n"); 1843 XFS_BUILD_OPTIONS " enabled\n");
1811 1844
1812 ktrace_init(64);
1813 xfs_ioend_init(); 1845 xfs_ioend_init();
1814 xfs_dir_startup(); 1846 xfs_dir_startup();
1815 1847
@@ -1817,13 +1849,9 @@ init_xfs_fs(void)
1817 if (error) 1849 if (error)
1818 goto out; 1850 goto out;
1819 1851
1820 error = xfs_alloc_trace_bufs();
1821 if (error)
1822 goto out_destroy_zones;
1823
1824 error = xfs_mru_cache_init(); 1852 error = xfs_mru_cache_init();
1825 if (error) 1853 if (error)
1826 goto out_free_trace_buffers; 1854 goto out_destroy_zones;
1827 1855
1828 error = xfs_filestream_init(); 1856 error = xfs_filestream_init();
1829 if (error) 1857 if (error)
@@ -1842,6 +1870,7 @@ init_xfs_fs(void)
1842 goto out_cleanup_procfs; 1870 goto out_cleanup_procfs;
1843 1871
1844 vfs_initquota(); 1872 vfs_initquota();
1873 xfs_inode_shrinker_init();
1845 1874
1846 error = register_filesystem(&xfs_fs_type); 1875 error = register_filesystem(&xfs_fs_type);
1847 if (error) 1876 if (error)
@@ -1858,8 +1887,6 @@ init_xfs_fs(void)
1858 xfs_filestream_uninit(); 1887 xfs_filestream_uninit();
1859 out_mru_cache_uninit: 1888 out_mru_cache_uninit:
1860 xfs_mru_cache_uninit(); 1889 xfs_mru_cache_uninit();
1861 out_free_trace_buffers:
1862 xfs_free_trace_bufs();
1863 out_destroy_zones: 1890 out_destroy_zones:
1864 xfs_destroy_zones(); 1891 xfs_destroy_zones();
1865 out: 1892 out:
@@ -1871,14 +1898,13 @@ exit_xfs_fs(void)
1871{ 1898{
1872 vfs_exitquota(); 1899 vfs_exitquota();
1873 unregister_filesystem(&xfs_fs_type); 1900 unregister_filesystem(&xfs_fs_type);
1901 xfs_inode_shrinker_destroy();
1874 xfs_sysctl_unregister(); 1902 xfs_sysctl_unregister();
1875 xfs_cleanup_procfs(); 1903 xfs_cleanup_procfs();
1876 xfs_buf_terminate(); 1904 xfs_buf_terminate();
1877 xfs_filestream_uninit(); 1905 xfs_filestream_uninit();
1878 xfs_mru_cache_uninit(); 1906 xfs_mru_cache_uninit();
1879 xfs_free_trace_bufs();
1880 xfs_destroy_zones(); 1907 xfs_destroy_zones();
1881 ktrace_uninit();
1882} 1908}
1883 1909
1884module_init(init_xfs_fs); 1910module_init(init_xfs_fs);
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 18175ebd58ed..233d4b9881b1 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -56,12 +56,6 @@ extern void xfs_qm_exit(void);
56# define XFS_BIGFS_STRING 56# define XFS_BIGFS_STRING
57#endif 57#endif
58 58
59#ifdef CONFIG_XFS_TRACE
60# define XFS_TRACE_STRING "tracing, "
61#else
62# define XFS_TRACE_STRING
63#endif
64
65#ifdef CONFIG_XFS_DMAPI 59#ifdef CONFIG_XFS_DMAPI
66# define XFS_DMAPI_STRING "dmapi support, " 60# define XFS_DMAPI_STRING "dmapi support, "
67#else 61#else
@@ -78,7 +72,6 @@ extern void xfs_qm_exit(void);
78 XFS_SECURITY_STRING \ 72 XFS_SECURITY_STRING \
79 XFS_REALTIME_STRING \ 73 XFS_REALTIME_STRING \
80 XFS_BIGFS_STRING \ 74 XFS_BIGFS_STRING \
81 XFS_TRACE_STRING \
82 XFS_DMAPI_STRING \ 75 XFS_DMAPI_STRING \
83 XFS_DBG_STRING /* DBG must be last */ 76 XFS_DBG_STRING /* DBG must be last */
84 77
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 961df0a22c78..a427c638d909 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -44,6 +44,7 @@
44#include "xfs_inode_item.h" 44#include "xfs_inode_item.h"
45#include "xfs_rw.h" 45#include "xfs_rw.h"
46#include "xfs_quota.h" 46#include "xfs_quota.h"
47#include "xfs_trace.h"
47 48
48#include <linux/kthread.h> 49#include <linux/kthread.h>
49#include <linux/freezer.h> 50#include <linux/freezer.h>
@@ -64,7 +65,6 @@ xfs_inode_ag_lookup(
64 * as the tree is sparse and a gang lookup walks to find 65 * as the tree is sparse and a gang lookup walks to find
65 * the number of objects requested. 66 * the number of objects requested.
66 */ 67 */
67 read_lock(&pag->pag_ici_lock);
68 if (tag == XFS_ICI_NO_TAG) { 68 if (tag == XFS_ICI_NO_TAG) {
69 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 69 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
70 (void **)&ip, *first_index, 1); 70 (void **)&ip, *first_index, 1);
@@ -73,7 +73,7 @@ xfs_inode_ag_lookup(
73 (void **)&ip, *first_index, 1, tag); 73 (void **)&ip, *first_index, 1, tag);
74 } 74 }
75 if (!nr_found) 75 if (!nr_found)
76 goto unlock; 76 return NULL;
77 77
78 /* 78 /*
79 * Update the index for the next lookup. Catch overflows 79 * Update the index for the next lookup. Catch overflows
@@ -83,25 +83,21 @@ xfs_inode_ag_lookup(
83 */ 83 */
84 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 84 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
85 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 85 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
86 goto unlock; 86 return NULL;
87
88 return ip; 87 return ip;
89
90unlock:
91 read_unlock(&pag->pag_ici_lock);
92 return NULL;
93} 88}
94 89
95STATIC int 90STATIC int
96xfs_inode_ag_walk( 91xfs_inode_ag_walk(
97 struct xfs_mount *mp, 92 struct xfs_mount *mp,
98 xfs_agnumber_t ag, 93 struct xfs_perag *pag,
99 int (*execute)(struct xfs_inode *ip, 94 int (*execute)(struct xfs_inode *ip,
100 struct xfs_perag *pag, int flags), 95 struct xfs_perag *pag, int flags),
101 int flags, 96 int flags,
102 int tag) 97 int tag,
98 int exclusive,
99 int *nr_to_scan)
103{ 100{
104 struct xfs_perag *pag = &mp->m_perag[ag];
105 uint32_t first_index; 101 uint32_t first_index;
106 int last_error = 0; 102 int last_error = 0;
107 int skipped; 103 int skipped;
@@ -113,10 +109,20 @@ restart:
113 int error = 0; 109 int error = 0;
114 xfs_inode_t *ip; 110 xfs_inode_t *ip;
115 111
112 if (exclusive)
113 write_lock(&pag->pag_ici_lock);
114 else
115 read_lock(&pag->pag_ici_lock);
116 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); 116 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
117 if (!ip) 117 if (!ip) {
118 if (exclusive)
119 write_unlock(&pag->pag_ici_lock);
120 else
121 read_unlock(&pag->pag_ici_lock);
118 break; 122 break;
123 }
119 124
125 /* execute releases pag->pag_ici_lock */
120 error = execute(ip, pag, flags); 126 error = execute(ip, pag, flags);
121 if (error == EAGAIN) { 127 if (error == EAGAIN) {
122 skipped++; 128 skipped++;
@@ -124,20 +130,17 @@ restart:
124 } 130 }
125 if (error) 131 if (error)
126 last_error = error; 132 last_error = error;
127 /* 133
128 * bail out if the filesystem is corrupted. 134 /* bail out if the filesystem is corrupted. */
129 */
130 if (error == EFSCORRUPTED) 135 if (error == EFSCORRUPTED)
131 break; 136 break;
132 137
133 } while (1); 138 } while ((*nr_to_scan)--);
134 139
135 if (skipped) { 140 if (skipped) {
136 delay(1); 141 delay(1);
137 goto restart; 142 goto restart;
138 } 143 }
139
140 xfs_put_perag(mp, pag);
141 return last_error; 144 return last_error;
142} 145}
143 146
@@ -147,22 +150,37 @@ xfs_inode_ag_iterator(
147 int (*execute)(struct xfs_inode *ip, 150 int (*execute)(struct xfs_inode *ip,
148 struct xfs_perag *pag, int flags), 151 struct xfs_perag *pag, int flags),
149 int flags, 152 int flags,
150 int tag) 153 int tag,
154 int exclusive,
155 int *nr_to_scan)
151{ 156{
152 int error = 0; 157 int error = 0;
153 int last_error = 0; 158 int last_error = 0;
154 xfs_agnumber_t ag; 159 xfs_agnumber_t ag;
160 int nr;
155 161
162 nr = nr_to_scan ? *nr_to_scan : INT_MAX;
156 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 163 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
157 if (!mp->m_perag[ag].pag_ici_init) 164 struct xfs_perag *pag;
165
166 pag = xfs_perag_get(mp, ag);
167 if (!pag->pag_ici_init) {
168 xfs_perag_put(pag);
158 continue; 169 continue;
159 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); 170 }
171 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
172 exclusive, &nr);
173 xfs_perag_put(pag);
160 if (error) { 174 if (error) {
161 last_error = error; 175 last_error = error;
162 if (error == EFSCORRUPTED) 176 if (error == EFSCORRUPTED)
163 break; 177 break;
164 } 178 }
179 if (nr <= 0)
180 break;
165 } 181 }
182 if (nr_to_scan)
183 *nr_to_scan = nr;
166 return XFS_ERROR(last_error); 184 return XFS_ERROR(last_error);
167} 185}
168 186
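The hunk above converts the iterator to reference-counted per-AG access (xfs_perag_get()/xfs_perag_put()) and threads a caller-supplied scan budget through the walk. The pattern, reduced to a sketch; walk_one_ag() is a hypothetical stand-in for xfs_inode_ag_walk(), whose body is elided:

/*
 * Reference-counted AG walk with a scan budget, per the patch above.
 */
static int
walk_all_ags(struct xfs_mount *mp, int *nr_to_scan)
{
	xfs_agnumber_t	agno;
	int		last_error = 0;

	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		struct xfs_perag *pag = xfs_perag_get(mp, agno);
		int		error;

		if (!pag->pag_ici_init) {
			/* no inode cache initialised for this AG yet */
			xfs_perag_put(pag);
			continue;
		}
		error = walk_one_ag(mp, pag, nr_to_scan);
		xfs_perag_put(pag);	/* drop the reference taken above */
		if (error) {
			last_error = error;
			if (error == EFSCORRUPTED)
				break;
		}
		if (*nr_to_scan <= 0)
			break;		/* budget exhausted, stop early */
	}
	return last_error;
}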
@@ -173,30 +191,31 @@ xfs_sync_inode_valid(
173 struct xfs_perag *pag) 191 struct xfs_perag *pag)
174{ 192{
175 struct inode *inode = VFS_I(ip); 193 struct inode *inode = VFS_I(ip);
194 int error = EFSCORRUPTED;
176 195
177 /* nothing to sync during shutdown */ 196 /* nothing to sync during shutdown */
178 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 197 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
179 read_unlock(&pag->pag_ici_lock); 198 goto out_unlock;
180 return EFSCORRUPTED;
181 }
182 199
 183 /* 200 /* avoid new or reclaimable inodes; leave them for the reclaim code to flush */
184 * If we can't get a reference on the inode, it must be in reclaim. 201 error = ENOENT;
185 * Leave it for the reclaim code to flush. Also avoid inodes that 202 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
186 * haven't been fully initialised. 203 goto out_unlock;
187 */ 204
 188 if (!igrab(inode)) { 205 /* If we can't grab the inode, it must be on its way to reclaim. */
189 read_unlock(&pag->pag_ici_lock); 206 if (!igrab(inode))
190 return ENOENT; 207 goto out_unlock;
191 }
192 read_unlock(&pag->pag_ici_lock);
193 208
194 if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { 209 if (is_bad_inode(inode)) {
195 IRELE(ip); 210 IRELE(ip);
196 return ENOENT; 211 goto out_unlock;
197 } 212 }
198 213
199 return 0; 214 /* inode is valid */
215 error = 0;
216out_unlock:
217 read_unlock(&pag->pag_ici_lock);
218 return error;
200} 219}
201 220
202STATIC int 221STATIC int
@@ -223,7 +242,7 @@ xfs_sync_inode_data(
223 } 242 }
224 243
225 error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? 244 error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
226 0 : XFS_B_ASYNC, FI_NONE); 245 0 : XBF_ASYNC, FI_NONE);
227 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 246 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
228 247
229 out_wait: 248 out_wait:
@@ -259,8 +278,7 @@ xfs_sync_inode_attr(
259 goto out_unlock; 278 goto out_unlock;
260 } 279 }
261 280
262 error = xfs_iflush(ip, (flags & SYNC_WAIT) ? 281 error = xfs_iflush(ip, flags);
263 XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI);
264 282
265 out_unlock: 283 out_unlock:
266 xfs_iunlock(ip, XFS_ILOCK_SHARED); 284 xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -281,14 +299,11 @@ xfs_sync_data(
281 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); 299 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
282 300
283 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, 301 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
284 XFS_ICI_NO_TAG); 302 XFS_ICI_NO_TAG, 0, NULL);
285 if (error) 303 if (error)
286 return XFS_ERROR(error); 304 return XFS_ERROR(error);
287 305
288 xfs_log_force(mp, 0, 306 xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
289 (flags & SYNC_WAIT) ?
290 XFS_LOG_FORCE | XFS_LOG_SYNC :
291 XFS_LOG_FORCE);
292 return 0; 307 return 0;
293} 308}
294 309
@@ -303,7 +318,7 @@ xfs_sync_attr(
303 ASSERT((flags & ~SYNC_WAIT) == 0); 318 ASSERT((flags & ~SYNC_WAIT) == 0);
304 319
305 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, 320 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
306 XFS_ICI_NO_TAG); 321 XFS_ICI_NO_TAG, 0, NULL);
307} 322}
308 323
309STATIC int 324STATIC int
@@ -314,10 +329,6 @@ xfs_commit_dummy_trans(
314 struct xfs_inode *ip = mp->m_rootip; 329 struct xfs_inode *ip = mp->m_rootip;
315 struct xfs_trans *tp; 330 struct xfs_trans *tp;
316 int error; 331 int error;
317 int log_flags = XFS_LOG_FORCE;
318
319 if (flags & SYNC_WAIT)
320 log_flags |= XFS_LOG_SYNC;
321 332
322 /* 333 /*
323 * Put a dummy transaction in the log to tell recovery 334 * Put a dummy transaction in the log to tell recovery
@@ -339,11 +350,11 @@ xfs_commit_dummy_trans(
339 xfs_iunlock(ip, XFS_ILOCK_EXCL); 350 xfs_iunlock(ip, XFS_ILOCK_EXCL);
340 351
341 /* the log force ensures this transaction is pushed to disk */ 352 /* the log force ensures this transaction is pushed to disk */
342 xfs_log_force(mp, 0, log_flags); 353 xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
343 return error; 354 return error;
344} 355}
345 356
346int 357STATIC int
347xfs_sync_fsdata( 358xfs_sync_fsdata(
348 struct xfs_mount *mp, 359 struct xfs_mount *mp,
349 int flags) 360 int flags)
@@ -359,7 +370,7 @@ xfs_sync_fsdata(
359 if (flags & SYNC_TRYLOCK) { 370 if (flags & SYNC_TRYLOCK) {
360 ASSERT(!(flags & SYNC_WAIT)); 371 ASSERT(!(flags & SYNC_WAIT));
361 372
362 bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); 373 bp = xfs_getsb(mp, XBF_TRYLOCK);
363 if (!bp) 374 if (!bp)
364 goto out; 375 goto out;
365 376
@@ -379,7 +390,7 @@ xfs_sync_fsdata(
379 * become pinned in between there and here. 390 * become pinned in between there and here.
380 */ 391 */
381 if (XFS_BUF_ISPINNED(bp)) 392 if (XFS_BUF_ISPINNED(bp))
382 xfs_log_force(mp, 0, XFS_LOG_FORCE); 393 xfs_log_force(mp, 0);
383 } 394 }
384 395
385 396
@@ -440,9 +451,6 @@ xfs_quiesce_data(
440 xfs_sync_data(mp, SYNC_WAIT); 451 xfs_sync_data(mp, SYNC_WAIT);
441 xfs_qm_sync(mp, SYNC_WAIT); 452 xfs_qm_sync(mp, SYNC_WAIT);
442 453
443 /* drop inode references pinned by filestreams */
444 xfs_filestream_flush(mp);
445
446 /* write superblock and hoover up shutdown errors */ 454 /* write superblock and hoover up shutdown errors */
447 error = xfs_sync_fsdata(mp, SYNC_WAIT); 455 error = xfs_sync_fsdata(mp, SYNC_WAIT);
448 456
@@ -459,16 +467,18 @@ xfs_quiesce_fs(
459{ 467{
460 int count = 0, pincount; 468 int count = 0, pincount;
461 469
470 xfs_reclaim_inodes(mp, 0);
462 xfs_flush_buftarg(mp->m_ddev_targp, 0); 471 xfs_flush_buftarg(mp->m_ddev_targp, 0);
463 xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
464 472
465 /* 473 /*
466 * This loop must run at least twice. The first instance of the loop 474 * This loop must run at least twice. The first instance of the loop
467 * will flush most meta data but that will generate more meta data 475 * will flush most meta data but that will generate more meta data
 468 * (typically directory updates), which then must be flushed and 476 * (typically directory updates), which then must be flushed and
 469 * logged before we can write the unmount record. 477 * logged before we can write the unmount record. We also do sync
 478 * reclaim of inodes to catch any that the above delwri flush skipped.
470 */ 479 */
471 do { 480 do {
481 xfs_reclaim_inodes(mp, SYNC_WAIT);
472 xfs_sync_attr(mp, SYNC_WAIT); 482 xfs_sync_attr(mp, SYNC_WAIT);
473 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); 483 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
474 if (!pincount) { 484 if (!pincount) {
@@ -567,7 +577,7 @@ xfs_flush_inodes(
567 igrab(inode); 577 igrab(inode);
568 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); 578 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
569 wait_for_completion(&completion); 579 wait_for_completion(&completion);
570 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); 580 xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
571} 581}
572 582
573/* 583/*
@@ -583,8 +593,8 @@ xfs_sync_worker(
583 int error; 593 int error;
584 594
585 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { 595 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
586 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 596 xfs_log_force(mp, 0);
587 xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); 597 xfs_reclaim_inodes(mp, 0);
588 /* dgc: errors ignored here */ 598 /* dgc: errors ignored here */
589 error = xfs_qm_sync(mp, SYNC_TRYLOCK); 599 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
590 error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); 600 error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
@@ -605,7 +615,8 @@ xfssyncd(
605 set_freezable(); 615 set_freezable();
606 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); 616 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
607 for (;;) { 617 for (;;) {
608 timeleft = schedule_timeout_interruptible(timeleft); 618 if (list_empty(&mp->m_sync_list))
619 timeleft = schedule_timeout_interruptible(timeleft);
609 /* swsusp */ 620 /* swsusp */
610 try_to_freeze(); 621 try_to_freeze();
611 if (kthread_should_stop() && list_empty(&mp->m_sync_list)) 622 if (kthread_should_stop() && list_empty(&mp->m_sync_list))
@@ -625,8 +636,7 @@ xfssyncd(
625 list_add_tail(&mp->m_sync_work.w_list, 636 list_add_tail(&mp->m_sync_work.w_list,
626 &mp->m_sync_list); 637 &mp->m_sync_list);
627 } 638 }
628 list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list) 639 list_splice_init(&mp->m_sync_list, &tmp);
629 list_move(&work->w_list, &tmp);
630 spin_unlock(&mp->m_sync_lock); 640 spin_unlock(&mp->m_sync_lock);
631 641
632 list_for_each_entry_safe(work, n, &tmp, w_list) { 642 list_for_each_entry_safe(work, n, &tmp, w_list) {
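Replacing the per-entry list_move() loop with list_splice_init() drains the shared work list in O(1) while the lock is held; the entries are then processed with no lock taken. The pattern in isolation, as a sketch (the handler call follows the shape of the xfs_sync work item; the surrounding thread loop is elided):

/*
 * Drain-then-process: splice the shared list onto a stack-local one
 * under the lock, then walk the private copy with the lock dropped.
 */
struct xfs_sync_work	*work, *n;
LIST_HEAD(tmp);

spin_lock(&mp->m_sync_lock);
list_splice_init(&mp->m_sync_list, &tmp);	/* shared list is now empty */
spin_unlock(&mp->m_sync_lock);

list_for_each_entry_safe(work, n, &tmp, w_list) {
	list_del(&work->w_list);
	(*work->w_syncer)(mp, work->w_data);	/* may sleep; no lock held */
}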
@@ -663,67 +673,6 @@ xfs_syncd_stop(
663 kthread_stop(mp->m_sync_task); 673 kthread_stop(mp->m_sync_task);
664} 674}
665 675
666int
667xfs_reclaim_inode(
668 xfs_inode_t *ip,
669 int locked,
670 int sync_mode)
671{
672 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
673
674 /* The hash lock here protects a thread in xfs_iget_core from
675 * racing with us on linking the inode back with a vnode.
676 * Once we have the XFS_IRECLAIM flag set it will not touch
677 * us.
678 */
679 write_lock(&pag->pag_ici_lock);
680 spin_lock(&ip->i_flags_lock);
681 if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
682 !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
683 spin_unlock(&ip->i_flags_lock);
684 write_unlock(&pag->pag_ici_lock);
685 if (locked) {
686 xfs_ifunlock(ip);
687 xfs_iunlock(ip, XFS_ILOCK_EXCL);
688 }
689 return -EAGAIN;
690 }
691 __xfs_iflags_set(ip, XFS_IRECLAIM);
692 spin_unlock(&ip->i_flags_lock);
693 write_unlock(&pag->pag_ici_lock);
694 xfs_put_perag(ip->i_mount, pag);
695
696 /*
697 * If the inode is still dirty, then flush it out. If the inode
698 * is not in the AIL, then it will be OK to flush it delwri as
699 * long as xfs_iflush() does not keep any references to the inode.
700 * We leave that decision up to xfs_iflush() since it has the
701 * knowledge of whether it's OK to simply do a delwri flush of
702 * the inode or whether we need to wait until the inode is
703 * pulled from the AIL.
704 * We get the flush lock regardless, though, just to make sure
705 * we don't free it while it is being flushed.
706 */
707 if (!locked) {
708 xfs_ilock(ip, XFS_ILOCK_EXCL);
709 xfs_iflock(ip);
710 }
711
712 /*
713 * In the case of a forced shutdown we rely on xfs_iflush() to
714 * wait for the inode to be unpinned before returning an error.
715 */
716 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
717 /* synchronize with xfs_iflush_done */
718 xfs_iflock(ip);
719 xfs_ifunlock(ip);
720 }
721
722 xfs_iunlock(ip, XFS_ILOCK_EXCL);
723 xfs_ireclaim(ip);
724 return 0;
725}
726
727void 676void
728__xfs_inode_set_reclaim_tag( 677__xfs_inode_set_reclaim_tag(
729 struct xfs_perag *pag, 678 struct xfs_perag *pag,
@@ -732,6 +681,7 @@ __xfs_inode_set_reclaim_tag(
732 radix_tree_tag_set(&pag->pag_ici_root, 681 radix_tree_tag_set(&pag->pag_ici_root,
733 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), 682 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
734 XFS_ICI_RECLAIM_TAG); 683 XFS_ICI_RECLAIM_TAG);
684 pag->pag_ici_reclaimable++;
735} 685}
736 686
737/* 687/*
@@ -743,16 +693,17 @@ void
743xfs_inode_set_reclaim_tag( 693xfs_inode_set_reclaim_tag(
744 xfs_inode_t *ip) 694 xfs_inode_t *ip)
745{ 695{
746 xfs_mount_t *mp = ip->i_mount; 696 struct xfs_mount *mp = ip->i_mount;
747 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); 697 struct xfs_perag *pag;
748 698
749 read_lock(&pag->pag_ici_lock); 699 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
700 write_lock(&pag->pag_ici_lock);
750 spin_lock(&ip->i_flags_lock); 701 spin_lock(&ip->i_flags_lock);
751 __xfs_inode_set_reclaim_tag(pag, ip); 702 __xfs_inode_set_reclaim_tag(pag, ip);
752 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 703 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
753 spin_unlock(&ip->i_flags_lock); 704 spin_unlock(&ip->i_flags_lock);
754 read_unlock(&pag->pag_ici_lock); 705 write_unlock(&pag->pag_ici_lock);
755 xfs_put_perag(mp, pag); 706 xfs_perag_put(pag);
756} 707}
757 708
758void 709void
@@ -763,22 +714,148 @@ __xfs_inode_clear_reclaim_tag(
763{ 714{
764 radix_tree_tag_clear(&pag->pag_ici_root, 715 radix_tree_tag_clear(&pag->pag_ici_root,
765 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); 716 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
717 pag->pag_ici_reclaimable--;
766} 718}
767 719
720/*
721 * Inodes in different states need to be treated differently, and the return
722 * value of xfs_iflush is not sufficient to get this right. The following table
723 * lists the inode states and the reclaim actions necessary for non-blocking
724 * reclaim:
725 *
726 *
727 * inode state iflush ret required action
728 * --------------- ---------- ---------------
729 * bad - reclaim
730 * shutdown EIO unpin and reclaim
731 * clean, unpinned 0 reclaim
732 * stale, unpinned 0 reclaim
733 * clean, pinned(*) 0 requeue
734 * stale, pinned EAGAIN requeue
735 * dirty, delwri ok 0 requeue
736 * dirty, delwri blocked EAGAIN requeue
737 * dirty, sync flush 0 reclaim
738 *
739 * (*) dgc: I don't think the clean, pinned state is possible but it gets
740 * handled anyway given the order of checks implemented.
741 *
742 * As can be seen from the table, the return value of xfs_iflush() is not
743 * sufficient to correctly decide the reclaim action here. The checks in
744 * xfs_iflush() might look like duplicates, but they are not.
745 *
746 * Also, because we get the flush lock first, we know that any inode that has
747 * been flushed delwri has had the flush completed by the time we check that
748 * the inode is clean. The clean inode check needs to be done before flushing
749 * the inode delwri otherwise we would loop forever requeuing clean inodes as
750 * we cannot tell apart a successful delwri flush and a clean inode from the
751 * return value of xfs_iflush().
752 *
753 * Note that because the inode is flushed delayed write by background
754 * writeback, the flush lock may already be held here and waiting on it can
755 * result in very long latencies. Hence for sync reclaims, where we wait on the
756 * flush lock, the caller should push out delayed write inodes first before
757 * trying to reclaim them to minimise the amount of time spent waiting. For
758 * background relaim, we just requeue the inode for the next pass.
759 *
760 * Hence the order of actions after gaining the locks should be:
761 * bad => reclaim
762 * shutdown => unpin and reclaim
763 * pinned, delwri => requeue
764 * pinned, sync => unpin
765 * stale => reclaim
766 * clean => reclaim
767 * dirty, delwri => flush and requeue
768 * dirty, sync => flush, wait and reclaim
769 */
768STATIC int 770STATIC int
769xfs_reclaim_inode_now( 771xfs_reclaim_inode(
770 struct xfs_inode *ip, 772 struct xfs_inode *ip,
771 struct xfs_perag *pag, 773 struct xfs_perag *pag,
772 int flags) 774 int sync_mode)
773{ 775{
774 /* ignore if already under reclaim */ 776 int error = 0;
775 if (xfs_iflags_test(ip, XFS_IRECLAIM)) { 777
776 read_unlock(&pag->pag_ici_lock); 778 /*
779 * The radix tree lock here protects a thread in xfs_iget from racing
780 * with us starting reclaim on the inode. Once we have the
781 * XFS_IRECLAIM flag set it will not touch us.
782 */
783 spin_lock(&ip->i_flags_lock);
784 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
785 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
786 /* ignore as it is already under reclaim */
787 spin_unlock(&ip->i_flags_lock);
788 write_unlock(&pag->pag_ici_lock);
777 return 0; 789 return 0;
778 } 790 }
779 read_unlock(&pag->pag_ici_lock); 791 __xfs_iflags_set(ip, XFS_IRECLAIM);
792 spin_unlock(&ip->i_flags_lock);
793 write_unlock(&pag->pag_ici_lock);
794
795 xfs_ilock(ip, XFS_ILOCK_EXCL);
796 if (!xfs_iflock_nowait(ip)) {
797 if (!(sync_mode & SYNC_WAIT))
798 goto out;
799 xfs_iflock(ip);
800 }
801
802 if (is_bad_inode(VFS_I(ip)))
803 goto reclaim;
804 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
805 xfs_iunpin_wait(ip);
806 goto reclaim;
807 }
808 if (xfs_ipincount(ip)) {
809 if (!(sync_mode & SYNC_WAIT)) {
810 xfs_ifunlock(ip);
811 goto out;
812 }
813 xfs_iunpin_wait(ip);
814 }
815 if (xfs_iflags_test(ip, XFS_ISTALE))
816 goto reclaim;
817 if (xfs_inode_clean(ip))
818 goto reclaim;
819
820 /* Now we have an inode that needs flushing */
821 error = xfs_iflush(ip, sync_mode);
822 if (sync_mode & SYNC_WAIT) {
823 xfs_iflock(ip);
824 goto reclaim;
825 }
826
827 /*
828 * When we have to flush an inode but don't have SYNC_WAIT set, we
829 * flush the inode out using a delwri buffer and wait for the next
830 * call into reclaim to find it in a clean state instead of waiting for
831 * it now. We also don't return errors here - if the error is transient
832 * then the next reclaim pass will flush the inode, and if the error
833 * is permanent then the next sync reclaim will reclaim the inode and
834 * pass on the error.
835 */
836 if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
837 xfs_fs_cmn_err(CE_WARN, ip->i_mount,
838 "inode 0x%llx background reclaim flush failed with %d",
839 (long long)ip->i_ino, error);
840 }
841out:
842 xfs_iflags_clear(ip, XFS_IRECLAIM);
843 xfs_iunlock(ip, XFS_ILOCK_EXCL);
844 /*
845 * We could return EAGAIN here to make reclaim rescan the inode tree in
846 * a short while. However, this just burns CPU time scanning the tree
847 * waiting for IO to complete and xfssyncd never goes back to the idle
848 * state. Instead, return 0 to let the next scheduled background reclaim
849 * attempt to reclaim the inode again.
850 */
851 return 0;
852
853reclaim:
854 xfs_ifunlock(ip);
855 xfs_iunlock(ip, XFS_ILOCK_EXCL);
856 xfs_ireclaim(ip);
857 return error;
780 858
781 return xfs_reclaim_inode(ip, 0, flags);
782} 859}
783 860
784int 861int
@@ -786,6 +863,94 @@ xfs_reclaim_inodes(
786 xfs_mount_t *mp, 863 xfs_mount_t *mp,
787 int mode) 864 int mode)
788{ 865{
789 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, 866 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
790 XFS_ICI_RECLAIM_TAG); 867 XFS_ICI_RECLAIM_TAG, 1, NULL);
868}
869
870/*
871 * Shrinker infrastructure.
872 *
873 * This is all far more complex than it needs to be. It adds a global list of
 874 * mounts because shrinker callbacks receive no per-mount context. We need
 875 * to make the shrinkers pass a context to avoid the need for global state.
876 */
877static LIST_HEAD(xfs_mount_list);
878static struct rw_semaphore xfs_mount_list_lock;
879
880static int
881xfs_reclaim_inode_shrink(
882 int nr_to_scan,
883 gfp_t gfp_mask)
884{
885 struct xfs_mount *mp;
886 struct xfs_perag *pag;
887 xfs_agnumber_t ag;
888 int reclaimable = 0;
889
890 if (nr_to_scan) {
891 if (!(gfp_mask & __GFP_FS))
892 return -1;
893
894 down_read(&xfs_mount_list_lock);
895 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
896 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
897 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
898 if (nr_to_scan <= 0)
899 break;
900 }
901 up_read(&xfs_mount_list_lock);
902 }
903
904 down_read(&xfs_mount_list_lock);
905 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
906 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
907
908 pag = xfs_perag_get(mp, ag);
909 if (!pag->pag_ici_init) {
910 xfs_perag_put(pag);
911 continue;
912 }
913 reclaimable += pag->pag_ici_reclaimable;
914 xfs_perag_put(pag);
915 }
916 }
917 up_read(&xfs_mount_list_lock);
918 return reclaimable;
919}
920
921static struct shrinker xfs_inode_shrinker = {
922 .shrink = xfs_reclaim_inode_shrink,
923 .seeks = DEFAULT_SEEKS,
924};
925
926void __init
927xfs_inode_shrinker_init(void)
928{
929 init_rwsem(&xfs_mount_list_lock);
930 register_shrinker(&xfs_inode_shrinker);
931}
932
933void
934xfs_inode_shrinker_destroy(void)
935{
936 ASSERT(list_empty(&xfs_mount_list));
937 unregister_shrinker(&xfs_inode_shrinker);
938}
939
940void
941xfs_inode_shrinker_register(
942 struct xfs_mount *mp)
943{
944 down_write(&xfs_mount_list_lock);
945 list_add_tail(&mp->m_mplist, &xfs_mount_list);
946 up_write(&xfs_mount_list_lock);
947}
948
949void
950xfs_inode_shrinker_unregister(
951 struct xfs_mount *mp)
952{
953 down_write(&xfs_mount_list_lock);
954 list_del(&mp->m_mplist);
955 up_write(&xfs_mount_list_lock);
791} 956}
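The shrinker code above is shaped by the shrinker API of this kernel era: a single .shrink(int nr_to_scan, gfp_t) callback both reports the pool size (when called with nr_to_scan == 0) and performs reclaim, and it carries no private data pointer, which is why the patch needs the global xfs_mount_list. A sketch of that contract (reclaim_some_objects() and count_reclaimable_objects() are hypothetical):

/*
 * 2.6.3x-style shrinker: one callback for both counting and scanning.
 * Returning -1 refuses the request, e.g. when the allocation context
 * cannot recurse into the filesystem.
 */
static int
example_shrink(int nr_to_scan, gfp_t gfp_mask)
{
	if (nr_to_scan) {
		if (!(gfp_mask & __GFP_FS))
			return -1;		/* can't recurse into the fs */
		reclaim_some_objects(nr_to_scan);
	}
	return count_reclaimable_objects();	/* current pool size */
}

static struct shrinker example_shrinker = {
	.shrink	= example_shrink,
	.seeks	= DEFAULT_SEEKS,	/* default cost to recreate an object */
};

/* register_shrinker(&example_shrinker) at init,
 * unregister_shrinker(&example_shrinker) at exit. */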
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 27920eb7a820..cdcbaaca9880 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -37,14 +37,12 @@ void xfs_syncd_stop(struct xfs_mount *mp);
37 37
38int xfs_sync_attr(struct xfs_mount *mp, int flags); 38int xfs_sync_attr(struct xfs_mount *mp, int flags);
39int xfs_sync_data(struct xfs_mount *mp, int flags); 39int xfs_sync_data(struct xfs_mount *mp, int flags);
40int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
41 40
42int xfs_quiesce_data(struct xfs_mount *mp); 41int xfs_quiesce_data(struct xfs_mount *mp);
43void xfs_quiesce_attr(struct xfs_mount *mp); 42void xfs_quiesce_attr(struct xfs_mount *mp);
44 43
45void xfs_flush_inodes(struct xfs_inode *ip); 44void xfs_flush_inodes(struct xfs_inode *ip);
46 45
47int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
48int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 46int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
49 47
50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 48void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
@@ -55,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
55int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); 53int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
56int xfs_inode_ag_iterator(struct xfs_mount *mp, 54int xfs_inode_ag_iterator(struct xfs_mount *mp,
57 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
58 int flags, int tag); 56 int flags, int tag, int write_lock, int *nr_to_scan);
57
58void xfs_inode_shrinker_init(void);
59void xfs_inode_shrinker_destroy(void);
60void xfs_inode_shrinker_register(struct xfs_mount *mp);
61void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
59 62
60#endif 63#endif
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index c5bc67c4e3bb..7bb5092d6ae4 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -55,170 +55,140 @@ xfs_stats_clear_proc_handler(
55 55
56static ctl_table xfs_table[] = { 56static ctl_table xfs_table[] = {
57 { 57 {
58 .ctl_name = XFS_SGID_INHERIT,
59 .procname = "irix_sgid_inherit", 58 .procname = "irix_sgid_inherit",
60 .data = &xfs_params.sgid_inherit.val, 59 .data = &xfs_params.sgid_inherit.val,
61 .maxlen = sizeof(int), 60 .maxlen = sizeof(int),
62 .mode = 0644, 61 .mode = 0644,
63 .proc_handler = &proc_dointvec_minmax, 62 .proc_handler = proc_dointvec_minmax,
64 .strategy = &sysctl_intvec,
65 .extra1 = &xfs_params.sgid_inherit.min, 63 .extra1 = &xfs_params.sgid_inherit.min,
66 .extra2 = &xfs_params.sgid_inherit.max 64 .extra2 = &xfs_params.sgid_inherit.max
67 }, 65 },
68 { 66 {
69 .ctl_name = XFS_SYMLINK_MODE,
70 .procname = "irix_symlink_mode", 67 .procname = "irix_symlink_mode",
71 .data = &xfs_params.symlink_mode.val, 68 .data = &xfs_params.symlink_mode.val,
72 .maxlen = sizeof(int), 69 .maxlen = sizeof(int),
73 .mode = 0644, 70 .mode = 0644,
74 .proc_handler = &proc_dointvec_minmax, 71 .proc_handler = proc_dointvec_minmax,
75 .strategy = &sysctl_intvec,
76 .extra1 = &xfs_params.symlink_mode.min, 72 .extra1 = &xfs_params.symlink_mode.min,
77 .extra2 = &xfs_params.symlink_mode.max 73 .extra2 = &xfs_params.symlink_mode.max
78 }, 74 },
79 { 75 {
80 .ctl_name = XFS_PANIC_MASK,
81 .procname = "panic_mask", 76 .procname = "panic_mask",
82 .data = &xfs_params.panic_mask.val, 77 .data = &xfs_params.panic_mask.val,
83 .maxlen = sizeof(int), 78 .maxlen = sizeof(int),
84 .mode = 0644, 79 .mode = 0644,
85 .proc_handler = &proc_dointvec_minmax, 80 .proc_handler = proc_dointvec_minmax,
86 .strategy = &sysctl_intvec,
87 .extra1 = &xfs_params.panic_mask.min, 81 .extra1 = &xfs_params.panic_mask.min,
88 .extra2 = &xfs_params.panic_mask.max 82 .extra2 = &xfs_params.panic_mask.max
89 }, 83 },
90 84
91 { 85 {
92 .ctl_name = XFS_ERRLEVEL,
93 .procname = "error_level", 86 .procname = "error_level",
94 .data = &xfs_params.error_level.val, 87 .data = &xfs_params.error_level.val,
95 .maxlen = sizeof(int), 88 .maxlen = sizeof(int),
96 .mode = 0644, 89 .mode = 0644,
97 .proc_handler = &proc_dointvec_minmax, 90 .proc_handler = proc_dointvec_minmax,
98 .strategy = &sysctl_intvec,
99 .extra1 = &xfs_params.error_level.min, 91 .extra1 = &xfs_params.error_level.min,
100 .extra2 = &xfs_params.error_level.max 92 .extra2 = &xfs_params.error_level.max
101 }, 93 },
102 { 94 {
103 .ctl_name = XFS_SYNCD_TIMER,
104 .procname = "xfssyncd_centisecs", 95 .procname = "xfssyncd_centisecs",
105 .data = &xfs_params.syncd_timer.val, 96 .data = &xfs_params.syncd_timer.val,
106 .maxlen = sizeof(int), 97 .maxlen = sizeof(int),
107 .mode = 0644, 98 .mode = 0644,
108 .proc_handler = &proc_dointvec_minmax, 99 .proc_handler = proc_dointvec_minmax,
109 .strategy = &sysctl_intvec,
110 .extra1 = &xfs_params.syncd_timer.min, 100 .extra1 = &xfs_params.syncd_timer.min,
111 .extra2 = &xfs_params.syncd_timer.max 101 .extra2 = &xfs_params.syncd_timer.max
112 }, 102 },
113 { 103 {
114 .ctl_name = XFS_INHERIT_SYNC,
115 .procname = "inherit_sync", 104 .procname = "inherit_sync",
116 .data = &xfs_params.inherit_sync.val, 105 .data = &xfs_params.inherit_sync.val,
117 .maxlen = sizeof(int), 106 .maxlen = sizeof(int),
118 .mode = 0644, 107 .mode = 0644,
119 .proc_handler = &proc_dointvec_minmax, 108 .proc_handler = proc_dointvec_minmax,
120 .strategy = &sysctl_intvec,
121 .extra1 = &xfs_params.inherit_sync.min, 109 .extra1 = &xfs_params.inherit_sync.min,
122 .extra2 = &xfs_params.inherit_sync.max 110 .extra2 = &xfs_params.inherit_sync.max
123 }, 111 },
124 { 112 {
125 .ctl_name = XFS_INHERIT_NODUMP,
126 .procname = "inherit_nodump", 113 .procname = "inherit_nodump",
127 .data = &xfs_params.inherit_nodump.val, 114 .data = &xfs_params.inherit_nodump.val,
128 .maxlen = sizeof(int), 115 .maxlen = sizeof(int),
129 .mode = 0644, 116 .mode = 0644,
130 .proc_handler = &proc_dointvec_minmax, 117 .proc_handler = proc_dointvec_minmax,
131 .strategy = &sysctl_intvec,
132 .extra1 = &xfs_params.inherit_nodump.min, 118 .extra1 = &xfs_params.inherit_nodump.min,
133 .extra2 = &xfs_params.inherit_nodump.max 119 .extra2 = &xfs_params.inherit_nodump.max
134 }, 120 },
135 { 121 {
136 .ctl_name = XFS_INHERIT_NOATIME,
137 .procname = "inherit_noatime", 122 .procname = "inherit_noatime",
138 .data = &xfs_params.inherit_noatim.val, 123 .data = &xfs_params.inherit_noatim.val,
139 .maxlen = sizeof(int), 124 .maxlen = sizeof(int),
140 .mode = 0644, 125 .mode = 0644,
141 .proc_handler = &proc_dointvec_minmax, 126 .proc_handler = proc_dointvec_minmax,
142 .strategy = &sysctl_intvec,
143 .extra1 = &xfs_params.inherit_noatim.min, 127 .extra1 = &xfs_params.inherit_noatim.min,
144 .extra2 = &xfs_params.inherit_noatim.max 128 .extra2 = &xfs_params.inherit_noatim.max
145 }, 129 },
146 { 130 {
147 .ctl_name = XFS_BUF_TIMER,
148 .procname = "xfsbufd_centisecs", 131 .procname = "xfsbufd_centisecs",
149 .data = &xfs_params.xfs_buf_timer.val, 132 .data = &xfs_params.xfs_buf_timer.val,
150 .maxlen = sizeof(int), 133 .maxlen = sizeof(int),
151 .mode = 0644, 134 .mode = 0644,
152 .proc_handler = &proc_dointvec_minmax, 135 .proc_handler = proc_dointvec_minmax,
153 .strategy = &sysctl_intvec,
154 .extra1 = &xfs_params.xfs_buf_timer.min, 136 .extra1 = &xfs_params.xfs_buf_timer.min,
155 .extra2 = &xfs_params.xfs_buf_timer.max 137 .extra2 = &xfs_params.xfs_buf_timer.max
156 }, 138 },
157 { 139 {
158 .ctl_name = XFS_BUF_AGE,
159 .procname = "age_buffer_centisecs", 140 .procname = "age_buffer_centisecs",
160 .data = &xfs_params.xfs_buf_age.val, 141 .data = &xfs_params.xfs_buf_age.val,
161 .maxlen = sizeof(int), 142 .maxlen = sizeof(int),
162 .mode = 0644, 143 .mode = 0644,
163 .proc_handler = &proc_dointvec_minmax, 144 .proc_handler = proc_dointvec_minmax,
164 .strategy = &sysctl_intvec,
165 .extra1 = &xfs_params.xfs_buf_age.min, 145 .extra1 = &xfs_params.xfs_buf_age.min,
166 .extra2 = &xfs_params.xfs_buf_age.max 146 .extra2 = &xfs_params.xfs_buf_age.max
167 }, 147 },
168 { 148 {
169 .ctl_name = XFS_INHERIT_NOSYM,
170 .procname = "inherit_nosymlinks", 149 .procname = "inherit_nosymlinks",
171 .data = &xfs_params.inherit_nosym.val, 150 .data = &xfs_params.inherit_nosym.val,
172 .maxlen = sizeof(int), 151 .maxlen = sizeof(int),
173 .mode = 0644, 152 .mode = 0644,
174 .proc_handler = &proc_dointvec_minmax, 153 .proc_handler = proc_dointvec_minmax,
175 .strategy = &sysctl_intvec,
176 .extra1 = &xfs_params.inherit_nosym.min, 154 .extra1 = &xfs_params.inherit_nosym.min,
177 .extra2 = &xfs_params.inherit_nosym.max 155 .extra2 = &xfs_params.inherit_nosym.max
178 }, 156 },
179 { 157 {
180 .ctl_name = XFS_ROTORSTEP,
181 .procname = "rotorstep", 158 .procname = "rotorstep",
182 .data = &xfs_params.rotorstep.val, 159 .data = &xfs_params.rotorstep.val,
183 .maxlen = sizeof(int), 160 .maxlen = sizeof(int),
184 .mode = 0644, 161 .mode = 0644,
185 .proc_handler = &proc_dointvec_minmax, 162 .proc_handler = proc_dointvec_minmax,
186 .strategy = &sysctl_intvec,
187 .extra1 = &xfs_params.rotorstep.min, 163 .extra1 = &xfs_params.rotorstep.min,
188 .extra2 = &xfs_params.rotorstep.max 164 .extra2 = &xfs_params.rotorstep.max
189 }, 165 },
190 { 166 {
191 .ctl_name = XFS_INHERIT_NODFRG,
192 .procname = "inherit_nodefrag", 167 .procname = "inherit_nodefrag",
193 .data = &xfs_params.inherit_nodfrg.val, 168 .data = &xfs_params.inherit_nodfrg.val,
194 .maxlen = sizeof(int), 169 .maxlen = sizeof(int),
195 .mode = 0644, 170 .mode = 0644,
196 .proc_handler = &proc_dointvec_minmax, 171 .proc_handler = proc_dointvec_minmax,
197 .strategy = &sysctl_intvec,
198 .extra1 = &xfs_params.inherit_nodfrg.min, 172 .extra1 = &xfs_params.inherit_nodfrg.min,
199 .extra2 = &xfs_params.inherit_nodfrg.max 173 .extra2 = &xfs_params.inherit_nodfrg.max
200 }, 174 },
201 { 175 {
202 .ctl_name = XFS_FILESTREAM_TIMER,
203 .procname = "filestream_centisecs", 176 .procname = "filestream_centisecs",
204 .data = &xfs_params.fstrm_timer.val, 177 .data = &xfs_params.fstrm_timer.val,
205 .maxlen = sizeof(int), 178 .maxlen = sizeof(int),
206 .mode = 0644, 179 .mode = 0644,
207 .proc_handler = &proc_dointvec_minmax, 180 .proc_handler = proc_dointvec_minmax,
208 .strategy = &sysctl_intvec,
209 .extra1 = &xfs_params.fstrm_timer.min, 181 .extra1 = &xfs_params.fstrm_timer.min,
210 .extra2 = &xfs_params.fstrm_timer.max, 182 .extra2 = &xfs_params.fstrm_timer.max,
211 }, 183 },
212 /* please keep this the last entry */ 184 /* please keep this the last entry */
213#ifdef CONFIG_PROC_FS 185#ifdef CONFIG_PROC_FS
214 { 186 {
215 .ctl_name = XFS_STATS_CLEAR,
216 .procname = "stats_clear", 187 .procname = "stats_clear",
217 .data = &xfs_params.stats_clear.val, 188 .data = &xfs_params.stats_clear.val,
218 .maxlen = sizeof(int), 189 .maxlen = sizeof(int),
219 .mode = 0644, 190 .mode = 0644,
220 .proc_handler = &xfs_stats_clear_proc_handler, 191 .proc_handler = xfs_stats_clear_proc_handler,
221 .strategy = &sysctl_intvec,
222 .extra1 = &xfs_params.stats_clear.min, 192 .extra1 = &xfs_params.stats_clear.min,
223 .extra2 = &xfs_params.stats_clear.max 193 .extra2 = &xfs_params.stats_clear.max
224 }, 194 },
@@ -229,7 +199,6 @@ static ctl_table xfs_table[] = {
229 199
230static ctl_table xfs_dir_table[] = { 200static ctl_table xfs_dir_table[] = {
231 { 201 {
232 .ctl_name = FS_XFS,
233 .procname = "xfs", 202 .procname = "xfs",
234 .mode = 0555, 203 .mode = 0555,
235 .child = xfs_table 204 .child = xfs_table
@@ -239,7 +208,6 @@ static ctl_table xfs_dir_table[] = {
239 208
240static ctl_table xfs_root_table[] = { 209static ctl_table xfs_root_table[] = {
241 { 210 {
242 .ctl_name = CTL_FS,
243 .procname = "fs", 211 .procname = "fs",
244 .mode = 0555, 212 .mode = 0555,
245 .child = xfs_dir_table 213 .child = xfs_dir_table
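Every sysctl entry above loses its .ctl_name and .strategy members, part of the tree-wide removal of binary-sysctl plumbing around this time; entries are matched by .procname alone and proc handlers are referenced as plain function pointers, without '&'. The shape of a converted entry, with hypothetical example_* values standing in for the real xfs_params members:

/*
 * ctl_table entry after the conversion: /proc/sys path only.
 */
static int example_val, example_min = 0, example_max = 100;

static ctl_table example_table[] = {
	{
		.procname	= "example_knob",
		.data		= &example_val,
		.maxlen		= sizeof(int),
		.mode		= 0644,			/* root-writable */
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &example_min,
		.extra2		= &example_max,
	},
	{ }	/* zeroed sentinel terminates the table */
};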
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
new file mode 100644
index 000000000000..5a107601e969
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -0,0 +1,59 @@
1/*
2 * Copyright (c) 2009, Christoph Hellwig
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_bit.h"
22#include "xfs_log.h"
23#include "xfs_inum.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h"
32#include "xfs_dir2_sf.h"
33#include "xfs_attr_sf.h"
34#include "xfs_dinode.h"
35#include "xfs_inode.h"
36#include "xfs_btree.h"
37#include "xfs_dmapi.h"
38#include "xfs_mount.h"
39#include "xfs_ialloc.h"
40#include "xfs_itable.h"
41#include "xfs_alloc.h"
42#include "xfs_bmap.h"
43#include "xfs_attr.h"
44#include "xfs_attr_sf.h"
45#include "xfs_attr_leaf.h"
46#include "xfs_log_priv.h"
47#include "xfs_buf_item.h"
48#include "xfs_quota.h"
49#include "xfs_iomap.h"
50#include "xfs_aops.h"
51#include "quota/xfs_dquot_item.h"
52#include "quota/xfs_dquot.h"
53
54/*
55 * We include this last to have the helpers above available for the trace
56 * event implementations.
57 */
58#define CREATE_TRACE_POINTS
59#include "xfs_trace.h"
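xfs_trace.c exists only to expand the event declarations once: CREATE_TRACE_POINTS must be defined in exactly one translation unit before including the trace header, which the xfs_trace.h that follows builds from TRACE_EVENT and DECLARE_EVENT_CLASS/DEFINE_EVENT. The idiom in miniature, with generic foo_* names rather than XFS code:

/* foo_trace.h -- a minimal version of the idiom used by xfs_trace.h */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM foo

#if !defined(_TRACE_FOO_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_FOO_H

#include <linux/tracepoint.h>

TRACE_EVENT(foo_event,
	TP_PROTO(int value),
	TP_ARGS(value),
	TP_STRUCT__entry(
		__field(int, value)
	),
	TP_fast_assign(
		__entry->value = value;
	),
	TP_printk("value %d", __entry->value)
);

#endif /* _TRACE_FOO_H */

/* outside the guard: lets define_trace.h re-read this header */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE foo_trace
#include <trace/define_trace.h>

/* foo_trace.c -- the one translation unit that expands the events */
#define CREATE_TRACE_POINTS
#include "foo_trace.h"

/* any caller may then emit the event: trace_foo_event(42); */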
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
new file mode 100644
index 000000000000..fcaa62f0799e
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -0,0 +1,1503 @@
1/*
2 * Copyright (c) 2009, Christoph Hellwig
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#undef TRACE_SYSTEM
19#define TRACE_SYSTEM xfs
20
21#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ)
22#define _TRACE_XFS_H
23
24#include <linux/tracepoint.h>
25
26struct xfs_agf;
27struct xfs_alloc_arg;
28struct xfs_attr_list_context;
29struct xfs_buf_log_item;
30struct xfs_da_args;
31struct xfs_da_node_entry;
32struct xfs_dquot;
33struct xlog_ticket;
34struct log;
35
36DECLARE_EVENT_CLASS(xfs_attr_list_class,
37 TP_PROTO(struct xfs_attr_list_context *ctx),
38 TP_ARGS(ctx),
39 TP_STRUCT__entry(
40 __field(dev_t, dev)
41 __field(xfs_ino_t, ino)
42 __field(u32, hashval)
43 __field(u32, blkno)
44 __field(u32, offset)
45 __field(void *, alist)
46 __field(int, bufsize)
47 __field(int, count)
48 __field(int, firstu)
49 __field(int, dupcnt)
50 __field(int, flags)
51 ),
52 TP_fast_assign(
53 __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
54 __entry->ino = ctx->dp->i_ino;
55 __entry->hashval = ctx->cursor->hashval;
56 __entry->blkno = ctx->cursor->blkno;
57 __entry->offset = ctx->cursor->offset;
58 __entry->alist = ctx->alist;
59 __entry->bufsize = ctx->bufsize;
60 __entry->count = ctx->count;
61 __entry->firstu = ctx->firstu;
62 __entry->flags = ctx->flags;
63 ),
64 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
65 "alist 0x%p size %u count %u firstu %u flags %d %s",
66 MAJOR(__entry->dev), MINOR(__entry->dev),
67 __entry->ino,
68 __entry->hashval,
69 __entry->blkno,
70 __entry->offset,
71 __entry->dupcnt,
72 __entry->alist,
73 __entry->bufsize,
74 __entry->count,
75 __entry->firstu,
76 __entry->flags,
77 __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
78 )
79)
80
81#define DEFINE_PERAG_REF_EVENT(name) \
82TRACE_EVENT(name, \
83 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
84 unsigned long caller_ip), \
85 TP_ARGS(mp, agno, refcount, caller_ip), \
86 TP_STRUCT__entry( \
87 __field(dev_t, dev) \
88 __field(xfs_agnumber_t, agno) \
89 __field(int, refcount) \
90 __field(unsigned long, caller_ip) \
91 ), \
92 TP_fast_assign( \
93 __entry->dev = mp->m_super->s_dev; \
94 __entry->agno = agno; \
95 __entry->refcount = refcount; \
96 __entry->caller_ip = caller_ip; \
97 ), \
98 TP_printk("dev %d:%d agno %u refcount %d caller %pf", \
99 MAJOR(__entry->dev), MINOR(__entry->dev), \
100 __entry->agno, \
101 __entry->refcount, \
102 (char *)__entry->caller_ip) \
103);
104
105DEFINE_PERAG_REF_EVENT(xfs_perag_get)
106DEFINE_PERAG_REF_EVENT(xfs_perag_put)
107
108#define DEFINE_ATTR_LIST_EVENT(name) \
109DEFINE_EVENT(xfs_attr_list_class, name, \
110 TP_PROTO(struct xfs_attr_list_context *ctx), \
111 TP_ARGS(ctx))
112DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
113DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
114DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
115DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end);
116DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full);
117DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
118DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
119DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
120
121TRACE_EVENT(xfs_attr_list_node_descend,
122 TP_PROTO(struct xfs_attr_list_context *ctx,
123 struct xfs_da_node_entry *btree),
124 TP_ARGS(ctx, btree),
125 TP_STRUCT__entry(
126 __field(dev_t, dev)
127 __field(xfs_ino_t, ino)
128 __field(u32, hashval)
129 __field(u32, blkno)
130 __field(u32, offset)
131 __field(void *, alist)
132 __field(int, bufsize)
133 __field(int, count)
134 __field(int, firstu)
135 __field(int, dupcnt)
136 __field(int, flags)
137 __field(u32, bt_hashval)
138 __field(u32, bt_before)
139 ),
140 TP_fast_assign(
141 __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
142 __entry->ino = ctx->dp->i_ino;
143 __entry->hashval = ctx->cursor->hashval;
144 __entry->blkno = ctx->cursor->blkno;
145 __entry->offset = ctx->cursor->offset;
146 __entry->alist = ctx->alist;
147 __entry->bufsize = ctx->bufsize;
148 __entry->count = ctx->count;
149 __entry->firstu = ctx->firstu;
150 __entry->flags = ctx->flags;
151 __entry->bt_hashval = be32_to_cpu(btree->hashval);
152 __entry->bt_before = be32_to_cpu(btree->before);
153 ),
154 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
155 "alist 0x%p size %u count %u firstu %u flags %d %s "
156 "node hashval %u, node before %u",
157 MAJOR(__entry->dev), MINOR(__entry->dev),
158 __entry->ino,
159 __entry->hashval,
160 __entry->blkno,
161 __entry->offset,
162 __entry->dupcnt,
163 __entry->alist,
164 __entry->bufsize,
165 __entry->count,
166 __entry->firstu,
167 __entry->flags,
168 __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS),
169 __entry->bt_hashval,
170 __entry->bt_before)
171);
172
173TRACE_EVENT(xfs_iext_insert,
174 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx,
175 struct xfs_bmbt_irec *r, int state, unsigned long caller_ip),
176 TP_ARGS(ip, idx, r, state, caller_ip),
177 TP_STRUCT__entry(
178 __field(dev_t, dev)
179 __field(xfs_ino_t, ino)
180 __field(xfs_extnum_t, idx)
181 __field(xfs_fileoff_t, startoff)
182 __field(xfs_fsblock_t, startblock)
183 __field(xfs_filblks_t, blockcount)
184 __field(xfs_exntst_t, state)
185 __field(int, bmap_state)
186 __field(unsigned long, caller_ip)
187 ),
188 TP_fast_assign(
189 __entry->dev = VFS_I(ip)->i_sb->s_dev;
190 __entry->ino = ip->i_ino;
191 __entry->idx = idx;
192 __entry->startoff = r->br_startoff;
193 __entry->startblock = r->br_startblock;
194 __entry->blockcount = r->br_blockcount;
195 __entry->state = r->br_state;
196 __entry->bmap_state = state;
197 __entry->caller_ip = caller_ip;
198 ),
199 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
200 "offset %lld block %lld count %lld flag %d caller %pf",
201 MAJOR(__entry->dev), MINOR(__entry->dev),
202 __entry->ino,
203 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
204 (long)__entry->idx,
205 __entry->startoff,
206 (__int64_t)__entry->startblock,
207 __entry->blockcount,
208 __entry->state,
209 (char *)__entry->caller_ip)
210);
211
212DECLARE_EVENT_CLASS(xfs_bmap_class,
213 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
214 unsigned long caller_ip),
215 TP_ARGS(ip, idx, state, caller_ip),
216 TP_STRUCT__entry(
217 __field(dev_t, dev)
218 __field(xfs_ino_t, ino)
219 __field(xfs_extnum_t, idx)
220 __field(xfs_fileoff_t, startoff)
221 __field(xfs_fsblock_t, startblock)
222 __field(xfs_filblks_t, blockcount)
223 __field(xfs_exntst_t, state)
224 __field(int, bmap_state)
225 __field(unsigned long, caller_ip)
226 ),
227 TP_fast_assign(
228 struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ?
229 ip->i_afp : &ip->i_df;
230 struct xfs_bmbt_irec r;
231
232 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
233 __entry->dev = VFS_I(ip)->i_sb->s_dev;
234 __entry->ino = ip->i_ino;
235 __entry->idx = idx;
236 __entry->startoff = r.br_startoff;
237 __entry->startblock = r.br_startblock;
238 __entry->blockcount = r.br_blockcount;
239 __entry->state = r.br_state;
240 __entry->bmap_state = state;
241 __entry->caller_ip = caller_ip;
242 ),
243 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
244 "offset %lld block %lld count %lld flag %d caller %pf",
245 MAJOR(__entry->dev), MINOR(__entry->dev),
246 __entry->ino,
247 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
248 (long)__entry->idx,
249 __entry->startoff,
250 (__int64_t)__entry->startblock,
251 __entry->blockcount,
252 __entry->state,
253 (char *)__entry->caller_ip)
254)
255
256#define DEFINE_BMAP_EVENT(name) \
257DEFINE_EVENT(xfs_bmap_class, name, \
258 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
259 unsigned long caller_ip), \
260 TP_ARGS(ip, idx, state, caller_ip))
261DEFINE_BMAP_EVENT(xfs_iext_remove);
262DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
263DEFINE_BMAP_EVENT(xfs_bmap_post_update);
264DEFINE_BMAP_EVENT(xfs_extlist);
265
266DECLARE_EVENT_CLASS(xfs_buf_class,
267 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
268 TP_ARGS(bp, caller_ip),
269 TP_STRUCT__entry(
270 __field(dev_t, dev)
271 __field(xfs_daddr_t, bno)
272 __field(size_t, buffer_length)
273 __field(int, hold)
274 __field(int, pincount)
275 __field(unsigned, lockval)
276 __field(unsigned, flags)
277 __field(unsigned long, caller_ip)
278 ),
279 TP_fast_assign(
280 __entry->dev = bp->b_target->bt_dev;
281 __entry->bno = bp->b_bn;
282 __entry->buffer_length = bp->b_buffer_length;
283 __entry->hold = atomic_read(&bp->b_hold);
284 __entry->pincount = atomic_read(&bp->b_pin_count);
285 __entry->lockval = xfs_buf_lock_value(bp);
286 __entry->flags = bp->b_flags;
287 __entry->caller_ip = caller_ip;
288 ),
289 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
290 "lock %d flags %s caller %pf",
291 MAJOR(__entry->dev), MINOR(__entry->dev),
292 (unsigned long long)__entry->bno,
293 __entry->buffer_length,
294 __entry->hold,
295 __entry->pincount,
296 __entry->lockval,
297 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
298 (void *)__entry->caller_ip)
299)
300
301#define DEFINE_BUF_EVENT(name) \
302DEFINE_EVENT(xfs_buf_class, name, \
303 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
304 TP_ARGS(bp, caller_ip))
305DEFINE_BUF_EVENT(xfs_buf_init);
306DEFINE_BUF_EVENT(xfs_buf_free);
307DEFINE_BUF_EVENT(xfs_buf_hold);
308DEFINE_BUF_EVENT(xfs_buf_rele);
309DEFINE_BUF_EVENT(xfs_buf_pin);
310DEFINE_BUF_EVENT(xfs_buf_unpin);
311DEFINE_BUF_EVENT(xfs_buf_iodone);
312DEFINE_BUF_EVENT(xfs_buf_iorequest);
313DEFINE_BUF_EVENT(xfs_buf_bawrite);
314DEFINE_BUF_EVENT(xfs_buf_bdwrite);
315DEFINE_BUF_EVENT(xfs_buf_lock);
316DEFINE_BUF_EVENT(xfs_buf_lock_done);
317DEFINE_BUF_EVENT(xfs_buf_cond_lock);
318DEFINE_BUF_EVENT(xfs_buf_unlock);
319DEFINE_BUF_EVENT(xfs_buf_ordered_retry);
320DEFINE_BUF_EVENT(xfs_buf_iowait);
321DEFINE_BUF_EVENT(xfs_buf_iowait_done);
322DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
323DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
324DEFINE_BUF_EVENT(xfs_buf_delwri_split);
325DEFINE_BUF_EVENT(xfs_buf_get_noaddr);
326DEFINE_BUF_EVENT(xfs_bdstrat_shut);
327DEFINE_BUF_EVENT(xfs_buf_item_relse);
328DEFINE_BUF_EVENT(xfs_buf_item_iodone);
329DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
330DEFINE_BUF_EVENT(xfs_buf_error_relse);
331DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
332DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
333
334/* not really buffer traces, but the buf provides useful information */
335DEFINE_BUF_EVENT(xfs_btree_corrupt);
336DEFINE_BUF_EVENT(xfs_da_btree_corrupt);
337DEFINE_BUF_EVENT(xfs_reset_dqcounts);
338DEFINE_BUF_EVENT(xfs_inode_item_push);
339
340/* pass flags explicitly */
341DECLARE_EVENT_CLASS(xfs_buf_flags_class,
342 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
343 TP_ARGS(bp, flags, caller_ip),
344 TP_STRUCT__entry(
345 __field(dev_t, dev)
346 __field(xfs_daddr_t, bno)
347 __field(size_t, buffer_length)
348 __field(int, hold)
349 __field(int, pincount)
350 __field(unsigned, lockval)
351 __field(unsigned, flags)
352 __field(unsigned long, caller_ip)
353 ),
354 TP_fast_assign(
355 __entry->dev = bp->b_target->bt_dev;
356 __entry->bno = bp->b_bn;
357 __entry->buffer_length = bp->b_buffer_length;
358 __entry->flags = flags;
359 __entry->hold = atomic_read(&bp->b_hold);
360 __entry->pincount = atomic_read(&bp->b_pin_count);
361 __entry->lockval = xfs_buf_lock_value(bp);
362 __entry->caller_ip = caller_ip;
363 ),
364 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
365 "lock %d flags %s caller %pf",
366 MAJOR(__entry->dev), MINOR(__entry->dev),
367 (unsigned long long)__entry->bno,
368 __entry->buffer_length,
369 __entry->hold,
370 __entry->pincount,
371 __entry->lockval,
372 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
373 (void *)__entry->caller_ip)
374)
375
376#define DEFINE_BUF_FLAGS_EVENT(name) \
377DEFINE_EVENT(xfs_buf_flags_class, name, \
378 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
379 TP_ARGS(bp, flags, caller_ip))
380DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
381DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
382DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
383
384TRACE_EVENT(xfs_buf_ioerror,
385 TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip),
386 TP_ARGS(bp, error, caller_ip),
387 TP_STRUCT__entry(
388 __field(dev_t, dev)
389 __field(xfs_daddr_t, bno)
390 __field(size_t, buffer_length)
391 __field(unsigned, flags)
392 __field(int, hold)
393 __field(int, pincount)
394 __field(unsigned, lockval)
395 __field(int, error)
396 __field(unsigned long, caller_ip)
397 ),
398 TP_fast_assign(
399 __entry->dev = bp->b_target->bt_dev;
400 __entry->bno = bp->b_bn;
401 __entry->buffer_length = bp->b_buffer_length;
402 __entry->hold = atomic_read(&bp->b_hold);
403 __entry->pincount = atomic_read(&bp->b_pin_count);
404 __entry->lockval = xfs_buf_lock_value(bp);
405 __entry->error = error;
406 __entry->flags = bp->b_flags;
407 __entry->caller_ip = caller_ip;
408 ),
409 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
410 "lock %d error %d flags %s caller %pf",
411 MAJOR(__entry->dev), MINOR(__entry->dev),
412 (unsigned long long)__entry->bno,
413 __entry->buffer_length,
414 __entry->hold,
415 __entry->pincount,
416 __entry->lockval,
417 __entry->error,
418 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
419 (void *)__entry->caller_ip)
420);
421
422DECLARE_EVENT_CLASS(xfs_buf_item_class,
423 TP_PROTO(struct xfs_buf_log_item *bip),
424 TP_ARGS(bip),
425 TP_STRUCT__entry(
426 __field(dev_t, dev)
427 __field(xfs_daddr_t, buf_bno)
428 __field(size_t, buf_len)
429 __field(int, buf_hold)
430 __field(int, buf_pincount)
431 __field(int, buf_lockval)
432 __field(unsigned, buf_flags)
433 __field(unsigned, bli_recur)
434 __field(int, bli_refcount)
435 __field(unsigned, bli_flags)
436 __field(void *, li_desc)
437 __field(unsigned, li_flags)
438 ),
439 TP_fast_assign(
440 __entry->dev = bip->bli_buf->b_target->bt_dev;
441 __entry->bli_flags = bip->bli_flags;
442 __entry->bli_recur = bip->bli_recur;
443 __entry->bli_refcount = atomic_read(&bip->bli_refcount);
444 __entry->buf_bno = bip->bli_buf->b_bn;
445 __entry->buf_len = bip->bli_buf->b_buffer_length;
446 __entry->buf_flags = bip->bli_buf->b_flags;
447 __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
448 __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
449 __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf);
450 __entry->li_desc = bip->bli_item.li_desc;
451 __entry->li_flags = bip->bli_item.li_flags;
452 ),
453 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
454 "lock %d flags %s recur %d refcount %d bliflags %s "
455 "lidesc 0x%p liflags %s",
456 MAJOR(__entry->dev), MINOR(__entry->dev),
457 (unsigned long long)__entry->buf_bno,
458 __entry->buf_len,
459 __entry->buf_hold,
460 __entry->buf_pincount,
461 __entry->buf_lockval,
462 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
463 __entry->bli_recur,
464 __entry->bli_refcount,
465 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
466 __entry->li_desc,
467 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
468)
469
470#define DEFINE_BUF_ITEM_EVENT(name) \
471DEFINE_EVENT(xfs_buf_item_class, name, \
472 TP_PROTO(struct xfs_buf_log_item *bip), \
473 TP_ARGS(bip))
474DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
475DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
476DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
477DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
478DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
479DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
480DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
481DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock);
482DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
483DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
484DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
485DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
486DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf);
487DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
488DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
489DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
490DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur);
491DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
492DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
493DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
494DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
495DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
496DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
497DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
498DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
499
500DECLARE_EVENT_CLASS(xfs_lock_class,
501 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
502 unsigned long caller_ip),
503 TP_ARGS(ip, lock_flags, caller_ip),
504 TP_STRUCT__entry(
505 __field(dev_t, dev)
506 __field(xfs_ino_t, ino)
507 __field(int, lock_flags)
508 __field(unsigned long, caller_ip)
509 ),
510 TP_fast_assign(
511 __entry->dev = VFS_I(ip)->i_sb->s_dev;
512 __entry->ino = ip->i_ino;
513 __entry->lock_flags = lock_flags;
514 __entry->caller_ip = caller_ip;
515 ),
516 TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
517 MAJOR(__entry->dev), MINOR(__entry->dev),
518 __entry->ino,
519 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
520 (void *)__entry->caller_ip)
521)
522
523#define DEFINE_LOCK_EVENT(name) \
524DEFINE_EVENT(xfs_lock_class, name, \
525 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
526 unsigned long caller_ip), \
527 TP_ARGS(ip, lock_flags, caller_ip))
528DEFINE_LOCK_EVENT(xfs_ilock);
529DEFINE_LOCK_EVENT(xfs_ilock_nowait);
530DEFINE_LOCK_EVENT(xfs_ilock_demote);
531DEFINE_LOCK_EVENT(xfs_iunlock);
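
/*
 * Illustrative use (hypothetical call site): the lock events record the
 * caller's return address so the trace shows who took the lock, so an
 * instrumented locking helper passes _RET_IP_ along with the lock flags:
 *
 *	trace_xfs_ilock(ip, XFS_ILOCK_EXCL, _RET_IP_);
 */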
532
533DECLARE_EVENT_CLASS(xfs_iget_class,
534 TP_PROTO(struct xfs_inode *ip),
535 TP_ARGS(ip),
536 TP_STRUCT__entry(
537 __field(dev_t, dev)
538 __field(xfs_ino_t, ino)
539 ),
540 TP_fast_assign(
541 __entry->dev = VFS_I(ip)->i_sb->s_dev;
542 __entry->ino = ip->i_ino;
543 ),
544 TP_printk("dev %d:%d ino 0x%llx",
545 MAJOR(__entry->dev), MINOR(__entry->dev),
546 __entry->ino)
547)
548
549#define DEFINE_IGET_EVENT(name) \
550DEFINE_EVENT(xfs_iget_class, name, \
551 TP_PROTO(struct xfs_inode *ip), \
552 TP_ARGS(ip))
553DEFINE_IGET_EVENT(xfs_iget_skip);
554DEFINE_IGET_EVENT(xfs_iget_reclaim);
555DEFINE_IGET_EVENT(xfs_iget_found);
556DEFINE_IGET_EVENT(xfs_iget_alloc);
557
558DECLARE_EVENT_CLASS(xfs_inode_class,
559 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
560 TP_ARGS(ip, caller_ip),
561 TP_STRUCT__entry(
562 __field(dev_t, dev)
563 __field(xfs_ino_t, ino)
564 __field(int, count)
565 __field(unsigned long, caller_ip)
566 ),
567 TP_fast_assign(
568 __entry->dev = VFS_I(ip)->i_sb->s_dev;
569 __entry->ino = ip->i_ino;
570 __entry->count = atomic_read(&VFS_I(ip)->i_count);
571 __entry->caller_ip = caller_ip;
572 ),
573 TP_printk("dev %d:%d ino 0x%llx count %d caller %pf",
574 MAJOR(__entry->dev), MINOR(__entry->dev),
575 __entry->ino,
576 __entry->count,
577		  (void *)__entry->caller_ip)
578)
579
580#define DEFINE_INODE_EVENT(name) \
581DEFINE_EVENT(xfs_inode_class, name, \
582 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
583 TP_ARGS(ip, caller_ip))
584DEFINE_INODE_EVENT(xfs_ihold);
585DEFINE_INODE_EVENT(xfs_irele);
586/* the old xfs_itrace_entry tracer - to be replaced by something in the VFS */
587DEFINE_INODE_EVENT(xfs_inode);
588#define xfs_itrace_entry(ip) \
589 trace_xfs_inode(ip, _THIS_IP_)
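
/*
 * Illustrative use: xfs_itrace_entry() is a drop-in replacement for the
 * old entry tracer, typically placed at the top of an operation
 * (xfs_some_operation below is a hypothetical caller, for illustration):
 *
 *	STATIC int
 *	xfs_some_operation(struct xfs_inode *ip)
 *	{
 *		xfs_itrace_entry(ip);
 *		...
 *	}
 */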
590
591DECLARE_EVENT_CLASS(xfs_dquot_class,
592 TP_PROTO(struct xfs_dquot *dqp),
593 TP_ARGS(dqp),
594 TP_STRUCT__entry(
595 __field(dev_t, dev)
596 __field(u32, id)
597 __field(unsigned, flags)
598 __field(unsigned, nrefs)
599 __field(unsigned long long, res_bcount)
600 __field(unsigned long long, bcount)
601 __field(unsigned long long, icount)
602 __field(unsigned long long, blk_hardlimit)
603 __field(unsigned long long, blk_softlimit)
604 __field(unsigned long long, ino_hardlimit)
605 __field(unsigned long long, ino_softlimit)
606	),
607 TP_fast_assign(
608 __entry->dev = dqp->q_mount->m_super->s_dev;
609 __entry->id = be32_to_cpu(dqp->q_core.d_id);
610 __entry->flags = dqp->dq_flags;
611 __entry->nrefs = dqp->q_nrefs;
612 __entry->res_bcount = dqp->q_res_bcount;
613 __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
614 __entry->icount = be64_to_cpu(dqp->q_core.d_icount);
615 __entry->blk_hardlimit =
616 be64_to_cpu(dqp->q_core.d_blk_hardlimit);
617 __entry->blk_softlimit =
618 be64_to_cpu(dqp->q_core.d_blk_softlimit);
619 __entry->ino_hardlimit =
620 be64_to_cpu(dqp->q_core.d_ino_hardlimit);
621 __entry->ino_softlimit =
622 be64_to_cpu(dqp->q_core.d_ino_softlimit);
623 ),
624 TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
625 "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
626		  "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx",
627 MAJOR(__entry->dev), MINOR(__entry->dev),
628 __entry->id,
629 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
630 __entry->nrefs,
631 __entry->res_bcount,
632 __entry->bcount,
633 __entry->blk_hardlimit,
634 __entry->blk_softlimit,
635 __entry->icount,
636 __entry->ino_hardlimit,
637 __entry->ino_softlimit)
638)
639
640#define DEFINE_DQUOT_EVENT(name) \
641DEFINE_EVENT(xfs_dquot_class, name, \
642 TP_PROTO(struct xfs_dquot *dqp), \
643 TP_ARGS(dqp))
644DEFINE_DQUOT_EVENT(xfs_dqadjust);
645DEFINE_DQUOT_EVENT(xfs_dqshake_dirty);
646DEFINE_DQUOT_EVENT(xfs_dqshake_unlink);
647DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
648DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
649DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
650DEFINE_DQUOT_EVENT(xfs_dqattach_found);
651DEFINE_DQUOT_EVENT(xfs_dqattach_get);
652DEFINE_DQUOT_EVENT(xfs_dqinit);
653DEFINE_DQUOT_EVENT(xfs_dqreuse);
654DEFINE_DQUOT_EVENT(xfs_dqalloc);
655DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
656DEFINE_DQUOT_EVENT(xfs_dqread);
657DEFINE_DQUOT_EVENT(xfs_dqread_fail);
658DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
659DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
660DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
661DEFINE_DQUOT_EVENT(xfs_dqlookup_move);
662DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
663DEFINE_DQUOT_EVENT(xfs_dqget_hit);
664DEFINE_DQUOT_EVENT(xfs_dqget_miss);
665DEFINE_DQUOT_EVENT(xfs_dqput);
666DEFINE_DQUOT_EVENT(xfs_dqput_wait);
667DEFINE_DQUOT_EVENT(xfs_dqput_free);
668DEFINE_DQUOT_EVENT(xfs_dqrele);
669DEFINE_DQUOT_EVENT(xfs_dqflush);
670DEFINE_DQUOT_EVENT(xfs_dqflush_force);
671DEFINE_DQUOT_EVENT(xfs_dqflush_done);
672/* not really iget events, but we re-use the format */
673DEFINE_IGET_EVENT(xfs_dquot_dqalloc);
674DEFINE_IGET_EVENT(xfs_dquot_dqdetach);
675
676DECLARE_EVENT_CLASS(xfs_loggrant_class,
677 TP_PROTO(struct log *log, struct xlog_ticket *tic),
678 TP_ARGS(log, tic),
679 TP_STRUCT__entry(
680 __field(dev_t, dev)
681 __field(unsigned, trans_type)
682 __field(char, ocnt)
683 __field(char, cnt)
684 __field(int, curr_res)
685 __field(int, unit_res)
686 __field(unsigned int, flags)
687 __field(void *, reserve_headq)
688 __field(void *, write_headq)
689 __field(int, grant_reserve_cycle)
690 __field(int, grant_reserve_bytes)
691 __field(int, grant_write_cycle)
692 __field(int, grant_write_bytes)
693 __field(int, curr_cycle)
694 __field(int, curr_block)
695 __field(xfs_lsn_t, tail_lsn)
696 ),
697 TP_fast_assign(
698 __entry->dev = log->l_mp->m_super->s_dev;
699 __entry->trans_type = tic->t_trans_type;
700 __entry->ocnt = tic->t_ocnt;
701 __entry->cnt = tic->t_cnt;
702 __entry->curr_res = tic->t_curr_res;
703 __entry->unit_res = tic->t_unit_res;
704 __entry->flags = tic->t_flags;
705 __entry->reserve_headq = log->l_reserve_headq;
706 __entry->write_headq = log->l_write_headq;
707 __entry->grant_reserve_cycle = log->l_grant_reserve_cycle;
708 __entry->grant_reserve_bytes = log->l_grant_reserve_bytes;
709 __entry->grant_write_cycle = log->l_grant_write_cycle;
710 __entry->grant_write_bytes = log->l_grant_write_bytes;
711 __entry->curr_cycle = log->l_curr_cycle;
712 __entry->curr_block = log->l_curr_block;
713 __entry->tail_lsn = log->l_tail_lsn;
714 ),
715 TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
716 "t_unit_res %u t_flags %s reserve_headq 0x%p "
717 "write_headq 0x%p grant_reserve_cycle %d "
718 "grant_reserve_bytes %d grant_write_cycle %d "
719 "grant_write_bytes %d curr_cycle %d curr_block %d "
720 "tail_cycle %d tail_block %d",
721 MAJOR(__entry->dev), MINOR(__entry->dev),
722 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
723 __entry->ocnt,
724 __entry->cnt,
725 __entry->curr_res,
726 __entry->unit_res,
727 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
728 __entry->reserve_headq,
729 __entry->write_headq,
730 __entry->grant_reserve_cycle,
731 __entry->grant_reserve_bytes,
732 __entry->grant_write_cycle,
733 __entry->grant_write_bytes,
734 __entry->curr_cycle,
735 __entry->curr_block,
736 CYCLE_LSN(__entry->tail_lsn),
737 BLOCK_LSN(__entry->tail_lsn)
738 )
739)
740
741#define DEFINE_LOGGRANT_EVENT(name) \
742DEFINE_EVENT(xfs_loggrant_class, name, \
743 TP_PROTO(struct log *log, struct xlog_ticket *tic), \
744 TP_ARGS(log, tic))
745DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
746DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
747DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
748DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
749DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
750DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
751DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
752DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
753DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
754DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
755DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
756DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
757DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
758DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
759DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
760DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
761DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
762DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
763DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
764DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
765DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
766DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
767DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
768DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
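
/*
 * Usage note (sketch): like every event declared in this file, the log
 * grant events are controlled at run time through the tracing debugfs
 * interface (assuming debugfs is mounted at /sys/kernel/debug):
 *
 *	echo 1 > /sys/kernel/debug/tracing/events/xfs/xfs_log_reserve/enable
 *	cat /sys/kernel/debug/tracing/trace_pipe
 */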
769
770#define DEFINE_RW_EVENT(name) \
771TRACE_EVENT(name, \
772 TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
773 TP_ARGS(ip, count, offset, flags), \
774 TP_STRUCT__entry( \
775 __field(dev_t, dev) \
776 __field(xfs_ino_t, ino) \
777 __field(xfs_fsize_t, size) \
778 __field(xfs_fsize_t, new_size) \
779 __field(loff_t, offset) \
780 __field(size_t, count) \
781 __field(int, flags) \
782 ), \
783 TP_fast_assign( \
784 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
785 __entry->ino = ip->i_ino; \
786 __entry->size = ip->i_d.di_size; \
787 __entry->new_size = ip->i_new_size; \
788 __entry->offset = offset; \
789 __entry->count = count; \
790 __entry->flags = flags; \
791 ), \
792 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
793 "offset 0x%llx count 0x%zx ioflags %s", \
794 MAJOR(__entry->dev), MINOR(__entry->dev), \
795 __entry->ino, \
796 __entry->size, \
797 __entry->new_size, \
798 __entry->offset, \
799 __entry->count, \
800 __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) \
801)
802DEFINE_RW_EVENT(xfs_file_read);
803DEFINE_RW_EVENT(xfs_file_buffered_write);
804DEFINE_RW_EVENT(xfs_file_direct_write);
805DEFINE_RW_EVENT(xfs_file_splice_read);
806DEFINE_RW_EVENT(xfs_file_splice_write);
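
/*
 * Illustrative use (hypothetical call site): the read/write paths emit
 * these events with the I/O size, file offset and IO_* flags, e.g.:
 *
 *	trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
 */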
807
808
809#define DEFINE_PAGE_EVENT(name) \
810TRACE_EVENT(name, \
811 TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \
812 TP_ARGS(inode, page, off), \
813 TP_STRUCT__entry( \
814 __field(dev_t, dev) \
815 __field(xfs_ino_t, ino) \
816 __field(pgoff_t, pgoff) \
817 __field(loff_t, size) \
818 __field(unsigned long, offset) \
819 __field(int, delalloc) \
820 __field(int, unmapped) \
821 __field(int, unwritten) \
822 ), \
823 TP_fast_assign( \
824 int delalloc = -1, unmapped = -1, unwritten = -1; \
825 \
826 if (page_has_buffers(page)) \
827 xfs_count_page_state(page, &delalloc, \
828 &unmapped, &unwritten); \
829 __entry->dev = inode->i_sb->s_dev; \
830 __entry->ino = XFS_I(inode)->i_ino; \
831 __entry->pgoff = page_offset(page); \
832 __entry->size = i_size_read(inode); \
833 __entry->offset = off; \
834 __entry->delalloc = delalloc; \
835 __entry->unmapped = unmapped; \
836 __entry->unwritten = unwritten; \
837 ), \
838 TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " \
839 "delalloc %d unmapped %d unwritten %d", \
840 MAJOR(__entry->dev), MINOR(__entry->dev), \
841 __entry->ino, \
842 __entry->pgoff, \
843 __entry->size, \
844 __entry->offset, \
845 __entry->delalloc, \
846 __entry->unmapped, \
847 __entry->unwritten) \
848)
849DEFINE_PAGE_EVENT(xfs_writepage);
850DEFINE_PAGE_EVENT(xfs_releasepage);
851DEFINE_PAGE_EVENT(xfs_invalidatepage);
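
/*
 * Illustrative use (hypothetical call site): the address-space paths pass
 * the inode, the page and the offset into the page; the event itself
 * derives the delalloc/unmapped/unwritten state from the buffer heads:
 *
 *	trace_xfs_writepage(inode, page, 0);
 */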
852
853#define DEFINE_IOMAP_EVENT(name) \
854TRACE_EVENT(name, \
855 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
856 int flags, struct xfs_bmbt_irec *irec), \
857 TP_ARGS(ip, offset, count, flags, irec), \
858 TP_STRUCT__entry( \
859 __field(dev_t, dev) \
860 __field(xfs_ino_t, ino) \
861 __field(loff_t, size) \
862 __field(loff_t, new_size) \
863 __field(loff_t, offset) \
864 __field(size_t, count) \
865 __field(int, flags) \
866 __field(xfs_fileoff_t, startoff) \
867 __field(xfs_fsblock_t, startblock) \
868 __field(xfs_filblks_t, blockcount) \
869 ), \
870 TP_fast_assign( \
871 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
872 __entry->ino = ip->i_ino; \
873 __entry->size = ip->i_d.di_size; \
874 __entry->new_size = ip->i_new_size; \
875 __entry->offset = offset; \
876 __entry->count = count; \
877 __entry->flags = flags; \
878 __entry->startoff = irec ? irec->br_startoff : 0; \
879 __entry->startblock = irec ? irec->br_startblock : 0; \
880 __entry->blockcount = irec ? irec->br_blockcount : 0; \
881 ), \
882 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
883 "offset 0x%llx count %zd flags %s " \
884 "startoff 0x%llx startblock %lld blockcount 0x%llx", \
885 MAJOR(__entry->dev), MINOR(__entry->dev), \
886 __entry->ino, \
887 __entry->size, \
888 __entry->new_size, \
889 __entry->offset, \
890 __entry->count, \
891 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \
892 __entry->startoff, \
893 (__int64_t)__entry->startblock, \
894 __entry->blockcount) \
895)
896DEFINE_IOMAP_EVENT(xfs_iomap_enter);
897DEFINE_IOMAP_EVENT(xfs_iomap_found);
898DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
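
/*
 * Illustrative use (hypothetical call sites; imap is an assumed local
 * struct xfs_bmbt_irec): irec may be NULL on entry, in which case the
 * extent fields are recorded as zero (see the "irec ? ... : 0" guards
 * above):
 *
 *	trace_xfs_iomap_enter(ip, offset, count, flags, NULL);
 *	trace_xfs_iomap_found(ip, offset, count, flags, &imap);
 */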
899
900#define DEFINE_SIMPLE_IO_EVENT(name) \
901TRACE_EVENT(name, \
902 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \
903 TP_ARGS(ip, offset, count), \
904 TP_STRUCT__entry( \
905 __field(dev_t, dev) \
906 __field(xfs_ino_t, ino) \
907 __field(loff_t, size) \
908 __field(loff_t, new_size) \
909 __field(loff_t, offset) \
910 __field(size_t, count) \
911 ), \
912 TP_fast_assign( \
913 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
914 __entry->ino = ip->i_ino; \
915 __entry->size = ip->i_d.di_size; \
916 __entry->new_size = ip->i_new_size; \
917 __entry->offset = offset; \
918 __entry->count = count; \
919 ), \
920 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
921 "offset 0x%llx count %zd", \
922 MAJOR(__entry->dev), MINOR(__entry->dev), \
923 __entry->ino, \
924 __entry->size, \
925 __entry->new_size, \
926 __entry->offset, \
927 __entry->count) \
928)
929DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
930DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
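
/*
 * Illustrative use (hypothetical call site): the simple I/O events only
 * need the inode, the file offset and the length of the range:
 *
 *	trace_xfs_unwritten_convert(ip, offset, count);
 */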
931
932
933TRACE_EVENT(xfs_itruncate_start,
934 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size, int flag,
935 xfs_off_t toss_start, xfs_off_t toss_finish),
936 TP_ARGS(ip, new_size, flag, toss_start, toss_finish),
937 TP_STRUCT__entry(
938 __field(dev_t, dev)
939 __field(xfs_ino_t, ino)
940 __field(xfs_fsize_t, size)
941 __field(xfs_fsize_t, new_size)
942 __field(xfs_off_t, toss_start)
943 __field(xfs_off_t, toss_finish)
944 __field(int, flag)
945 ),
946 TP_fast_assign(
947 __entry->dev = VFS_I(ip)->i_sb->s_dev;
948 __entry->ino = ip->i_ino;
949 __entry->size = ip->i_d.di_size;
950 __entry->new_size = new_size;
951 __entry->toss_start = toss_start;
952 __entry->toss_finish = toss_finish;
953 __entry->flag = flag;
954 ),
955 TP_printk("dev %d:%d ino 0x%llx %s size 0x%llx new_size 0x%llx "
956 "toss start 0x%llx toss finish 0x%llx",
957 MAJOR(__entry->dev), MINOR(__entry->dev),
958 __entry->ino,
959 __print_flags(__entry->flag, "|", XFS_ITRUNC_FLAGS),
960 __entry->size,
961 __entry->new_size,
962 __entry->toss_start,
963 __entry->toss_finish)
964);
965
966DECLARE_EVENT_CLASS(xfs_itrunc_class,
967 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
968 TP_ARGS(ip, new_size),
969 TP_STRUCT__entry(
970 __field(dev_t, dev)
971 __field(xfs_ino_t, ino)
972 __field(xfs_fsize_t, size)
973 __field(xfs_fsize_t, new_size)
974 ),
975 TP_fast_assign(
976 __entry->dev = VFS_I(ip)->i_sb->s_dev;
977 __entry->ino = ip->i_ino;
978 __entry->size = ip->i_d.di_size;
979 __entry->new_size = new_size;
980 ),
981 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
982 MAJOR(__entry->dev), MINOR(__entry->dev),
983 __entry->ino,
984 __entry->size,
985 __entry->new_size)
986)
987
988#define DEFINE_ITRUNC_EVENT(name) \
989DEFINE_EVENT(xfs_itrunc_class, name, \
990 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
991 TP_ARGS(ip, new_size))
992DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start);
993DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end);
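
/*
 * Illustrative use (hypothetical call sites): the truncate path brackets
 * the work with a start/end event pair carrying the target size:
 *
 *	trace_xfs_itruncate_finish_start(ip, new_size);
 *	...
 *	trace_xfs_itruncate_finish_end(ip, new_size);
 */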
994
995TRACE_EVENT(xfs_pagecache_inval,
996 TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
997 TP_ARGS(ip, start, finish),
998 TP_STRUCT__entry(
999 __field(dev_t, dev)
1000 __field(xfs_ino_t, ino)
1001 __field(xfs_fsize_t, size)
1002 __field(xfs_off_t, start)
1003 __field(xfs_off_t, finish)
1004 ),
1005 TP_fast_assign(
1006 __entry->dev = VFS_I(ip)->i_sb->s_dev;
1007 __entry->ino = ip->i_ino;
1008 __entry->size = ip->i_d.di_size;
1009 __entry->start = start;
1010 __entry->finish = finish;
1011 ),
1012 TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx",
1013 MAJOR(__entry->dev), MINOR(__entry->dev),
1014 __entry->ino,
1015 __entry->size,
1016 __entry->start,
1017 __entry->finish)
1018);
1019
1020TRACE_EVENT(xfs_bunmap,
1021 TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len,
1022 int flags, unsigned long caller_ip),
1023 TP_ARGS(ip, bno, len, flags, caller_ip),
1024 TP_STRUCT__entry(
1025 __field(dev_t, dev)
1026 __field(xfs_ino_t, ino)
1027 __field(xfs_fsize_t, size)
1028 __field(xfs_fileoff_t, bno)
1029 __field(xfs_filblks_t, len)
1030 __field(unsigned long, caller_ip)
1031 __field(int, flags)
1032 ),
1033 TP_fast_assign(
1034 __entry->dev = VFS_I(ip)->i_sb->s_dev;
1035 __entry->ino = ip->i_ino;
1036 __entry->size = ip->i_d.di_size;
1037 __entry->bno = bno;
1038 __entry->len = len;
1039 __entry->caller_ip = caller_ip;
1040 __entry->flags = flags;
1041 ),
1042	TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx "
1043 "flags %s caller %pf",
1044 MAJOR(__entry->dev), MINOR(__entry->dev),
1045 __entry->ino,
1046 __entry->size,
1047 __entry->bno,
1048 __entry->len,
1049 __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS),
1050 (void *)__entry->caller_ip)
1051
1052);
1053
1054TRACE_EVENT(xfs_alloc_busy,
1055 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
1056 xfs_extlen_t len, int slot),
1057 TP_ARGS(mp, agno, agbno, len, slot),
1058 TP_STRUCT__entry(
1059 __field(dev_t, dev)
1060 __field(xfs_agnumber_t, agno)
1061 __field(xfs_agblock_t, agbno)
1062 __field(xfs_extlen_t, len)
1063 __field(int, slot)
1064 ),
1065 TP_fast_assign(
1066 __entry->dev = mp->m_super->s_dev;
1067 __entry->agno = agno;
1068 __entry->agbno = agbno;
1069 __entry->len = len;
1070 __entry->slot = slot;
1071 ),
1072 TP_printk("dev %d:%d agno %u agbno %u len %u slot %d",
1073 MAJOR(__entry->dev), MINOR(__entry->dev),
1074 __entry->agno,
1075 __entry->agbno,
1076 __entry->len,
1077 __entry->slot)
1078
1079);
1080
1081#define XFS_BUSY_STATES \
1082 { 0, "found" }, \
1083 { 1, "missing" }
1084
1085TRACE_EVENT(xfs_alloc_unbusy,
1086 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
1087 int slot, int found),
1088 TP_ARGS(mp, agno, slot, found),
1089 TP_STRUCT__entry(
1090 __field(dev_t, dev)
1091 __field(xfs_agnumber_t, agno)
1092 __field(int, slot)
1093 __field(int, found)
1094 ),
1095 TP_fast_assign(
1096 __entry->dev = mp->m_super->s_dev;
1097 __entry->agno = agno;
1098 __entry->slot = slot;
1099 __entry->found = found;
1100 ),
1101 TP_printk("dev %d:%d agno %u slot %d %s",
1102 MAJOR(__entry->dev), MINOR(__entry->dev),
1103 __entry->agno,
1104 __entry->slot,
1105 __print_symbolic(__entry->found, XFS_BUSY_STATES))
1106);
1107
1108TRACE_EVENT(xfs_alloc_busysearch,
1109 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
1110 xfs_extlen_t len, xfs_lsn_t lsn),
1111 TP_ARGS(mp, agno, agbno, len, lsn),
1112 TP_STRUCT__entry(
1113 __field(dev_t, dev)
1114 __field(xfs_agnumber_t, agno)
1115 __field(xfs_agblock_t, agbno)
1116 __field(xfs_extlen_t, len)
1117 __field(xfs_lsn_t, lsn)
1118 ),
1119 TP_fast_assign(
1120 __entry->dev = mp->m_super->s_dev;
1121 __entry->agno = agno;
1122 __entry->agbno = agbno;
1123 __entry->len = len;
1124 __entry->lsn = lsn;
1125 ),
1126 TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx",
1127 MAJOR(__entry->dev), MINOR(__entry->dev),
1128 __entry->agno,
1129 __entry->agbno,
1130 __entry->len,
1131 __entry->lsn)
1132);
1133
1134TRACE_EVENT(xfs_agf,
1135 TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
1136 unsigned long caller_ip),
1137 TP_ARGS(mp, agf, flags, caller_ip),
1138 TP_STRUCT__entry(
1139 __field(dev_t, dev)
1140 __field(xfs_agnumber_t, agno)
1141 __field(int, flags)
1142 __field(__u32, length)
1143 __field(__u32, bno_root)
1144 __field(__u32, cnt_root)
1145 __field(__u32, bno_level)
1146 __field(__u32, cnt_level)
1147 __field(__u32, flfirst)
1148 __field(__u32, fllast)
1149 __field(__u32, flcount)
1150 __field(__u32, freeblks)
1151 __field(__u32, longest)
1152 __field(unsigned long, caller_ip)
1153 ),
1154 TP_fast_assign(
1155 __entry->dev = mp->m_super->s_dev;
1156		__entry->agno = be32_to_cpu(agf->agf_seqno);
1157		__entry->flags = flags;
1158		__entry->length = be32_to_cpu(agf->agf_length);
1159		__entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
1160		__entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
1161		__entry->bno_level =
1162			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
1163		__entry->cnt_level =
1164			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
1165		__entry->flfirst = be32_to_cpu(agf->agf_flfirst);
1166		__entry->fllast = be32_to_cpu(agf->agf_fllast);
1167		__entry->flcount = be32_to_cpu(agf->agf_flcount);
1168		__entry->freeblks = be32_to_cpu(agf->agf_freeblks);
1169 __entry->longest = be32_to_cpu(agf->agf_longest);
1170 __entry->caller_ip = caller_ip;
1171 ),
1172 TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
1173 "levels b %u c %u flfirst %u fllast %u flcount %u "
1174 "freeblks %u longest %u caller %pf",
1175 MAJOR(__entry->dev), MINOR(__entry->dev),
1176 __entry->agno,
1177 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
1178 __entry->length,
1179 __entry->bno_root,
1180 __entry->cnt_root,
1181 __entry->bno_level,
1182 __entry->cnt_level,
1183 __entry->flfirst,
1184 __entry->fllast,
1185 __entry->flcount,
1186 __entry->freeblks,
1187 __entry->longest,
1188 (void *)__entry->caller_ip)
1189);
1190
1191TRACE_EVENT(xfs_free_extent,
1192 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
1193 xfs_extlen_t len, bool isfl, int haveleft, int haveright),
1194 TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright),
1195 TP_STRUCT__entry(
1196 __field(dev_t, dev)
1197 __field(xfs_agnumber_t, agno)
1198 __field(xfs_agblock_t, agbno)
1199 __field(xfs_extlen_t, len)
1200 __field(int, isfl)
1201 __field(int, haveleft)
1202 __field(int, haveright)
1203 ),
1204 TP_fast_assign(
1205 __entry->dev = mp->m_super->s_dev;
1206 __entry->agno = agno;
1207 __entry->agbno = agbno;
1208 __entry->len = len;
1209 __entry->isfl = isfl;
1210 __entry->haveleft = haveleft;
1211 __entry->haveright = haveright;
1212 ),
1213 TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s",
1214 MAJOR(__entry->dev), MINOR(__entry->dev),
1215 __entry->agno,
1216 __entry->agbno,
1217 __entry->len,
1218 __entry->isfl,
1219 __entry->haveleft ?
1220 (__entry->haveright ? "both" : "left") :
1221 (__entry->haveright ? "right" : "none"))
1222
1223);
1224
1225DECLARE_EVENT_CLASS(xfs_alloc_class,
1226 TP_PROTO(struct xfs_alloc_arg *args),
1227 TP_ARGS(args),
1228 TP_STRUCT__entry(
1229 __field(dev_t, dev)
1230 __field(xfs_agnumber_t, agno)
1231 __field(xfs_agblock_t, agbno)
1232 __field(xfs_extlen_t, minlen)
1233 __field(xfs_extlen_t, maxlen)
1234 __field(xfs_extlen_t, mod)
1235 __field(xfs_extlen_t, prod)
1236 __field(xfs_extlen_t, minleft)
1237 __field(xfs_extlen_t, total)
1238 __field(xfs_extlen_t, alignment)
1239 __field(xfs_extlen_t, minalignslop)
1240 __field(xfs_extlen_t, len)
1241 __field(short, type)
1242 __field(short, otype)
1243 __field(char, wasdel)
1244 __field(char, wasfromfl)
1245 __field(char, isfl)
1246 __field(char, userdata)
1247 __field(xfs_fsblock_t, firstblock)
1248 ),
1249 TP_fast_assign(
1250 __entry->dev = args->mp->m_super->s_dev;
1251 __entry->agno = args->agno;
1252 __entry->agbno = args->agbno;
1253 __entry->minlen = args->minlen;
1254 __entry->maxlen = args->maxlen;
1255 __entry->mod = args->mod;
1256 __entry->prod = args->prod;
1257 __entry->minleft = args->minleft;
1258 __entry->total = args->total;
1259 __entry->alignment = args->alignment;
1260 __entry->minalignslop = args->minalignslop;
1261 __entry->len = args->len;
1262 __entry->type = args->type;
1263 __entry->otype = args->otype;
1264 __entry->wasdel = args->wasdel;
1265 __entry->wasfromfl = args->wasfromfl;
1266 __entry->isfl = args->isfl;
1267 __entry->userdata = args->userdata;
1268 __entry->firstblock = args->firstblock;
1269 ),
1270 TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
1271 "prod %u minleft %u total %u alignment %u minalignslop %u "
1272 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
1273 "userdata %d firstblock 0x%llx",
1274 MAJOR(__entry->dev), MINOR(__entry->dev),
1275 __entry->agno,
1276 __entry->agbno,
1277 __entry->minlen,
1278 __entry->maxlen,
1279 __entry->mod,
1280 __entry->prod,
1281 __entry->minleft,
1282 __entry->total,
1283 __entry->alignment,
1284 __entry->minalignslop,
1285 __entry->len,
1286 __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
1287 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
1288 __entry->wasdel,
1289 __entry->wasfromfl,
1290 __entry->isfl,
1291 __entry->userdata,
1292 __entry->firstblock)
1293)
1294
1295#define DEFINE_ALLOC_EVENT(name) \
1296DEFINE_EVENT(xfs_alloc_class, name, \
1297 TP_PROTO(struct xfs_alloc_arg *args), \
1298 TP_ARGS(args))
1299DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
1300DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
1301DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
1302DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
1303DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
1304DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
1305DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
1306DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
1307DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
1308DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
1309DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
1310DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
1311DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
1312DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
1313DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
1314DEFINE_ALLOC_EVENT(xfs_alloc_small_error);
1315DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs);
1316DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix);
1317DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
1318DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
1319DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
1320
1321DECLARE_EVENT_CLASS(xfs_dir2_class,
1322 TP_PROTO(struct xfs_da_args *args),
1323 TP_ARGS(args),
1324 TP_STRUCT__entry(
1325 __field(dev_t, dev)
1326 __field(xfs_ino_t, ino)
1327 __dynamic_array(char, name, args->namelen)
1328 __field(int, namelen)
1329 __field(xfs_dahash_t, hashval)
1330 __field(xfs_ino_t, inumber)
1331 __field(int, op_flags)
1332 ),
1333 TP_fast_assign(
1334 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1335 __entry->ino = args->dp->i_ino;
1336 if (args->namelen)
1337 memcpy(__get_str(name), args->name, args->namelen);
1338 __entry->namelen = args->namelen;
1339 __entry->hashval = args->hashval;
1340 __entry->inumber = args->inumber;
1341 __entry->op_flags = args->op_flags;
1342 ),
1343 TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
1344 "inumber 0x%llx op_flags %s",
1345 MAJOR(__entry->dev), MINOR(__entry->dev),
1346 __entry->ino,
1347 __entry->namelen,
1348 __entry->namelen ? __get_str(name) : NULL,
1349 __entry->namelen,
1350 __entry->hashval,
1351 __entry->inumber,
1352 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
1353)
1354
1355#define DEFINE_DIR2_EVENT(name) \
1356DEFINE_EVENT(xfs_dir2_class, name, \
1357 TP_PROTO(struct xfs_da_args *args), \
1358 TP_ARGS(args))
1359DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
1360DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
1361DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
1362DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
1363DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
1364DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
1365DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
1366DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
1367DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
1368DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
1369DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
1370DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
1371DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
1372DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
1373DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
1374DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
1375DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
1376DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
1377DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
1378DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
1379DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
1380DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
1381DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
1382DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
1383DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
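
/*
 * Illustrative use (hypothetical call site): the directory events take the
 * xfs_da_args describing the operation; the entry name is copied into the
 * trace buffer via the __dynamic_array() declared in the class above:
 *
 *	trace_xfs_dir2_sf_lookup(args);
 */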
1384
1385DECLARE_EVENT_CLASS(xfs_dir2_space_class,
1386 TP_PROTO(struct xfs_da_args *args, int idx),
1387 TP_ARGS(args, idx),
1388 TP_STRUCT__entry(
1389 __field(dev_t, dev)
1390 __field(xfs_ino_t, ino)
1391 __field(int, op_flags)
1392 __field(int, idx)
1393 ),
1394 TP_fast_assign(
1395 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1396 __entry->ino = args->dp->i_ino;
1397 __entry->op_flags = args->op_flags;
1398 __entry->idx = idx;
1399 ),
1400 TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
1401 MAJOR(__entry->dev), MINOR(__entry->dev),
1402 __entry->ino,
1403 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
1404 __entry->idx)
1405)
1406
1407#define DEFINE_DIR2_SPACE_EVENT(name) \
1408DEFINE_EVENT(xfs_dir2_space_class, name, \
1409 TP_PROTO(struct xfs_da_args *args, int idx), \
1410 TP_ARGS(args, idx))
1411DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
1412DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
1413DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
1414DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
1415
1416TRACE_EVENT(xfs_dir2_leafn_moveents,
1417 TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
1418 TP_ARGS(args, src_idx, dst_idx, count),
1419 TP_STRUCT__entry(
1420 __field(dev_t, dev)
1421 __field(xfs_ino_t, ino)
1422 __field(int, op_flags)
1423 __field(int, src_idx)
1424 __field(int, dst_idx)
1425 __field(int, count)
1426 ),
1427 TP_fast_assign(
1428 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1429 __entry->ino = args->dp->i_ino;
1430 __entry->op_flags = args->op_flags;
1431 __entry->src_idx = src_idx;
1432 __entry->dst_idx = dst_idx;
1433 __entry->count = count;
1434 ),
1435 TP_printk("dev %d:%d ino 0x%llx op_flags %s "
1436 "src_idx %d dst_idx %d count %d",
1437 MAJOR(__entry->dev), MINOR(__entry->dev),
1438 __entry->ino,
1439 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
1440 __entry->src_idx,
1441 __entry->dst_idx,
1442 __entry->count)
1443);
1444
1445#define XFS_SWAPEXT_INODES \
1446 { 0, "target" }, \
1447 { 1, "temp" }
1448
1449#define XFS_INODE_FORMAT_STR \
1450 { 0, "invalid" }, \
1451 { 1, "local" }, \
1452 { 2, "extent" }, \
1453 { 3, "btree" }
1454
1455DECLARE_EVENT_CLASS(xfs_swap_extent_class,
1456 TP_PROTO(struct xfs_inode *ip, int which),
1457 TP_ARGS(ip, which),
1458 TP_STRUCT__entry(
1459 __field(dev_t, dev)
1460 __field(int, which)
1461 __field(xfs_ino_t, ino)
1462 __field(int, format)
1463 __field(int, nex)
1464 __field(int, max_nex)
1465 __field(int, broot_size)
1466 __field(int, fork_off)
1467 ),
1468 TP_fast_assign(
1469 __entry->dev = VFS_I(ip)->i_sb->s_dev;
1470 __entry->which = which;
1471 __entry->ino = ip->i_ino;
1472 __entry->format = ip->i_d.di_format;
1473 __entry->nex = ip->i_d.di_nextents;
1474 __entry->max_nex = ip->i_df.if_ext_max;
1475 __entry->broot_size = ip->i_df.if_broot_bytes;
1476 __entry->fork_off = XFS_IFORK_BOFF(ip);
1477 ),
1478 TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
1479 "Max in-fork extents %d, broot size %d, fork offset %d",
1480 MAJOR(__entry->dev), MINOR(__entry->dev),
1481 __entry->ino,
1482 __print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
1483 __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
1484 __entry->nex,
1485 __entry->max_nex,
1486 __entry->broot_size,
1487 __entry->fork_off)
1488)
1489
1490#define DEFINE_SWAPEXT_EVENT(name) \
1491DEFINE_EVENT(xfs_swap_extent_class, name, \
1492 TP_PROTO(struct xfs_inode *ip, int which), \
1493 TP_ARGS(ip, which))
1494
1495DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
1496DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
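
/*
 * Illustrative use (hypothetical call sites): the "which" argument selects
 * the XFS_SWAPEXT_INODES label, 0 for the target inode and 1 for the
 * temporary inode:
 *
 *	trace_xfs_swap_extent_before(ip, 0);
 *	trace_xfs_swap_extent_before(tip, 1);
 */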
1497
1498#endif /* _TRACE_XFS_H */
1499
1500#undef TRACE_INCLUDE_PATH
1501#define TRACE_INCLUDE_PATH .
1502#define TRACE_INCLUDE_FILE xfs_trace
1503#include <trace/define_trace.h>
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index ad7fbead4c97..7c220b4227bc 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -36,10 +36,13 @@ struct attrlist_cursor_kern;
36/* 36/*
37 * Flags for read/write calls - same values as IRIX 37 * Flags for read/write calls - same values as IRIX
38 */ 38 */
39#define IO_ISAIO 0x00001 /* don't wait for completion */
40#define IO_ISDIRECT 0x00004 /* bypass page cache */ 39#define IO_ISDIRECT 0x00004 /* bypass page cache */
41#define IO_INVIS 0x00020 /* don't update inode timestamps */ 40#define IO_INVIS 0x00020 /* don't update inode timestamps */
42 41
42#define XFS_IO_FLAGS \
43 { IO_ISDIRECT, "DIRECT" }, \
44 { IO_INVIS, "INVIS"}
45
43/* 46/*
44 * Flush/Invalidate options for vop_toss/flush/flushinval_pages. 47 * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
45 */ 48 */
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 497c7fb75cc1..fa01b9daba6b 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -30,10 +30,10 @@
30 30
31 31
32static int 32static int
33__xfs_xattr_get(struct inode *inode, const char *name, 33xfs_xattr_get(struct dentry *dentry, const char *name,
34 void *value, size_t size, int xflags) 34 void *value, size_t size, int xflags)
35{ 35{
36 struct xfs_inode *ip = XFS_I(inode); 36 struct xfs_inode *ip = XFS_I(dentry->d_inode);
37 int error, asize = size; 37 int error, asize = size;
38 38
39 if (strcmp(name, "") == 0) 39 if (strcmp(name, "") == 0)
@@ -45,17 +45,17 @@ __xfs_xattr_get(struct inode *inode, const char *name,
45 value = NULL; 45 value = NULL;
46 } 46 }
47 47
48 error = -xfs_attr_get(ip, name, value, &asize, xflags); 48 error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
49 if (error) 49 if (error)
50 return error; 50 return error;
51 return asize; 51 return asize;
52} 52}
53 53
54static int 54static int
55__xfs_xattr_set(struct inode *inode, const char *name, const void *value, 55xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
56 size_t size, int flags, int xflags) 56 size_t size, int flags, int xflags)
57{ 57{
58 struct xfs_inode *ip = XFS_I(inode); 58 struct xfs_inode *ip = XFS_I(dentry->d_inode);
59 59
60 if (strcmp(name, "") == 0) 60 if (strcmp(name, "") == 0)
61 return -EINVAL; 61 return -EINVAL;
@@ -67,79 +67,39 @@ __xfs_xattr_set(struct inode *inode, const char *name, const void *value,
67 xflags |= ATTR_REPLACE; 67 xflags |= ATTR_REPLACE;
68 68
69 if (!value) 69 if (!value)
70 return -xfs_attr_remove(ip, name, xflags); 70 return -xfs_attr_remove(ip, (unsigned char *)name, xflags);
71 return -xfs_attr_set(ip, name, (void *)value, size, xflags); 71 return -xfs_attr_set(ip, (unsigned char *)name,
72} 72 (void *)value, size, xflags);
73
74static int
75xfs_xattr_user_get(struct inode *inode, const char *name,
76 void *value, size_t size)
77{
78 return __xfs_xattr_get(inode, name, value, size, 0);
79}
80
81static int
82xfs_xattr_user_set(struct inode *inode, const char *name,
83 const void *value, size_t size, int flags)
84{
85 return __xfs_xattr_set(inode, name, value, size, flags, 0);
86} 73}
87 74
88static struct xattr_handler xfs_xattr_user_handler = { 75static struct xattr_handler xfs_xattr_user_handler = {
89 .prefix = XATTR_USER_PREFIX, 76 .prefix = XATTR_USER_PREFIX,
90 .get = xfs_xattr_user_get, 77 .flags = 0, /* no flags implies user namespace */
91 .set = xfs_xattr_user_set, 78 .get = xfs_xattr_get,
79 .set = xfs_xattr_set,
92}; 80};
93 81
94
95static int
96xfs_xattr_trusted_get(struct inode *inode, const char *name,
97 void *value, size_t size)
98{
99 return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT);
100}
101
102static int
103xfs_xattr_trusted_set(struct inode *inode, const char *name,
104 const void *value, size_t size, int flags)
105{
106 return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT);
107}
108
109static struct xattr_handler xfs_xattr_trusted_handler = { 82static struct xattr_handler xfs_xattr_trusted_handler = {
110 .prefix = XATTR_TRUSTED_PREFIX, 83 .prefix = XATTR_TRUSTED_PREFIX,
111 .get = xfs_xattr_trusted_get, 84 .flags = ATTR_ROOT,
112 .set = xfs_xattr_trusted_set, 85 .get = xfs_xattr_get,
86 .set = xfs_xattr_set,
113}; 87};
114 88
115
116static int
117xfs_xattr_secure_get(struct inode *inode, const char *name,
118 void *value, size_t size)
119{
120 return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE);
121}
122
123static int
124xfs_xattr_secure_set(struct inode *inode, const char *name,
125 const void *value, size_t size, int flags)
126{
127 return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE);
128}
129
130static struct xattr_handler xfs_xattr_security_handler = { 89static struct xattr_handler xfs_xattr_security_handler = {
131 .prefix = XATTR_SECURITY_PREFIX, 90 .prefix = XATTR_SECURITY_PREFIX,
132 .get = xfs_xattr_secure_get, 91 .flags = ATTR_SECURE,
133 .set = xfs_xattr_secure_set, 92 .get = xfs_xattr_get,
93 .set = xfs_xattr_set,
134}; 94};
135 95
136
137struct xattr_handler *xfs_xattr_handlers[] = { 96struct xattr_handler *xfs_xattr_handlers[] = {
138 &xfs_xattr_user_handler, 97 &xfs_xattr_user_handler,
139 &xfs_xattr_trusted_handler, 98 &xfs_xattr_trusted_handler,
140 &xfs_xattr_security_handler, 99 &xfs_xattr_security_handler,
141#ifdef CONFIG_XFS_POSIX_ACL 100#ifdef CONFIG_XFS_POSIX_ACL
142 &xfs_xattr_system_handler, 101 &xfs_xattr_acl_access_handler,
102 &xfs_xattr_acl_default_handler,
143#endif 103#endif
144 NULL 104 NULL
145}; 105};
@@ -165,8 +125,13 @@ static const char *xfs_xattr_prefix(int flags)
165} 125}
166 126
167static int 127static int
168xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags, 128xfs_xattr_put_listent(
169 char *name, int namelen, int valuelen, char *value) 129 struct xfs_attr_list_context *context,
130 int flags,
131 unsigned char *name,
132 int namelen,
133 int valuelen,
134 unsigned char *value)
170{ 135{
171 unsigned int prefix_len = xfs_xattr_prefix_len(flags); 136 unsigned int prefix_len = xfs_xattr_prefix_len(flags);
172 char *offset; 137 char *offset;
@@ -189,7 +154,7 @@ xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags,
189 offset = (char *)context->alist + context->count; 154 offset = (char *)context->alist + context->count;
190 strncpy(offset, xfs_xattr_prefix(flags), prefix_len); 155 strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
191 offset += prefix_len; 156 offset += prefix_len;
192 strncpy(offset, name, namelen); /* real name */ 157 strncpy(offset, (char *)name, namelen); /* real name */
193 offset += namelen; 158 offset += namelen;
194 *offset = '\0'; 159 *offset = '\0';
195 context->count += prefix_len + namelen + 1; 160 context->count += prefix_len + namelen + 1;
@@ -197,8 +162,13 @@ xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags,
197} 162}
198 163
199static int 164static int
200xfs_xattr_put_listent_sizes(struct xfs_attr_list_context *context, int flags, 165xfs_xattr_put_listent_sizes(
201 char *name, int namelen, int valuelen, char *value) 166 struct xfs_attr_list_context *context,
167 int flags,
168 unsigned char *name,
169 int namelen,
170 int valuelen,
171 unsigned char *value)
202{ 172{
203 context->count += xfs_xattr_prefix_len(flags) + namelen + 1; 173 context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
204 return 0; 174 return 0;