Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r--  fs/xfs/linux-2.6/kmem.c         |  23
-rw-r--r--  fs/xfs/linux-2.6/kmem.h         |  23
-rw-r--r--  fs/xfs/linux-2.6/spin.h         |   3
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c     | 259
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.h     |  50
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c      | 117
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.h      |  12
-rw-r--r--  fs/xfs/linux-2.6/xfs_file.c     |  90
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c    |  18
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.c  |  65
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c     |  15
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h    |  13
-rw-r--r--  fs/xfs/linux-2.6/xfs_lrw.c      |   3
-rw-r--r--  fs/xfs/linux-2.6/xfs_lrw.h      |   7
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c    | 166
-rw-r--r--  fs/xfs/linux-2.6/xfs_vfs.c      |   1
-rw-r--r--  fs/xfs/linux-2.6/xfs_vfs.h      |   2
-rw-r--r--  fs/xfs/linux-2.6/xfs_vnode.c    | 251
-rw-r--r--  fs/xfs/linux-2.6/xfs_vnode.h    |  60
19 files changed, 564 insertions(+), 614 deletions(-)
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 364ea8c386b1..4b184559f231 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -45,11 +45,11 @@
 
 
 void *
-kmem_alloc(size_t size, int flags)
+kmem_alloc(size_t size, unsigned int __nocast flags)
 {
	int	retries = 0;
-	int	lflags = kmem_flags_convert(flags);
+	unsigned int lflags = kmem_flags_convert(flags);
	void	*ptr;
 
	do {
		if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
@@ -67,7 +67,7 @@ kmem_alloc(size_t size, int flags)
 }
 
 void *
-kmem_zalloc(size_t size, int flags)
+kmem_zalloc(size_t size, unsigned int __nocast flags)
 {
	void	*ptr;
 
@@ -89,7 +89,8 @@ kmem_free(void *ptr, size_t size)
 }
 
 void *
-kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags)
+kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
+	     unsigned int __nocast flags)
 {
	void	*new;
 
@@ -104,11 +105,11 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags)
 }
 
 void *
-kmem_zone_alloc(kmem_zone_t *zone, int flags)
+kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
 {
	int	retries = 0;
-	int	lflags = kmem_flags_convert(flags);
+	unsigned int lflags = kmem_flags_convert(flags);
	void	*ptr;
 
	do {
		ptr = kmem_cache_alloc(zone, lflags);
@@ -123,7 +124,7 @@ kmem_zone_alloc(kmem_zone_t *zone, int flags)
 }
 
 void *
-kmem_zone_zalloc(kmem_zone_t *zone, int flags)
+kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
 {
	void	*ptr;
 
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 1397b669b059..109fcf27e256 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -39,10 +39,10 @@
 /*
  * memory management routines
  */
-#define KM_SLEEP	0x0001
-#define KM_NOSLEEP	0x0002
-#define KM_NOFS		0x0004
-#define KM_MAYFAIL	0x0008
+#define KM_SLEEP	0x0001u
+#define KM_NOSLEEP	0x0002u
+#define KM_NOFS		0x0004u
+#define KM_MAYFAIL	0x0008u
 
 #define kmem_zone	kmem_cache_s
 #define kmem_zone_t	kmem_cache_t
@@ -81,9 +81,9 @@ typedef unsigned long xfs_pflags_t;
	*(NSTATEP) = *(OSTATEP);	\
 } while (0)
 
-static __inline unsigned int kmem_flags_convert(int flags)
+static __inline unsigned int kmem_flags_convert(unsigned int __nocast flags)
 {
-	int lflags = __GFP_NOWARN;	/* we'll report problems, if need be */
+	unsigned int lflags = __GFP_NOWARN;	/* we'll report problems, if need be */
 
 #ifdef DEBUG
	if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) {
@@ -125,12 +125,13 @@ kmem_zone_destroy(kmem_zone_t *zone)
		BUG();
 }
 
-extern void *kmem_zone_zalloc(kmem_zone_t *, int);
-extern void *kmem_zone_alloc(kmem_zone_t *, int);
+extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
+extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
 
-extern void *kmem_alloc(size_t, int);
-extern void *kmem_realloc(void *, size_t, size_t, int);
-extern void *kmem_zalloc(size_t, int);
+extern void *kmem_alloc(size_t, unsigned int __nocast);
+extern void *kmem_realloc(void *, size_t, size_t,
+			  unsigned int __nocast);
+extern void *kmem_zalloc(size_t, unsigned int __nocast);
 extern void  kmem_free(void *, size_t);
 
 typedef struct shrinker *kmem_shaker_t;
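
[Aside: the __nocast annotation keeps sparse from silently converting between the
XFS KM_* flag space and GFP flags. For orientation, a minimal sketch of the mapping
kmem_flags_convert() plausibly performs -- the in-tree version also consults the
task's PF_FSTRANS state, which is omitted here, so treat this as illustrative:

	static inline unsigned int kmem_flags_convert_sketch(unsigned int flags)
	{
		unsigned int lflags = __GFP_NOWARN;	/* XFS reports failures itself */

		if (flags & KM_NOSLEEP) {
			lflags |= GFP_ATOMIC;		/* caller may not block */
		} else {
			lflags |= GFP_KERNEL;		/* normal sleeping allocation */
			if (flags & KM_NOFS)
				lflags &= ~__GFP_FS;	/* avoid fs recursion */
		}
		return lflags;
	}
]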
diff --git a/fs/xfs/linux-2.6/spin.h b/fs/xfs/linux-2.6/spin.h
index bcf60a0b8df0..0039504069a5 100644
--- a/fs/xfs/linux-2.6/spin.h
+++ b/fs/xfs/linux-2.6/spin.h
@@ -45,6 +45,9 @@
 typedef spinlock_t lock_t;
 
 #define SPLDECL(s)		unsigned long s
+#ifndef DEFINE_SPINLOCK
+#define DEFINE_SPINLOCK(s)	spinlock_t s = SPIN_LOCK_UNLOCKED
+#endif
 
 #define spinlock_init(lock, name)	spin_lock_init(lock)
 #define spinlock_destroy(lock)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a3a4b5aaf5d9..c6c077978fe3 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -104,66 +104,114 @@ xfs_page_trace(
 #define xfs_page_trace(tag, inode, page, mask)
 #endif
 
-void
-linvfs_unwritten_done(
-	struct buffer_head	*bh,
-	int			uptodate)
+/*
+ * Schedule IO completion handling on a xfsdatad if this was
+ * the final hold on this ioend.
+ */
+STATIC void
+xfs_finish_ioend(
+	xfs_ioend_t		*ioend)
 {
-	xfs_buf_t		*pb = (xfs_buf_t *)bh->b_private;
+	if (atomic_dec_and_test(&ioend->io_remaining))
+		queue_work(xfsdatad_workqueue, &ioend->io_work);
+}
 
-	ASSERT(buffer_unwritten(bh));
-	bh->b_end_io = NULL;
-	clear_buffer_unwritten(bh);
-	if (!uptodate)
-		pagebuf_ioerror(pb, EIO);
-	if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
-		pagebuf_iodone(pb, 1, 1);
-	}
-	end_buffer_async_write(bh, uptodate);
+STATIC void
+xfs_destroy_ioend(
+	xfs_ioend_t		*ioend)
+{
+	vn_iowake(ioend->io_vnode);
+	mempool_free(ioend, xfs_ioend_pool);
 }
 
 /*
  * Issue transactions to convert a buffer range from unwritten
- * to written extents (buffered IO).
+ * to written extents.
  */
 STATIC void
-linvfs_unwritten_convert(
-	xfs_buf_t	*bp)
+xfs_end_bio_unwritten(
+	void			*data)
 {
-	vnode_t	*vp = XFS_BUF_FSPRIVATE(bp, vnode_t *);
-	int	error;
+	xfs_ioend_t		*ioend = data;
+	vnode_t			*vp = ioend->io_vnode;
+	xfs_off_t		offset = ioend->io_offset;
+	size_t			size = ioend->io_size;
+	struct buffer_head	*bh, *next;
+	int			error;
+
+	if (ioend->io_uptodate)
+		VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
+
+	/* ioend->io_buffer_head is only non-NULL for buffered I/O */
+	for (bh = ioend->io_buffer_head; bh; bh = next) {
+		next = bh->b_private;
+
+		bh->b_end_io = NULL;
+		clear_buffer_unwritten(bh);
+		end_buffer_async_write(bh, ioend->io_uptodate);
+	}
 
-	BUG_ON(atomic_read(&bp->pb_hold) < 1);
-	VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp),
-			BMAPI_UNWRITTEN, NULL, NULL, error);
-	XFS_BUF_SET_FSPRIVATE(bp, NULL);
-	XFS_BUF_CLR_IODONE_FUNC(bp);
-	XFS_BUF_UNDATAIO(bp);
-	iput(LINVFS_GET_IP(vp));
-	pagebuf_iodone(bp, 0, 0);
+	xfs_destroy_ioend(ioend);
 }
 
 /*
- * Issue transactions to convert a buffer range from unwritten
- * to written extents (direct IO).
+ * Allocate and initialise an IO completion structure.
+ * We need to track unwritten extent write completion here initially.
+ * We'll need to extend this for updating the ondisk inode size later
+ * (vs. incore size).
  */
-STATIC void
-linvfs_unwritten_convert_direct(
-	struct kiocb	*iocb,
-	loff_t		offset,
-	ssize_t		size,
-	void		*private)
+STATIC xfs_ioend_t *
+xfs_alloc_ioend(
+	struct inode		*inode)
 {
-	struct inode	*inode = iocb->ki_filp->f_dentry->d_inode;
-	ASSERT(!private || inode == (struct inode *)private);
+	xfs_ioend_t		*ioend;
 
-	/* private indicates an unwritten extent lay beneath this IO */
-	if (private && size > 0) {
-		vnode_t	*vp = LINVFS_GET_VP(inode);
-		int	error;
+	ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
 
-		VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
-	}
+	/*
+	 * Set the count to 1 initially, which will prevent an I/O
+	 * completion callback that happens before we have started
+	 * all the I/O from calling the completion routine too early.
+	 */
+	atomic_set(&ioend->io_remaining, 1);
+	ioend->io_uptodate = 1; /* cleared if any I/O fails */
+	ioend->io_vnode = LINVFS_GET_VP(inode);
+	ioend->io_buffer_head = NULL;
+	atomic_inc(&ioend->io_vnode->v_iocount);
+	ioend->io_offset = 0;
+	ioend->io_size = 0;
+
+	INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+
+	return ioend;
+}
+
+void
+linvfs_unwritten_done(
+	struct buffer_head	*bh,
+	int			uptodate)
+{
+	xfs_ioend_t		*ioend = bh->b_private;
+	static spinlock_t	unwritten_done_lock = SPIN_LOCK_UNLOCKED;
+	unsigned long		flags;
+
+	ASSERT(buffer_unwritten(bh));
+	bh->b_end_io = NULL;
+
+	if (!uptodate)
+		ioend->io_uptodate = 0;
+
+	/*
+	 * Deep magic here.  We reuse b_private in the buffer_heads to build
+	 * a chain for completing the I/O from user context after we've issued
+	 * a transaction to convert the unwritten extent.
+	 */
+	spin_lock_irqsave(&unwritten_done_lock, flags);
+	bh->b_private = ioend->io_buffer_head;
+	ioend->io_buffer_head = bh;
+	spin_unlock_irqrestore(&unwritten_done_lock, flags);
+
+	xfs_finish_ioend(ioend);
 }
 
 STATIC int
169STATIC int 217STATIC int
@@ -255,7 +303,7 @@ xfs_probe_unwritten_page(
	struct address_space	*mapping,
	pgoff_t			index,
	xfs_iomap_t		*iomapp,
-	xfs_buf_t		*pb,
+	xfs_ioend_t		*ioend,
	unsigned long		max_offset,
	unsigned long		*fsbs,
	unsigned int		bbits)
@@ -283,7 +331,7 @@
			break;
		xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
		set_buffer_unwritten_io(bh);
-		bh->b_private = pb;
+		bh->b_private = ioend;
		p_offset += bh->b_size;
		(*fsbs)++;
	} while ((bh = bh->b_this_page) != head);
@@ -434,34 +482,15 @@ xfs_map_unwritten(
 {
	struct buffer_head	*bh = curr;
	xfs_iomap_t		*tmp;
-	xfs_buf_t		*pb;
-	loff_t			offset, size;
+	xfs_ioend_t		*ioend;
+	loff_t			offset;
	unsigned long		nblocks = 0;
 
	offset = start_page->index;
	offset <<= PAGE_CACHE_SHIFT;
	offset += p_offset;
 
-	/* get an "empty" pagebuf to manage IO completion
-	 * Proper values will be set before returning */
-	pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0);
-	if (!pb)
-		return -EAGAIN;
-
-	/* Take a reference to the inode to prevent it from
-	 * being reclaimed while we have outstanding unwritten
-	 * extent IO on it.
-	 */
-	if ((igrab(inode)) != inode) {
-		pagebuf_free(pb);
-		return -EAGAIN;
-	}
-
-	/* Set the count to 1 initially, this will stop an I/O
-	 * completion callout which happens before we have started
-	 * all the I/O from calling pagebuf_iodone too early.
-	 */
-	atomic_set(&pb->pb_io_remaining, 1);
+	ioend = xfs_alloc_ioend(inode);
 
	/* First map forwards in the page consecutive buffers
	 * covering this unwritten extent
@@ -474,12 +503,12 @@
			break;
		xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
		set_buffer_unwritten_io(bh);
-		bh->b_private = pb;
+		bh->b_private = ioend;
		p_offset += bh->b_size;
		nblocks++;
	} while ((bh = bh->b_this_page) != head);
 
-	atomic_add(nblocks, &pb->pb_io_remaining);
+	atomic_add(nblocks, &ioend->io_remaining);
 
	/* If we reached the end of the page, map forwards in any
	 * following pages which are also covered by this extent.
@@ -496,13 +525,13 @@
		tloff = min(tlast, tloff);
		for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
			page = xfs_probe_unwritten_page(mapping,
-						tindex, iomapp, pb,
+						tindex, iomapp, ioend,
						PAGE_CACHE_SIZE, &bs, bbits);
			if (!page)
				break;
			nblocks += bs;
-			atomic_add(bs, &pb->pb_io_remaining);
-			xfs_convert_page(inode, page, iomapp, wbc, pb,
+			atomic_add(bs, &ioend->io_remaining);
+			xfs_convert_page(inode, page, iomapp, wbc, ioend,
					startio, all_bh);
			/* stop if converting the next page might add
			 * enough blocks that the corresponding byte
@@ -514,12 +543,12 @@
	if (tindex == tlast &&
	    (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
		page = xfs_probe_unwritten_page(mapping,
-					tindex, iomapp, pb,
+					tindex, iomapp, ioend,
					pg_offset, &bs, bbits);
		if (page) {
			nblocks += bs;
-			atomic_add(bs, &pb->pb_io_remaining);
-			xfs_convert_page(inode, page, iomapp, wbc, pb,
+			atomic_add(bs, &ioend->io_remaining);
+			xfs_convert_page(inode, page, iomapp, wbc, ioend,
					startio, all_bh);
			if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
				goto enough;
@@ -528,21 +557,9 @@
	}
 
 enough:
-	size = nblocks;		/* NB: using 64bit number here */
-	size <<= block_bits;	/* convert fsb's to byte range */
-
-	XFS_BUF_DATAIO(pb);
-	XFS_BUF_ASYNC(pb);
-	XFS_BUF_SET_SIZE(pb, size);
-	XFS_BUF_SET_COUNT(pb, size);
-	XFS_BUF_SET_OFFSET(pb, offset);
-	XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode));
-	XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert);
-
-	if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
-		pagebuf_iodone(pb, 1, 1);
-	}
-
+	ioend->io_size = (xfs_off_t)nblocks << block_bits;
+	ioend->io_offset = offset;
+	xfs_finish_ioend(ioend);
	return 0;
 }
 
@@ -787,7 +804,7 @@ xfs_page_state_convert(
			continue;
		if (!iomp) {
			err = xfs_map_blocks(inode, offset, len, &iomap,
-					BMAPI_READ|BMAPI_IGNSTATE);
+					BMAPI_WRITE|BMAPI_IGNSTATE);
			if (err) {
				goto error;
			}
@@ -1028,6 +1045,44 @@ linvfs_get_blocks_direct(
		create, 1, BMAPI_WRITE|BMAPI_DIRECT);
 }
 
+STATIC void
+linvfs_end_io_direct(
+	struct kiocb	*iocb,
+	loff_t		offset,
+	ssize_t		size,
+	void		*private)
+{
+	xfs_ioend_t	*ioend = iocb->private;
+
+	/*
+	 * Non-NULL private data means we need to issue a transaction to
+	 * convert a range from unwritten to written extents.  This needs
+	 * to happen from process context but aio+dio I/O completion
+	 * happens from irq context so we need to defer it to a workqueue.
+	 * This is not necessary for synchronous direct I/O, but we do
+	 * it anyway to keep the code uniform and simpler.
+	 *
+	 * The core direct I/O code might be changed to always call the
+	 * completion handler in the future, in which case all this can
+	 * go away.
+	 */
+	if (private && size > 0) {
+		ioend->io_offset = offset;
+		ioend->io_size = size;
+		xfs_finish_ioend(ioend);
+	} else {
+		ASSERT(size >= 0);
+		xfs_destroy_ioend(ioend);
+	}
+
+	/*
+	 * blockdev_direct_IO can return an error even after the I/O
+	 * completion handler was called.  Thus we need to protect
+	 * against double-freeing.
+	 */
+	iocb->private = NULL;
+}
+
 STATIC ssize_t
 linvfs_direct_IO(
	int		rw,
@@ -1042,16 +1097,23 @@ linvfs_direct_IO(
	xfs_iomap_t	iomap;
	int		maps = 1;
	int		error;
+	ssize_t		ret;
 
	VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error);
	if (error)
		return -error;
 
-	return blockdev_direct_IO_own_locking(rw, iocb, inode,
+	iocb->private = xfs_alloc_ioend(inode);
+
+	ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
			iomap.iomap_target->pbr_bdev,
			iov, offset, nr_segs,
			linvfs_get_blocks_direct,
-			linvfs_unwritten_convert_direct);
+			linvfs_end_io_direct);
+
+	if (unlikely(ret <= 0 && iocb->private))
+		xfs_destroy_ioend(iocb->private);
+	return ret;
 }
 
 
@@ -1202,6 +1264,16 @@ out_unlock:
	return error;
 }
 
+STATIC int
+linvfs_invalidate_page(
+	struct page		*page,
+	unsigned long		offset)
+{
+	xfs_page_trace(XFS_INVALIDPAGE_ENTER,
+			page->mapping->host, page, offset);
+	return block_invalidatepage(page, offset);
+}
+
 /*
  * Called to move a page into cleanable state - and from there
  * to be released. Possibly the page is already clean. We always
@@ -1279,6 +1351,7 @@ struct address_space_operations linvfs_aops = {
	.writepage		= linvfs_writepage,
	.sync_page		= block_sync_page,
	.releasepage		= linvfs_release_page,
+	.invalidatepage		= linvfs_invalidate_page,
	.prepare_write		= linvfs_prepare_write,
	.commit_write		= generic_commit_write,
	.bmap			= linvfs_bmap,
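
[Aside: the ioend object introduced above is plain reference counting. A condensed
pseudo-flow of its lifecycle, using only names added by this patch -- illustrative,
not compilable in isolation:

	xfs_ioend_t *ioend = xfs_alloc_ioend(inode);	/* io_remaining == 1 */

	atomic_add(nblocks, &ioend->io_remaining);	/* one hold per buffer */
	/* ... submit the buffers; each buffer's completion path ends in ... */
	xfs_finish_ioend(ioend);			/* drops one hold */

	xfs_finish_ioend(ioend);	/* setup path drops its initial hold */

	/* Whoever drops io_remaining to zero queues xfs_end_bio_unwritten()
	 * on xfsdatad_workqueue; that converts the unwritten extent from
	 * process context and frees the ioend back to xfs_ioend_pool. */
]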
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
new file mode 100644
index 000000000000..2fa62974a04d
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2005 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_AOPS_H__
+#define __XFS_AOPS_H__
+
+extern struct workqueue_struct *xfsdatad_workqueue;
+extern mempool_t *xfs_ioend_pool;
+
+typedef void (*xfs_ioend_func_t)(void *);
+
+typedef struct xfs_ioend {
+	unsigned int		io_uptodate;	/* I/O status register */
+	atomic_t		io_remaining;	/* hold count */
+	struct vnode		*io_vnode;	/* file being written to */
+	struct buffer_head	*io_buffer_head;/* buffer linked list head */
+	size_t			io_size;	/* size of the extent */
+	xfs_off_t		io_offset;	/* offset in the file */
+	struct work_struct	io_work;	/* xfsdatad work queue */
+} xfs_ioend_t;
+
+#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index df0cba239dd5..655bf4a78afe 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -54,6 +54,7 @@
 #include <linux/percpu.h>
 #include <linux/blkdev.h>
 #include <linux/hash.h>
+#include <linux/kthread.h>
 
 #include "xfs_linux.h"
 
@@ -67,7 +68,7 @@ STATIC int xfsbufd_wakeup(int, unsigned int);
 STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
 
 STATIC struct workqueue_struct *xfslogd_workqueue;
-STATIC struct workqueue_struct *xfsdatad_workqueue;
+struct workqueue_struct *xfsdatad_workqueue;
 
 /*
  * Pagebuf debugging
@@ -590,8 +591,10 @@ found:
		PB_SET_OWNER(pb);
	}
 
-	if (pb->pb_flags & PBF_STALE)
+	if (pb->pb_flags & PBF_STALE) {
+		ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0);
		pb->pb_flags &= PBF_MAPPED;
+	}
	PB_TRACE(pb, "got_lock", 0);
	XFS_STATS_INC(pb_get_locked);
	return (pb);
@@ -700,25 +703,6 @@ xfs_buf_read_flags(
 }
 
 /*
- * Create a skeletal pagebuf (no pages associated with it).
- */
-xfs_buf_t *
-pagebuf_lookup(
-	xfs_buftarg_t		*target,
-	loff_t			ioff,
-	size_t			isize,
-	page_buf_flags_t	flags)
-{
-	xfs_buf_t		*pb;
-
-	pb = pagebuf_allocate(flags);
-	if (pb) {
-		_pagebuf_initialize(pb, target, ioff, isize, flags);
-	}
-	return pb;
-}
-
-/*
  * If we are not low on memory then do the readahead in a deadlock
  * safe manner.
  */
@@ -913,22 +897,23 @@ pagebuf_rele(
			do_free = 0;
		}
 
-		if (pb->pb_flags & PBF_DELWRI) {
-			pb->pb_flags |= PBF_ASYNC;
-			atomic_inc(&pb->pb_hold);
-			pagebuf_delwri_queue(pb, 0);
-			do_free = 0;
-		} else if (pb->pb_flags & PBF_FS_MANAGED) {
+		if (pb->pb_flags & PBF_FS_MANAGED) {
			do_free = 0;
		}
 
		if (do_free) {
+			ASSERT((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == 0);
			list_del_init(&pb->pb_hash_list);
			spin_unlock(&hash->bh_lock);
			pagebuf_free(pb);
		} else {
			spin_unlock(&hash->bh_lock);
		}
+	} else {
+		/*
+		 * Catch reference count leaks
+		 */
+		ASSERT(atomic_read(&pb->pb_hold) >= 0);
	}
 }
 
@@ -1006,13 +991,24 @@ pagebuf_lock(
  * pagebuf_unlock
  *
  *	pagebuf_unlock releases the lock on the buffer object created by
- *	pagebuf_lock or pagebuf_cond_lock (not any
- *	pinning of underlying pages created by pagebuf_pin).
+ *	pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages
+ *	created by pagebuf_pin).
+ *
+ *	If the buffer is marked delwri but is not queued, do so before we
+ *	unlock the buffer as we need to set flags correctly.  We also need to
+ *	take a reference for the delwri queue because the unlocker is going to
+ *	drop theirs and they don't know we just queued it.
  */
 void
 pagebuf_unlock(				/* unlock buffer		*/
	xfs_buf_t		*pb)	/* buffer to unlock		*/
 {
+	if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) {
+		atomic_inc(&pb->pb_hold);
+		pb->pb_flags |= PBF_ASYNC;
+		pagebuf_delwri_queue(pb, 0);
+	}
+
	PB_CLEAR_OWNER(pb);
	up(&pb->pb_sema);
	PB_TRACE(pb, "unlock", 0);
@@ -1249,8 +1245,8 @@ bio_end_io_pagebuf(
	int			error)
 {
	xfs_buf_t		*pb = (xfs_buf_t *)bio->bi_private;
-	unsigned int		i, blocksize = pb->pb_target->pbr_bsize;
-	struct bio_vec		*bvec = bio->bi_io_vec;
+	unsigned int		blocksize = pb->pb_target->pbr_bsize;
+	struct bio_vec		*bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 
	if (bio->bi_size)
		return 1;
@@ -1258,10 +1254,12 @@
	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		pb->pb_error = EIO;
 
-	for (i = 0; i < bio->bi_vcnt; i++, bvec++) {
+	do {
		struct page	*page = bvec->bv_page;
 
-		if (pb->pb_error) {
+		if (unlikely(pb->pb_error)) {
+			if (pb->pb_flags & PBF_READ)
+				ClearPageUptodate(page);
			SetPageError(page);
		} else if (blocksize == PAGE_CACHE_SIZE) {
			SetPageUptodate(page);
@@ -1270,10 +1268,13 @@
			set_page_region(page, bvec->bv_offset, bvec->bv_len);
		}
 
+		if (--bvec >= bio->bi_io_vec)
+			prefetchw(&bvec->bv_page->flags);
+
		if (_pagebuf_iolocked(pb)) {
			unlock_page(page);
		}
-	}
+	} while (bvec >= bio->bi_io_vec);
 
	_pagebuf_iodone(pb, 1);
	bio_put(bio);
@@ -1511,6 +1512,11 @@ again:
			ASSERT(btp == bp->pb_target);
			if (!(bp->pb_flags & PBF_FS_MANAGED)) {
				spin_unlock(&hash->bh_lock);
+				/*
+				 * Catch superblock reference count leaks
+				 * immediately
+				 */
+				BUG_ON(bp->pb_bn == 0);
				delay(100);
				goto again;
			}
@@ -1686,17 +1692,20 @@ pagebuf_delwri_queue(
	int			unlock)
 {
	PB_TRACE(pb, "delwri_q", (long)unlock);
-	ASSERT(pb->pb_flags & PBF_DELWRI);
+	ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) ==
+					(PBF_DELWRI|PBF_ASYNC));
 
	spin_lock(&pbd_delwrite_lock);
	/* If already in the queue, dequeue and place at tail */
	if (!list_empty(&pb->pb_list)) {
+		ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
		if (unlock) {
			atomic_dec(&pb->pb_hold);
		}
		list_del(&pb->pb_list);
	}
 
+	pb->pb_flags |= _PBF_DELWRI_Q;
	list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
	pb->pb_queuetime = jiffies;
	spin_unlock(&pbd_delwrite_lock);
@@ -1713,10 +1722,11 @@ pagebuf_delwri_dequeue(
 
	spin_lock(&pbd_delwrite_lock);
	if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
+		ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
		list_del_init(&pb->pb_list);
		dequeued = 1;
	}
-	pb->pb_flags &= ~PBF_DELWRI;
+	pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
	spin_unlock(&pbd_delwrite_lock);
 
	if (dequeued)
@@ -1733,9 +1743,7 @@ pagebuf_runall_queues(
 }
 
 /* Defines for pagebuf daemon */
-STATIC DECLARE_COMPLETION(xfsbufd_done);
 STATIC struct task_struct *xfsbufd_task;
-STATIC int xfsbufd_active;
 STATIC int xfsbufd_force_flush;
 STATIC int xfsbufd_force_sleep;
 
@@ -1761,14 +1769,8 @@ xfsbufd(
	xfs_buftarg_t		*target;
	xfs_buf_t		*pb, *n;
 
-	/*  Set up the thread  */
-	daemonize("xfsbufd");
	current->flags |= PF_MEMALLOC;
 
-	xfsbufd_task = current;
-	xfsbufd_active = 1;
-	barrier();
-
	INIT_LIST_HEAD(&tmp);
	do {
		if (unlikely(freezing(current))) {
@@ -1795,7 +1797,7 @@
				break;
			}
 
-			pb->pb_flags &= ~PBF_DELWRI;
+			pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
			pb->pb_flags |= PBF_WRITE;
			list_move(&pb->pb_list, &tmp);
		}
@@ -1816,9 +1818,9 @@
			purge_addresses();
 
		xfsbufd_force_flush = 0;
-	} while (xfsbufd_active);
+	} while (!kthread_should_stop());
 
-	complete_and_exit(&xfsbufd_done, 0);
+	return 0;
 }
 
 /*
@@ -1845,15 +1847,13 @@ xfs_flush_buftarg(
		if (pb->pb_target != target)
			continue;
 
-		ASSERT(pb->pb_flags & PBF_DELWRI);
+		ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q));
		PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
		if (pagebuf_ispin(pb)) {
			pincount++;
			continue;
		}
 
-		pb->pb_flags &= ~PBF_DELWRI;
-		pb->pb_flags |= PBF_WRITE;
		list_move(&pb->pb_list, &tmp);
	}
	spin_unlock(&pbd_delwrite_lock);
@@ -1862,12 +1862,14 @@
	 * Dropped the delayed write list lock, now walk the temporary list
	 */
	list_for_each_entry_safe(pb, n, &tmp, pb_list) {
+		pagebuf_lock(pb);
+		pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
+		pb->pb_flags |= PBF_WRITE;
		if (wait)
			pb->pb_flags &= ~PBF_ASYNC;
		else
			list_del_init(&pb->pb_list);
 
-		pagebuf_lock(pb);
		pagebuf_iostrategy(pb);
	}
 
@@ -1901,9 +1903,11 @@ xfs_buf_daemons_start(void)
	if (!xfsdatad_workqueue)
		goto out_destroy_xfslogd_workqueue;
 
-	error = kernel_thread(xfsbufd, NULL, CLONE_FS|CLONE_FILES);
-	if (error < 0)
+	xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd");
+	if (IS_ERR(xfsbufd_task)) {
+		error = PTR_ERR(xfsbufd_task);
		goto out_destroy_xfsdatad_workqueue;
+	}
	return 0;
 
 out_destroy_xfsdatad_workqueue:
@@ -1920,10 +1924,7 @@ xfs_buf_daemons_start(void)
 STATIC void
 xfs_buf_daemons_stop(void)
 {
-	xfsbufd_active = 0;
-	barrier();
-	wait_for_completion(&xfsbufd_done);
-
+	kthread_stop(xfsbufd_task);
	destroy_workqueue(xfslogd_workqueue);
	destroy_workqueue(xfsdatad_workqueue);
 }
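
[Aside: the daemonize()/completion pair is gone in favour of the kthread API. A
self-contained sketch of the pattern xfs_buf.c now follows (simplified; not the
actual xfsbufd body):

	#include <linux/kthread.h>
	#include <linux/err.h>

	static struct task_struct *task;

	static int worker(void *arg)
	{
		do {
			/* flush aged delwri buffers, then sleep */
		} while (!kthread_should_stop());	/* set by kthread_stop() */
		return 0;
	}

	static int start(void)
	{
		task = kthread_run(worker, NULL, "xfsbufd");
		return IS_ERR(task) ? PTR_ERR(task) : 0;
	}

	static void stop(void)
	{
		kthread_stop(task);	/* wakes the thread and waits for it to exit */
	}
]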
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 3f8f69a66aea..67c19f799232 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -89,6 +89,7 @@ typedef enum page_buf_flags_e { /* pb_flags values */
	_PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache		   */
	_PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc()		   */
	_PBF_RUN_QUEUES = (1 << 19),/* run block device task queue	   */
+	_PBF_DELWRI_Q	= (1 << 21),/* buffer on delwri queue		   */
 } page_buf_flags_t;
 
 #define PBF_UPDATE (PBF_READ | PBF_WRITE)
@@ -206,13 +207,6 @@ extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */
 #define xfs_buf_read(target, blkno, len, flags) \
		xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
 
-extern xfs_buf_t *pagebuf_lookup(
-		xfs_buftarg_t *,
-		loff_t,			/* starting offset of range	*/
-		size_t,			/* length of range		*/
-		page_buf_flags_t);	/* PBF_READ, PBF_WRITE,		*/
-					/* PBF_FORCEIO,			*/
-
 extern xfs_buf_t *pagebuf_get_empty(	/* allocate pagebuf struct with	*/
					/*  no memory or disk address	*/
		size_t len,
@@ -344,8 +338,6 @@ extern void pagebuf_trace(
 
 
 
-
-
 /* These are just for xfs_syncsub... it sets an internal variable
  * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t
  */
@@ -452,7 +444,7 @@ extern void pagebuf_trace(
 
 #define XFS_BUF_PTR(bp)		(xfs_caddr_t)((bp)->pb_addr)
 
-extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
+static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
 {
	if (bp->pb_flags & PBF_MAPPED)
		return XFS_BUF_PTR(bp) + offset;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index f1ce4323f56e..3881622bcf08 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -311,6 +311,31 @@ linvfs_fsync(
 
 #define nextdp(dp)	((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen))
 
+#ifdef CONFIG_XFS_DMAPI
+
+STATIC struct page *
+linvfs_filemap_nopage(
+	struct vm_area_struct	*area,
+	unsigned long		address,
+	int			*type)
+{
+	struct inode	*inode = area->vm_file->f_dentry->d_inode;
+	vnode_t		*vp = LINVFS_GET_VP(inode);
+	xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);
+	int		error;
+
+	ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
+
+	error = XFS_SEND_MMAP(mp, area, 0);
+	if (error)
+		return NULL;
+
+	return filemap_nopage(area, address, type);
+}
+
+#endif /* CONFIG_XFS_DMAPI */
+
+
 STATIC int
 linvfs_readdir(
	struct file	*filp,
@@ -390,14 +415,6 @@ done:
	return -error;
 }
 
-#ifdef CONFIG_XFS_DMAPI
-STATIC void
-linvfs_mmap_close(
-	struct vm_area_struct	*vma)
-{
-	xfs_dm_mm_put(vma);
-}
-#endif /* CONFIG_XFS_DMAPI */
 
 STATIC int
 linvfs_file_mmap(
@@ -411,16 +428,11 @@ linvfs_file_mmap(
 
	vma->vm_ops = &linvfs_file_vm_ops;
 
-	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
-		xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);
-
-		error = -XFS_SEND_MMAP(mp, vma, 0);
-		if (error)
-			return error;
 #ifdef CONFIG_XFS_DMAPI
+	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
		vma->vm_ops = &linvfs_dmapi_file_vm_ops;
-#endif
	}
+#endif /* CONFIG_XFS_DMAPI */
 
	VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error);
	if (!error)
@@ -474,6 +486,7 @@ linvfs_ioctl_invis(
	return error;
 }
 
+#ifdef CONFIG_XFS_DMAPI
 #ifdef HAVE_VMOP_MPROTECT
 STATIC int
 linvfs_mprotect(
@@ -494,6 +507,7 @@ linvfs_mprotect(
	return error;
 }
 #endif	/* HAVE_VMOP_MPROTECT */
+#endif	/* CONFIG_XFS_DMAPI */
 
 #ifdef HAVE_FOP_OPEN_EXEC
 /* If the user is attempting to execute a file that is offline then
@@ -528,49 +542,10 @@ open_exec_out:
 }
 #endif	/* HAVE_FOP_OPEN_EXEC */
 
-/*
- * Temporary workaround to the AIO direct IO write problem.
- * This code can go and we can revert to do_sync_write once
- * the writepage(s) rework is merged.
- */
-STATIC ssize_t
-linvfs_write(
-	struct file	*filp,
-	const char	__user *buf,
-	size_t		len,
-	loff_t		*ppos)
-{
-	struct kiocb	kiocb;
-	ssize_t		ret;
-
-	init_sync_kiocb(&kiocb, filp);
-	kiocb.ki_pos = *ppos;
-	ret = __linvfs_write(&kiocb, buf, 0, len, kiocb.ki_pos);
-	*ppos = kiocb.ki_pos;
-	return ret;
-}
-STATIC ssize_t
-linvfs_write_invis(
-	struct file	*filp,
-	const char	__user *buf,
-	size_t		len,
-	loff_t		*ppos)
-{
-	struct kiocb	kiocb;
-	ssize_t		ret;
-
-	init_sync_kiocb(&kiocb, filp);
-	kiocb.ki_pos = *ppos;
-	ret = __linvfs_write(&kiocb, buf, IO_INVIS, len, kiocb.ki_pos);
-	*ppos = kiocb.ki_pos;
-	return ret;
-}
-
-
 struct file_operations linvfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
-	.write		= linvfs_write,
+	.write		= do_sync_write,
	.readv		= linvfs_readv,
	.writev		= linvfs_writev,
	.aio_read	= linvfs_aio_read,
@@ -592,7 +567,7 @@ struct file_operations linvfs_file_operations = {
 struct file_operations linvfs_invis_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
-	.write		= linvfs_write_invis,
+	.write		= do_sync_write,
	.readv		= linvfs_readv_invis,
	.writev		= linvfs_writev_invis,
	.aio_read	= linvfs_aio_read_invis,
@@ -626,8 +601,7 @@ static struct vm_operations_struct linvfs_file_vm_ops = {
 
 #ifdef CONFIG_XFS_DMAPI
 static struct vm_operations_struct linvfs_dmapi_file_vm_ops = {
-	.close		= linvfs_mmap_close,
-	.nopage		= filemap_nopage,
+	.nopage		= linvfs_filemap_nopage,
	.populate	= filemap_populate,
 #ifdef HAVE_VMOP_MPROTECT
	.mprotect	= linvfs_mprotect,
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 05a447e51cc0..6a3326bcd8d0 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -141,13 +141,19 @@ xfs_find_handle(
		return -XFS_ERROR(EINVAL);
	}
 
-	/* we need the vnode */
-	vp = LINVFS_GET_VP(inode);
-	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFREG:
+	case S_IFDIR:
+	case S_IFLNK:
+		break;
+	default:
		iput(inode);
		return -XFS_ERROR(EBADF);
	}
 
+	/* we need the vnode */
+	vp = LINVFS_GET_VP(inode);
+
	/* now we can grab the fsid */
	memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t));
	hsize = sizeof(xfs_fsid_t);
@@ -386,7 +392,7 @@ xfs_readlink_by_handle(
		return -error;
 
	/* Restrict this handle operation to symlinks only. */
-	if (vp->v_type != VLNK) {
+	if (!S_ISLNK(inode->i_mode)) {
		VN_RELE(vp);
		return -XFS_ERROR(EINVAL);
	}
@@ -982,10 +988,10 @@ xfs_ioc_space(
	if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
		return -XFS_ERROR(EPERM);
 
-	if (!(filp->f_flags & FMODE_WRITE))
+	if (!(filp->f_mode & FMODE_WRITE))
		return -XFS_ERROR(EBADF);
 
-	if (vp->v_type != VREG)
+	if (!VN_ISREG(vp))
		return -XFS_ERROR(EINVAL);
 
	if (copy_from_user(&bf, arg, sizeof(bf)))
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 0f8f1384eb36..4636b7f86f1f 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -47,8 +47,52 @@
 #include "xfs_vnode.h"
 #include "xfs_dfrag.h"
 
+#define  _NATIVE_IOC(cmd, type) \
+	  _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
+
 #if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
 #define BROKEN_X86_ALIGNMENT
+/* on ia32 l_start is on a 32-bit boundary */
+typedef struct xfs_flock64_32 {
+	__s16		l_type;
+	__s16		l_whence;
+	__s64		l_start	__attribute__((packed));
+			/* len == 0 means until end of file */
+	__s64		l_len __attribute__((packed));
+	__s32		l_sysid;
+	__u32		l_pid;
+	__s32		l_pad[4];	/* reserve area */
+} xfs_flock64_32_t;
+
+#define XFS_IOC_ALLOCSP_32	_IOW ('X', 10, struct xfs_flock64_32)
+#define XFS_IOC_FREESP_32	_IOW ('X', 11, struct xfs_flock64_32)
+#define XFS_IOC_ALLOCSP64_32	_IOW ('X', 36, struct xfs_flock64_32)
+#define XFS_IOC_FREESP64_32	_IOW ('X', 37, struct xfs_flock64_32)
+#define XFS_IOC_RESVSP_32	_IOW ('X', 40, struct xfs_flock64_32)
+#define XFS_IOC_UNRESVSP_32	_IOW ('X', 41, struct xfs_flock64_32)
+#define XFS_IOC_RESVSP64_32	_IOW ('X', 42, struct xfs_flock64_32)
+#define XFS_IOC_UNRESVSP64_32	_IOW ('X', 43, struct xfs_flock64_32)
+
+/* just account for different alignment */
+STATIC unsigned long
+xfs_ioctl32_flock(
+	unsigned long		arg)
+{
+	xfs_flock64_32_t	__user *p32 = (void __user *)arg;
+	xfs_flock64_t		__user *p = compat_alloc_user_space(sizeof(*p));
+
+	if (copy_in_user(&p->l_type,	&p32->l_type,	sizeof(s16)) ||
+	    copy_in_user(&p->l_whence,	&p32->l_whence,	sizeof(s16)) ||
+	    copy_in_user(&p->l_start,	&p32->l_start,	sizeof(s64)) ||
+	    copy_in_user(&p->l_len,	&p32->l_len,	sizeof(s64)) ||
+	    copy_in_user(&p->l_sysid,	&p32->l_sysid,	sizeof(s32)) ||
+	    copy_in_user(&p->l_pid,	&p32->l_pid,	sizeof(u32)) ||
+	    copy_in_user(&p->l_pad,	&p32->l_pad,	4*sizeof(u32)))
+		return -EFAULT;
+
+	return (unsigned long)p;
+}
+
 #else
 
 typedef struct xfs_fsop_bulkreq32 {
@@ -103,7 +147,6 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
 /* not handled
	case XFS_IOC_FD_TO_HANDLE:
	case XFS_IOC_PATH_TO_HANDLE:
-	case XFS_IOC_PATH_TO_HANDLE:
	case XFS_IOC_PATH_TO_FSHANDLE:
	case XFS_IOC_OPEN_BY_HANDLE:
	case XFS_IOC_FSSETDM_BY_HANDLE:
@@ -124,8 +167,21 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
	case XFS_IOC_ERROR_CLEARALL:
		break;
 
-#ifndef BROKEN_X86_ALIGNMENT
-	/* xfs_flock_t and xfs_bstat_t have wrong u32 vs u64 alignment */
+#ifdef BROKEN_X86_ALIGNMENT
+	/* xfs_flock_t has wrong u32 vs u64 alignment */
+	case XFS_IOC_ALLOCSP_32:
+	case XFS_IOC_FREESP_32:
+	case XFS_IOC_ALLOCSP64_32:
+	case XFS_IOC_FREESP64_32:
+	case XFS_IOC_RESVSP_32:
+	case XFS_IOC_UNRESVSP_32:
+	case XFS_IOC_RESVSP64_32:
+	case XFS_IOC_UNRESVSP64_32:
+		arg = xfs_ioctl32_flock(arg);
+		cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
+		break;
+
+#else /* These are handled fine if no alignment issues */
	case XFS_IOC_ALLOCSP:
	case XFS_IOC_FREESP:
	case XFS_IOC_RESVSP:
@@ -134,6 +190,9 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
	case XFS_IOC_FREESP64:
	case XFS_IOC_RESVSP64:
	case XFS_IOC_UNRESVSP64:
+		break;
+
+	/* xfs_bstat_t still has wrong u32 vs u64 alignment */
	case XFS_IOC_SWAPEXT:
		break;
 
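
[Aside: _NATIVE_IOC() works because an ioctl command number encodes the size of
its argument type. XFS_IOC_RESVSP_32 and XFS_IOC_RESVSP are both _IOW('X', 40, ...)
and differ only in that size field, so rebuilding the number against the native
structure makes the compat command match the native case label. A worked example,
assuming the standard _IOC() encoding:

	/* keep direction, type and number; swap in the native size */
	cmd = _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd),
		   sizeof(struct xfs_flock64));
	/* _IOW('X', 40, struct xfs_flock64_32)
	 *	==> _IOW('X', 40, struct xfs_flock64)	*/
]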
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index f252605514eb..77708a8c9f87 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -140,7 +140,6 @@ linvfs_mknod(
 
	memset(&va, 0, sizeof(va));
	va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
-	va.va_type = IFTOVT(mode);
	va.va_mode = mode;
 
	switch (mode & S_IFMT) {
@@ -308,14 +307,13 @@ linvfs_symlink(
	cvp = NULL;
 
	memset(&va, 0, sizeof(va));
-	va.va_type = VLNK;
-	va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO;
+	va.va_mode = S_IFLNK |
+		(irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO);
	va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
 
	error = 0;
	VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
	if (!error && cvp) {
-		ASSERT(cvp->v_type == VLNK);
		ip = LINVFS_GET_IP(cvp);
		d_instantiate(dentry, ip);
		validate_fields(dir);
@@ -425,9 +423,14 @@ linvfs_follow_link(
	return NULL;
 }
 
-static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+STATIC void
+linvfs_put_link(
+	struct dentry	*dentry,
+	struct nameidata *nd,
+	void		*p)
 {
	char		*s = nd_get_link(nd);
+
	if (!IS_ERR(s))
		kfree(s);
 }
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 42dc5e4662ed..68c5d885ed9c 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -64,7 +64,6 @@
 #include <sema.h>
 #include <time.h>
 
-#include <support/qsort.h>
 #include <support/ktrace.h>
 #include <support/debug.h>
 #include <support/move.h>
@@ -104,6 +103,7 @@
 #include <xfs_stats.h>
 #include <xfs_sysctl.h>
 #include <xfs_iops.h>
+#include <xfs_aops.h>
 #include <xfs_super.h>
 #include <xfs_globals.h>
 #include <xfs_fs_subr.h>
@@ -254,11 +254,18 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
 #define MAX(a,b)	(max(a,b))
 #define howmany(x, y)	(((x)+((y)-1))/(y))
 #define roundup(x, y)	((((x)+((y)-1))/(y))*(y))
+#define qsort(a,n,s,fn)	sort(a,n,s,fn,NULL)
 
+/*
+ * Various platform dependent calls that don't fit anywhere else
+ */
 #define xfs_stack_trace()	dump_stack()
-
 #define xfs_itruncate_data(ip, off)	\
	(-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
+#define xfs_statvfs_fsid(statp, mp)	\
+	({ u64 id = huge_encode_dev((mp)->m_dev);	\
+	   __kernel_fsid_t *fsid = &(statp)->f_fsid;	\
+	   (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); })
 
 
 /* Move the kernel do_div definition off to one side */
@@ -371,6 +378,4 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
	return(x * y);
 }
 
-#define qsort(a, n, s, cmp) sort(a, n, s, cmp, NULL)
-
 #endif /* __XFS_LINUX__ */
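
[Aside: the new xfs_statvfs_fsid() macro simply splits the 64-bit encoded device
number across the two 32-bit f_fsid words. An equivalent standalone sketch:

	static void fsid_sketch(__u32 val[2], __u64 id)
	{
		val[0] = (__u32)id;		/* low 32 bits */
		val[1] = (__u32)(id >> 32);	/* high 32 bits */
	}
]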
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index acab58c48043..3b5fabe8dae9 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -660,9 +660,6 @@ xfs_write(
		(xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;
 
-	if (ioflags & IO_ISAIO)
-		return XFS_ERROR(-ENOSYS);
-
	if ((pos & target->pbr_smask) || (count & target->pbr_smask))
		return XFS_ERROR(-EINVAL);
 
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index f197a720e394..6294dcdb797c 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -70,9 +70,10 @@ struct xfs_iomap;
 #define	XFS_SENDFILE_ENTER	21
 #define	XFS_WRITEPAGE_ENTER	22
 #define	XFS_RELEASEPAGE_ENTER	23
-#define	XFS_IOMAP_ALLOC_ENTER	24
-#define	XFS_IOMAP_ALLOC_MAP	25
-#define	XFS_IOMAP_UNWRITTEN	26
+#define	XFS_INVALIDPAGE_ENTER	24
+#define	XFS_IOMAP_ALLOC_ENTER	25
+#define	XFS_IOMAP_ALLOC_MAP	26
+#define	XFS_IOMAP_UNWRITTEN	27
 extern void xfs_rw_enter_trace(int, struct xfs_iocore *,
				void *, size_t, loff_t, int);
 extern void xfs_inval_cached_trace(struct xfs_iocore *,
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index f6dd7de25927..0da87bfc9999 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -70,11 +70,15 @@
70#include <linux/namei.h> 70#include <linux/namei.h>
71#include <linux/init.h> 71#include <linux/init.h>
72#include <linux/mount.h> 72#include <linux/mount.h>
73#include <linux/mempool.h>
73#include <linux/writeback.h> 74#include <linux/writeback.h>
75#include <linux/kthread.h>
74 76
75STATIC struct quotactl_ops linvfs_qops; 77STATIC struct quotactl_ops linvfs_qops;
76STATIC struct super_operations linvfs_sops; 78STATIC struct super_operations linvfs_sops;
77STATIC kmem_zone_t *linvfs_inode_zone; 79STATIC kmem_zone_t *xfs_vnode_zone;
80STATIC kmem_zone_t *xfs_ioend_zone;
81mempool_t *xfs_ioend_pool;
78 82
79STATIC struct xfs_mount_args * 83STATIC struct xfs_mount_args *
80xfs_args_allocate( 84xfs_args_allocate(
@@ -138,24 +142,25 @@ STATIC __inline__ void
138xfs_set_inodeops( 142xfs_set_inodeops(
139 struct inode *inode) 143 struct inode *inode)
140{ 144{
141 vnode_t *vp = LINVFS_GET_VP(inode); 145 switch (inode->i_mode & S_IFMT) {
142 146 case S_IFREG:
143 if (vp->v_type == VNON) {
144 vn_mark_bad(vp);
145 } else if (S_ISREG(inode->i_mode)) {
146 inode->i_op = &linvfs_file_inode_operations; 147 inode->i_op = &linvfs_file_inode_operations;
147 inode->i_fop = &linvfs_file_operations; 148 inode->i_fop = &linvfs_file_operations;
148 inode->i_mapping->a_ops = &linvfs_aops; 149 inode->i_mapping->a_ops = &linvfs_aops;
149 } else if (S_ISDIR(inode->i_mode)) { 150 break;
151 case S_IFDIR:
150 inode->i_op = &linvfs_dir_inode_operations; 152 inode->i_op = &linvfs_dir_inode_operations;
151 inode->i_fop = &linvfs_dir_operations; 153 inode->i_fop = &linvfs_dir_operations;
152 } else if (S_ISLNK(inode->i_mode)) { 154 break;
155 case S_IFLNK:
153 inode->i_op = &linvfs_symlink_inode_operations; 156 inode->i_op = &linvfs_symlink_inode_operations;
154 if (inode->i_blocks) 157 if (inode->i_blocks)
155 inode->i_mapping->a_ops = &linvfs_aops; 158 inode->i_mapping->a_ops = &linvfs_aops;
156 } else { 159 break;
160 default:
157 inode->i_op = &linvfs_file_inode_operations; 161 inode->i_op = &linvfs_file_inode_operations;
158 init_special_inode(inode, inode->i_mode, inode->i_rdev); 162 init_special_inode(inode, inode->i_mode, inode->i_rdev);
163 break;
159 } 164 }
160} 165}
161 166
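Interleaved with the old column, the hunk above is dense. Reassembled from the new column alone, xfs_set_inodeops() now selects operations with a single switch on i_mode (a reconstruction for readability, not an excerpt):

    STATIC __inline__ void
    xfs_set_inodeops(
            struct inode            *inode)
    {
            switch (inode->i_mode & S_IFMT) {
            case S_IFREG:
                    inode->i_op = &linvfs_file_inode_operations;
                    inode->i_fop = &linvfs_file_operations;
                    inode->i_mapping->a_ops = &linvfs_aops;
                    break;
            case S_IFDIR:
                    inode->i_op = &linvfs_dir_inode_operations;
                    inode->i_fop = &linvfs_dir_operations;
                    break;
            case S_IFLNK:
                    inode->i_op = &linvfs_symlink_inode_operations;
                    if (inode->i_blocks)    /* only out-of-line symlinks need address space ops */
                            inode->i_mapping->a_ops = &linvfs_aops;
                    break;
            default:        /* FIFOs, sockets, block and char devices */
                    inode->i_op = &linvfs_file_inode_operations;
                    init_special_inode(inode, inode->i_mode, inode->i_rdev);
                    break;
            }
    }

Note that the old v_type == VNON case (which marked the vnode bad) is simply gone: the caller in xfs_initialize_vnode only reaches this function when ip->i_d.di_mode != 0.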
@@ -167,16 +172,23 @@ xfs_revalidate_inode(
167{ 172{
168 struct inode *inode = LINVFS_GET_IP(vp); 173 struct inode *inode = LINVFS_GET_IP(vp);
169 174
170 inode->i_mode = (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type); 175 inode->i_mode = ip->i_d.di_mode;
171 inode->i_nlink = ip->i_d.di_nlink; 176 inode->i_nlink = ip->i_d.di_nlink;
172 inode->i_uid = ip->i_d.di_uid; 177 inode->i_uid = ip->i_d.di_uid;
173 inode->i_gid = ip->i_d.di_gid; 178 inode->i_gid = ip->i_d.di_gid;
174 if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) { 179
180 switch (inode->i_mode & S_IFMT) {
181 case S_IFBLK:
182 case S_IFCHR:
183 inode->i_rdev =
184 MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
185 sysv_minor(ip->i_df.if_u2.if_rdev));
186 break;
187 default:
175 inode->i_rdev = 0; 188 inode->i_rdev = 0;
176 } else { 189 break;
177 xfs_dev_t dev = ip->i_df.if_u2.if_rdev;
178 inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
179 } 190 }
191
180 inode->i_blksize = PAGE_CACHE_SIZE; 192 inode->i_blksize = PAGE_CACHE_SIZE;
181 inode->i_generation = ip->i_d.di_gen; 193 inode->i_generation = ip->i_d.di_gen;
182 i_size_write(inode, ip->i_d.di_size); 194 i_size_write(inode, ip->i_d.di_size);
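With v_type removed, both the permission bits and the file type now come straight from the on-disk di_mode, and device numbers are decoded only for block and char inodes. The reworked section of xfs_revalidate_inode(), reassembled from the new column (reconstruction, not an excerpt):

    inode->i_mode   = ip->i_d.di_mode;
    inode->i_nlink  = ip->i_d.di_nlink;
    inode->i_uid    = ip->i_d.di_uid;
    inode->i_gid    = ip->i_d.di_gid;

    switch (inode->i_mode & S_IFMT) {
    case S_IFBLK:
    case S_IFCHR:
            /* decode the SysV-encoded on-disk dev_t */
            inode->i_rdev =
                    MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
                          sysv_minor(ip->i_df.if_u2.if_rdev));
            break;
    default:
            inode->i_rdev = 0;
            break;
    }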
@@ -231,7 +243,6 @@ xfs_initialize_vnode(
231 * finish our work. 243 * finish our work.
232 */ 244 */
233 if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) { 245 if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) {
234 vp->v_type = IFTOVT(ip->i_d.di_mode);
235 xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip); 246 xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
236 xfs_set_inodeops(inode); 247 xfs_set_inodeops(inode);
237 248
@@ -274,8 +285,7 @@ linvfs_alloc_inode(
274{ 285{
275 vnode_t *vp; 286 vnode_t *vp;
276 287
277 vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone, 288 vp = kmem_cache_alloc(xfs_vnode_zone, kmem_flags_convert(KM_SLEEP));
278 kmem_flags_convert(KM_SLEEP));
279 if (!vp) 289 if (!vp)
280 return NULL; 290 return NULL;
281 return LINVFS_GET_IP(vp); 291 return LINVFS_GET_IP(vp);
@@ -285,11 +295,11 @@ STATIC void
285linvfs_destroy_inode( 295linvfs_destroy_inode(
286 struct inode *inode) 296 struct inode *inode)
287{ 297{
288 kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode)); 298 kmem_zone_free(xfs_vnode_zone, LINVFS_GET_VP(inode));
289} 299}
290 300
291STATIC void 301STATIC void
292init_once( 302linvfs_inode_init_once(
293 void *data, 303 void *data,
294 kmem_cache_t *cachep, 304 kmem_cache_t *cachep,
295 unsigned long flags) 305 unsigned long flags)
@@ -302,21 +312,41 @@ init_once(
302} 312}
303 313
304STATIC int 314STATIC int
305init_inodecache( void ) 315linvfs_init_zones(void)
306{ 316{
307 linvfs_inode_zone = kmem_cache_create("linvfs_icache", 317 xfs_vnode_zone = kmem_cache_create("xfs_vnode",
308 sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT, 318 sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT,
309 init_once, NULL); 319 linvfs_inode_init_once, NULL);
310 if (linvfs_inode_zone == NULL) 320 if (!xfs_vnode_zone)
311 return -ENOMEM; 321 goto out;
322
323 xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
324 if (!xfs_ioend_zone)
325 goto out_destroy_vnode_zone;
326
327 xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE,
328 mempool_alloc_slab, mempool_free_slab,
329 xfs_ioend_zone);
330 if (!xfs_ioend_pool)
331 goto out_free_ioend_zone;
332
312 return 0; 333 return 0;
334
335
336 out_free_ioend_zone:
337 kmem_zone_destroy(xfs_ioend_zone);
338 out_destroy_vnode_zone:
339 kmem_zone_destroy(xfs_vnode_zone);
340 out:
341 return -ENOMEM;
313} 342}
314 343
315STATIC void 344STATIC void
316destroy_inodecache( void ) 345linvfs_destroy_zones(void)
317{ 346{
318 if (kmem_cache_destroy(linvfs_inode_zone)) 347 mempool_destroy(xfs_ioend_pool);
319 printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__); 348 kmem_zone_destroy(xfs_vnode_zone);
349 kmem_zone_destroy(xfs_ioend_zone);
320} 350}
321 351
322/* 352/*
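The new xfs_ioend_pool is a slab-backed mempool with a reserve of 4 * MAX_BUF_PER_PAGE objects, so I/O-completion descriptors can still be obtained while the page allocator is under pressure. A minimal sketch of how callers are expected to draw from it (the alloc/free pairing shown here is illustrative; the real callers live in xfs_aops.c):

    xfs_ioend_t     *ioend;

    /*
     * Falls back to the pre-allocated reserve if the slab allocation
     * fails; with a sleeping gfp mask this cannot return NULL.
     */
    ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);

    /* ... initialize and hand off to I/O completion ... */

    mempool_free(ioend, xfs_ioend_pool);    /* refills the reserve first */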
@@ -354,17 +384,38 @@ linvfs_clear_inode(
354 struct inode *inode) 384 struct inode *inode)
355{ 385{
356 vnode_t *vp = LINVFS_GET_VP(inode); 386 vnode_t *vp = LINVFS_GET_VP(inode);
387 int error, cache;
357 388
358 if (vp) { 389 vn_trace_entry(vp, "clear_inode", (inst_t *)__return_address);
359 vn_rele(vp); 390
360 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 391 XFS_STATS_INC(vn_rele);
361 /* 392 XFS_STATS_INC(vn_remove);
362 * Do all our cleanup, and remove this vnode. 393 XFS_STATS_INC(vn_reclaim);
363 */ 394 XFS_STATS_DEC(vn_active);
364 vn_remove(vp); 395
396 /*
397 * This can happen because xfs_iget_core calls xfs_idestroy if we
398 * find an inode with di_mode == 0 but without IGET_CREATE set.
399 */
400 if (vp->v_fbhv)
401 VOP_INACTIVE(vp, NULL, cache);
402
403 VN_LOCK(vp);
404 vp->v_flag &= ~VMODIFIED;
405 VN_UNLOCK(vp, 0);
406
407 if (vp->v_fbhv) {
408 VOP_RECLAIM(vp, error);
409 if (error)
410 panic("vn_purge: cannot reclaim");
365 } 411 }
366}
367 412
413 ASSERT(vp->v_fbhv == NULL);
414
415#ifdef XFS_VNODE_TRACE
416 ktrace_free(vp->v_trace);
417#endif
418}
368 419
369/* 420/*
370 * Enqueue a work item to be picked up by the vfs xfssyncd thread. 421 * Enqueue a work item to be picked up by the vfs xfssyncd thread.
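The linvfs_clear_inode hunk above folds the old vn_rele()/vn_remove()/vn_purge() chain directly into the ->clear_inode path. Reassembled from the new column for readability (a reconstruction, not an excerpt):

    STATIC void
    linvfs_clear_inode(
            struct inode            *inode)
    {
            vnode_t                 *vp = LINVFS_GET_VP(inode);
            int                     error, cache;

            vn_trace_entry(vp, "clear_inode", (inst_t *)__return_address);

            XFS_STATS_INC(vn_rele);
            XFS_STATS_INC(vn_remove);
            XFS_STATS_INC(vn_reclaim);
            XFS_STATS_DEC(vn_active);

            /*
             * This can happen because xfs_iget_core calls xfs_idestroy if we
             * find an inode with di_mode == 0 but without IGET_CREATE set.
             */
            if (vp->v_fbhv)
                    VOP_INACTIVE(vp, NULL, cache);

            VN_LOCK(vp);
            vp->v_flag &= ~VMODIFIED;
            VN_UNLOCK(vp, 0);

            if (vp->v_fbhv) {
                    VOP_RECLAIM(vp, error);
                    if (error)
                            panic("vn_purge: cannot reclaim");
            }

            ASSERT(vp->v_fbhv == NULL);

    #ifdef XFS_VNODE_TRACE
            ktrace_free(vp->v_trace);
    #endif
    }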
@@ -466,25 +517,16 @@ xfssyncd(
466{ 517{
467 long timeleft; 518 long timeleft;
468 vfs_t *vfsp = (vfs_t *) arg; 519 vfs_t *vfsp = (vfs_t *) arg;
469 struct list_head tmp;
470 struct vfs_sync_work *work, *n; 520 struct vfs_sync_work *work, *n;
521 LIST_HEAD (tmp);
471 522
472 daemonize("xfssyncd");
473
474 vfsp->vfs_sync_work.w_vfs = vfsp;
475 vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
476 vfsp->vfs_sync_task = current;
477 wmb();
478 wake_up(&vfsp->vfs_wait_sync_task);
479
480 INIT_LIST_HEAD(&tmp);
481 timeleft = (xfs_syncd_centisecs * HZ) / 100; 523 timeleft = (xfs_syncd_centisecs * HZ) / 100;
482 for (;;) { 524 for (;;) {
483 set_current_state(TASK_INTERRUPTIBLE); 525 set_current_state(TASK_INTERRUPTIBLE);
484 timeleft = schedule_timeout(timeleft); 526 timeleft = schedule_timeout(timeleft);
485 /* swsusp */ 527 /* swsusp */
486 try_to_freeze(); 528 try_to_freeze();
487 if (vfsp->vfs_flag & VFS_UMOUNT) 529 if (kthread_should_stop())
488 break; 530 break;
489 531
490 spin_lock(&vfsp->vfs_sync_lock); 532 spin_lock(&vfsp->vfs_sync_lock);
@@ -513,10 +555,6 @@ xfssyncd(
513 } 555 }
514 } 556 }
515 557
516 vfsp->vfs_sync_task = NULL;
517 wmb();
518 wake_up(&vfsp->vfs_wait_sync_task);
519
520 return 0; 558 return 0;
521} 559}
522 560
@@ -524,13 +562,11 @@ STATIC int
524linvfs_start_syncd( 562linvfs_start_syncd(
525 vfs_t *vfsp) 563 vfs_t *vfsp)
526{ 564{
527 int pid; 565 vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
528 566 vfsp->vfs_sync_work.w_vfs = vfsp;
529 pid = kernel_thread(xfssyncd, (void *) vfsp, 567 vfsp->vfs_sync_task = kthread_run(xfssyncd, vfsp, "xfssyncd");
530 CLONE_VM | CLONE_FS | CLONE_FILES); 568 if (IS_ERR(vfsp->vfs_sync_task))
531 if (pid < 0) 569 return -PTR_ERR(vfsp->vfs_sync_task);
532 return -pid;
533 wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task);
534 return 0; 570 return 0;
535} 571}
536 572
@@ -538,11 +574,7 @@ STATIC void
538linvfs_stop_syncd( 574linvfs_stop_syncd(
539 vfs_t *vfsp) 575 vfs_t *vfsp)
540{ 576{
541 vfsp->vfs_flag |= VFS_UMOUNT; 577 kthread_stop(vfsp->vfs_sync_task);
542 wmb();
543
544 wake_up_process(vfsp->vfs_sync_task);
545 wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task);
546} 578}
547 579
548STATIC void 580STATIC void
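Taken together, the xfssyncd hunks replace the hand-rolled daemon lifecycle (daemonize(), the wmb()/wait_event() startup handshake, and the VFS_UMOUNT shutdown flag) with the kthread API, which provides both halves of that protocol. The generic pattern, as a sketch (names here are illustrative, not from the patch):

    #include <linux/kthread.h>

    static int my_daemon(void *data)
    {
            /* kthread_stop() wakes us and makes this return true */
            while (!kthread_should_stop()) {
                    set_current_state(TASK_INTERRUPTIBLE);
                    schedule_timeout(HZ);
                    /* ... periodic work ... */
            }
            return 0;       /* value is handed back to kthread_stop() */
    }

    /* start: returns the new task, or an ERR_PTR() on failure */
    struct task_struct *task = kthread_run(my_daemon, NULL, "my_daemon");

    /* stop: blocks until my_daemon() returns */
    if (!IS_ERR(task))
            kthread_stop(task);

A thread driven this way must poll kthread_should_stop(), as xfssyncd now does in its main loop, or kthread_stop() would block forever.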
@@ -866,9 +898,9 @@ init_xfs_fs( void )
866 898
867 ktrace_init(64); 899 ktrace_init(64);
868 900
869 error = init_inodecache(); 901 error = linvfs_init_zones();
870 if (error < 0) 902 if (error < 0)
871 goto undo_inodecache; 903 goto undo_zones;
872 904
873 error = pagebuf_init(); 905 error = pagebuf_init();
874 if (error < 0) 906 if (error < 0)
@@ -889,9 +921,9 @@ undo_register:
889 pagebuf_terminate(); 921 pagebuf_terminate();
890 922
891undo_pagebuf: 923undo_pagebuf:
892 destroy_inodecache(); 924 linvfs_destroy_zones();
893 925
894undo_inodecache: 926undo_zones:
895 return error; 927 return error;
896} 928}
897 929
@@ -903,7 +935,7 @@ exit_xfs_fs( void )
903 unregister_filesystem(&xfs_fs_type); 935 unregister_filesystem(&xfs_fs_type);
904 xfs_cleanup(); 936 xfs_cleanup();
905 pagebuf_terminate(); 937 pagebuf_terminate();
906 destroy_inodecache(); 938 linvfs_destroy_zones();
907 ktrace_uninit(); 939 ktrace_uninit();
908} 940}
909 941
diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c
index 669c61644959..34cc902ec119 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.c
+++ b/fs/xfs/linux-2.6/xfs_vfs.c
@@ -251,7 +251,6 @@ vfs_allocate( void )
251 bhv_head_init(VFS_BHVHEAD(vfsp), "vfs"); 251 bhv_head_init(VFS_BHVHEAD(vfsp), "vfs");
252 INIT_LIST_HEAD(&vfsp->vfs_sync_list); 252 INIT_LIST_HEAD(&vfsp->vfs_sync_list);
253 spin_lock_init(&vfsp->vfs_sync_lock); 253 spin_lock_init(&vfsp->vfs_sync_lock);
254 init_waitqueue_head(&vfsp->vfs_wait_sync_task);
255 init_waitqueue_head(&vfsp->vfs_wait_single_sync_task); 254 init_waitqueue_head(&vfsp->vfs_wait_single_sync_task);
256 return vfsp; 255 return vfsp;
257} 256}
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index 7ee1f714e9ba..f0ab574fb47a 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -65,7 +65,6 @@ typedef struct vfs {
65 spinlock_t vfs_sync_lock; /* work item list lock */ 65 spinlock_t vfs_sync_lock; /* work item list lock */
66 int vfs_sync_seq; /* sync thread generation no. */ 66 int vfs_sync_seq; /* sync thread generation no. */
67 wait_queue_head_t vfs_wait_single_sync_task; 67 wait_queue_head_t vfs_wait_single_sync_task;
68 wait_queue_head_t vfs_wait_sync_task;
69} vfs_t; 68} vfs_t;
70 69
71#define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */ 70#define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */
@@ -96,7 +95,6 @@ typedef enum {
96#define VFS_RDONLY 0x0001 /* read-only vfs */ 95#define VFS_RDONLY 0x0001 /* read-only vfs */
97#define VFS_GRPID 0x0002 /* group-ID assigned from directory */ 96#define VFS_GRPID 0x0002 /* group-ID assigned from directory */
98#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */ 97#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */
99#define VFS_UMOUNT 0x0008 /* unmount in progress */
100#define VFS_END 0x0008 /* max flag */ 98#define VFS_END 0x0008 /* max flag */
101 99
102#define SYNC_ATTR 0x0001 /* sync attributes */ 100#define SYNC_ATTR 0x0001 /* sync attributes */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 250cad54e892..268f45bf6a9a 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -42,93 +42,33 @@ DEFINE_SPINLOCK(vnumber_lock);
42 */ 42 */
43#define NVSYNC 37 43#define NVSYNC 37
44#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) 44#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC])
45sv_t vsync[NVSYNC]; 45STATIC wait_queue_head_t vsync[NVSYNC];
46
47/*
48 * Translate stat(2) file types to vnode types and vice versa.
49 * Aware of numeric order of S_IFMT and vnode type values.
50 */
51enum vtype iftovt_tab[] = {
52 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
53 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
54};
55
56u_short vttoif_tab[] = {
57 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK
58};
59 46
60 47
61void 48void
62vn_init(void) 49vn_init(void)
63{ 50{
64 register sv_t *svp; 51 int i;
65 register int i;
66 52
67 for (svp = vsync, i = 0; i < NVSYNC; i++, svp++) 53 for (i = 0; i < NVSYNC; i++)
68 init_sv(svp, SV_DEFAULT, "vsy", i); 54 init_waitqueue_head(&vsync[i]);
69} 55}
70 56
71/* 57void
72 * Clean a vnode of filesystem-specific data and prepare it for reuse. 58vn_iowait(
73 */
74STATIC int
75vn_reclaim(
76 struct vnode *vp) 59 struct vnode *vp)
77{ 60{
78 int error; 61 wait_queue_head_t *wq = vptosync(vp);
79 62
80 XFS_STATS_INC(vn_reclaim); 63 wait_event(*wq, (atomic_read(&vp->v_iocount) == 0));
81 vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address);
82
83 /*
84 * Only make the VOP_RECLAIM call if there are behaviors
85 * to call.
86 */
87 if (vp->v_fbhv) {
88 VOP_RECLAIM(vp, error);
89 if (error)
90 return -error;
91 }
92 ASSERT(vp->v_fbhv == NULL);
93
94 VN_LOCK(vp);
95 vp->v_flag &= (VRECLM|VWAIT);
96 VN_UNLOCK(vp, 0);
97
98 vp->v_type = VNON;
99 vp->v_fbhv = NULL;
100
101#ifdef XFS_VNODE_TRACE
102 ktrace_free(vp->v_trace);
103 vp->v_trace = NULL;
104#endif
105
106 return 0;
107}
108
109STATIC void
110vn_wakeup(
111 struct vnode *vp)
112{
113 VN_LOCK(vp);
114 if (vp->v_flag & VWAIT)
115 sv_broadcast(vptosync(vp));
116 vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED);
117 VN_UNLOCK(vp, 0);
118} 64}
119 65
120int 66void
121vn_wait( 67vn_iowake(
122 struct vnode *vp) 68 struct vnode *vp)
123{ 69{
124 VN_LOCK(vp); 70 if (atomic_dec_and_test(&vp->v_iocount))
125 if (vp->v_flag & (VINACT | VRECLM)) { 71 wake_up(vptosync(vp));
126 vp->v_flag |= VWAIT;
127 sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
128 return 1;
129 }
130 VN_UNLOCK(vp, 0);
131 return 0;
132} 72}
133 73
134struct vnode * 74struct vnode *
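vn_iowait()/vn_iowake() track outstanding I/O against a vnode with the new v_iocount counter, sleeping on one of NVSYNC (37) hashed wait queues selected by vptosync(), so no per-vnode wait queue is needed. The intended pairing, sketched (the increment at submission time is an assumption; it happens in the I/O paths, not in this file):

    /* submission side: account for an I/O in flight */
    atomic_inc(&vp->v_iocount);
    /* ... issue the I/O; on completion: */
    vn_iowake(vp);          /* dec-and-test, wakes sleepers at zero */

    /* truncate/flush side: drain all outstanding I/O first */
    vn_iowait(vp);          /* wait_event(... v_iocount == 0) */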
@@ -154,6 +94,8 @@ vn_initialize(
154 /* Initialize the first behavior and the behavior chain head. */ 94 /* Initialize the first behavior and the behavior chain head. */
155 vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode"); 95 vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode");
156 96
97 atomic_set(&vp->v_iocount, 0);
98
157#ifdef XFS_VNODE_TRACE 99#ifdef XFS_VNODE_TRACE
158 vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP); 100 vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP);
159#endif /* XFS_VNODE_TRACE */ 101#endif /* XFS_VNODE_TRACE */
@@ -163,30 +105,6 @@ vn_initialize(
163} 105}
164 106
165/* 107/*
166 * Get a reference on a vnode.
167 */
168vnode_t *
169vn_get(
170 struct vnode *vp,
171 vmap_t *vmap)
172{
173 struct inode *inode;
174
175 XFS_STATS_INC(vn_get);
176 inode = LINVFS_GET_IP(vp);
177 if (inode->i_state & I_FREEING)
178 return NULL;
179
180 inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino);
181 if (!inode) /* Inode not present */
182 return NULL;
183
184 vn_trace_exit(vp, "vn_get", (inst_t *)__return_address);
185
186 return vp;
187}
188
189/*
190 * Revalidate the Linux inode from the vattr. 108 * Revalidate the Linux inode from the vattr.
191 * Note: i_size _not_ updated; we must hold the inode 109 * Note: i_size _not_ updated; we must hold the inode
192 * semaphore when doing that - callers responsibility. 110 * semaphore when doing that - callers responsibility.
@@ -198,7 +116,7 @@ vn_revalidate_core(
198{ 116{
199 struct inode *inode = LINVFS_GET_IP(vp); 117 struct inode *inode = LINVFS_GET_IP(vp);
200 118
201 inode->i_mode = VTTOIF(vap->va_type) | vap->va_mode; 119 inode->i_mode = vap->va_mode;
202 inode->i_nlink = vap->va_nlink; 120 inode->i_nlink = vap->va_nlink;
203 inode->i_uid = vap->va_uid; 121 inode->i_uid = vap->va_uid;
204 inode->i_gid = vap->va_gid; 122 inode->i_gid = vap->va_gid;
@@ -247,71 +165,6 @@ vn_revalidate(
247} 165}
248 166
249/* 167/*
250 * purge a vnode from the cache
251 * At this point the vnode is guaranteed to have no references (vn_count == 0)
252 * The caller has to make sure that there are no ways someone could
253 * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock).
254 */
255void
256vn_purge(
257 struct vnode *vp,
258 vmap_t *vmap)
259{
260 vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address);
261
262again:
263 /*
264 * Check whether vp has already been reclaimed since our caller
265 * sampled its version while holding a filesystem cache lock that
266 * its VOP_RECLAIM function acquires.
267 */
268 VN_LOCK(vp);
269 if (vp->v_number != vmap->v_number) {
270 VN_UNLOCK(vp, 0);
271 return;
272 }
273
274 /*
275 * If vp is being reclaimed or inactivated, wait until it is inert,
276 * then proceed. Can't assume that vnode is actually reclaimed
277 * just because the reclaimed flag is asserted -- a vn_alloc
278 * reclaim can fail.
279 */
280 if (vp->v_flag & (VINACT | VRECLM)) {
281 ASSERT(vn_count(vp) == 0);
282 vp->v_flag |= VWAIT;
283 sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
284 goto again;
285 }
286
287 /*
288 * Another process could have raced in and gotten this vnode...
289 */
290 if (vn_count(vp) > 0) {
291 VN_UNLOCK(vp, 0);
292 return;
293 }
294
295 XFS_STATS_DEC(vn_active);
296 vp->v_flag |= VRECLM;
297 VN_UNLOCK(vp, 0);
298
299 /*
300 * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells
301 * vp's filesystem to flush and invalidate all cached resources.
302 * When vn_reclaim returns, vp should have no private data,
303 * either in a system cache or attached to v_data.
304 */
305 if (vn_reclaim(vp) != 0)
306 panic("vn_purge: cannot reclaim");
307
308 /*
309 * Wakeup anyone waiting for vp to be reclaimed.
310 */
311 vn_wakeup(vp);
312}
313
314/*
315 * Add a reference to a referenced vnode. 168 * Add a reference to a referenced vnode.
316 */ 169 */
317struct vnode * 170struct vnode *
@@ -330,80 +183,6 @@ vn_hold(
330 return vp; 183 return vp;
331} 184}
332 185
333/*
334 * Call VOP_INACTIVE on last reference.
335 */
336void
337vn_rele(
338 struct vnode *vp)
339{
340 int vcnt;
341 int cache;
342
343 XFS_STATS_INC(vn_rele);
344
345 VN_LOCK(vp);
346
347 vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address);
348 vcnt = vn_count(vp);
349
350 /*
351 * Since we always get called from put_inode we know
352 * that i_count won't be decremented after we
353 * return.
354 */
355 if (!vcnt) {
356 /*
357 * As soon as we turn this on, noone can find us in vn_get
358 * until we turn off VINACT or VRECLM
359 */
360 vp->v_flag |= VINACT;
361 VN_UNLOCK(vp, 0);
362
363 /*
364 * Do not make the VOP_INACTIVE call if there
365 * are no behaviors attached to the vnode to call.
366 */
367 if (vp->v_fbhv)
368 VOP_INACTIVE(vp, NULL, cache);
369
370 VN_LOCK(vp);
371 if (vp->v_flag & VWAIT)
372 sv_broadcast(vptosync(vp));
373
374 vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED);
375 }
376
377 VN_UNLOCK(vp, 0);
378
379 vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address);
380}
381
382/*
383 * Finish the removal of a vnode.
384 */
385void
386vn_remove(
387 struct vnode *vp)
388{
389 vmap_t vmap;
390
391 /* Make sure we don't do this to the same vnode twice */
392 if (!(vp->v_fbhv))
393 return;
394
395 XFS_STATS_INC(vn_remove);
396 vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address);
397
398 /*
399 * After the following purge the vnode
400 * will no longer exist.
401 */
402 VMAP(vp, vmap);
403 vn_purge(vp, &vmap);
404}
405
406
407#ifdef XFS_VNODE_TRACE 186#ifdef XFS_VNODE_TRACE
408 187
409#define KTRACE_ENTER(vp, vk, s, line, ra) \ 188#define KTRACE_ENTER(vp, vk, s, line, ra) \
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index a6e57c647be4..35f306cebb87 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as 5 * under the terms of version 2 of the GNU General Public License as
@@ -65,10 +65,6 @@ struct vattr;
65struct xfs_iomap; 65struct xfs_iomap;
66struct attrlist_cursor_kern; 66struct attrlist_cursor_kern;
67 67
68/*
69 * Vnode types. VNON means no type.
70 */
71enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK };
72 68
73typedef xfs_ino_t vnumber_t; 69typedef xfs_ino_t vnumber_t;
74typedef struct dentry vname_t; 70typedef struct dentry vname_t;
@@ -77,15 +73,14 @@ typedef bhv_head_t vn_bhv_head_t;
77/* 73/*
78 * MP locking protocols: 74 * MP locking protocols:
79 * v_flag, v_vfsp VN_LOCK/VN_UNLOCK 75 * v_flag, v_vfsp VN_LOCK/VN_UNLOCK
80 * v_type read-only or fs-dependent
81 */ 76 */
82typedef struct vnode { 77typedef struct vnode {
83 __u32 v_flag; /* vnode flags (see below) */ 78 __u32 v_flag; /* vnode flags (see below) */
84 enum vtype v_type; /* vnode type */
85 struct vfs *v_vfsp; /* ptr to containing VFS */ 79 struct vfs *v_vfsp; /* ptr to containing VFS */
86 vnumber_t v_number; /* in-core vnode number */ 80 vnumber_t v_number; /* in-core vnode number */
87 vn_bhv_head_t v_bh; /* behavior head */ 81 vn_bhv_head_t v_bh; /* behavior head */
88 spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */ 82 spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */
83 atomic_t v_iocount; /* outstanding I/O count */
89#ifdef XFS_VNODE_TRACE 84#ifdef XFS_VNODE_TRACE
90 struct ktrace *v_trace; /* trace header structure */ 85 struct ktrace *v_trace; /* trace header structure */
91#endif 86#endif
@@ -93,6 +88,12 @@ typedef struct vnode {
93 /* inode MUST be last */ 88 /* inode MUST be last */
94} vnode_t; 89} vnode_t;
95 90
91#define VN_ISLNK(vp) S_ISLNK((vp)->v_inode.i_mode)
92#define VN_ISREG(vp) S_ISREG((vp)->v_inode.i_mode)
93#define VN_ISDIR(vp) S_ISDIR((vp)->v_inode.i_mode)
94#define VN_ISCHR(vp) S_ISCHR((vp)->v_inode.i_mode)
95#define VN_ISBLK(vp) S_ISBLK((vp)->v_inode.i_mode)
96
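These predicates replace tests against the removed v_type with the standard S_IS*() checks on the embedded Linux inode's i_mode. Call sites convert mechanically, e.g. (illustrative before/after, do_something() is hypothetical):

    /* before: duplicated type state */
    if (vp->v_type == VREG)
            do_something();

    /* after: i_mode is the single source of truth */
    if (VN_ISREG(vp))
            do_something();

The MANDLOCK() change further down in this file is exactly this conversion.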
96#define v_fbhv v_bh.bh_first /* first behavior */ 97#define v_fbhv v_bh.bh_first /* first behavior */
97#define v_fops v_bh.bh_first->bd_ops /* first behavior ops */ 98#define v_fops v_bh.bh_first->bd_ops /* first behavior ops */
98 99
@@ -133,22 +134,8 @@ typedef enum {
133#define LINVFS_GET_IP(vp) (&(vp)->v_inode) 134#define LINVFS_GET_IP(vp) (&(vp)->v_inode)
134 135
135/* 136/*
136 * Convert between vnode types and inode formats (since POSIX.1
137 * defines mode word of stat structure in terms of inode formats).
138 */
139extern enum vtype iftovt_tab[];
140extern u_short vttoif_tab[];
141#define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12])
142#define VTTOIF(indx) (vttoif_tab[(int)(indx)])
143#define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode))
144
145
146/*
147 * Vnode flags. 137 * Vnode flags.
148 */ 138 */
149#define VINACT 0x1 /* vnode is being inactivated */
150#define VRECLM 0x2 /* vnode is being reclaimed */
151#define VWAIT 0x4 /* waiting for VINACT/VRECLM to end */
152#define VMODIFIED 0x8 /* XFS inode state possibly differs */ 139#define VMODIFIED 0x8 /* XFS inode state possibly differs */
153 /* to the Linux inode state. */ 140 /* to the Linux inode state. */
154 141
@@ -408,7 +395,6 @@ typedef struct vnodeops {
408 */ 395 */
409typedef struct vattr { 396typedef struct vattr {
410 int va_mask; /* bit-mask of attributes present */ 397 int va_mask; /* bit-mask of attributes present */
411 enum vtype va_type; /* vnode type (for create) */
412 mode_t va_mode; /* file access mode and type */ 398 mode_t va_mode; /* file access mode and type */
413 xfs_nlink_t va_nlink; /* number of references to file */ 399 xfs_nlink_t va_nlink; /* number of references to file */
414 uid_t va_uid; /* owner user id */ 400 uid_t va_uid; /* owner user id */
@@ -498,27 +484,12 @@ typedef struct vattr {
498 * Check whether mandatory file locking is enabled. 484 * Check whether mandatory file locking is enabled.
499 */ 485 */
500#define MANDLOCK(vp, mode) \ 486#define MANDLOCK(vp, mode) \
501 ((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) 487 (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
502 488
503extern void vn_init(void); 489extern void vn_init(void);
504extern int vn_wait(struct vnode *);
505extern vnode_t *vn_initialize(struct inode *); 490extern vnode_t *vn_initialize(struct inode *);
506 491
507/* 492/*
508 * Acquiring and invalidating vnodes:
509 *
510 * if (vn_get(vp, version, 0))
511 * ...;
512 * vn_purge(vp, version);
513 *
514 * vn_get and vn_purge must be called with vmap_t arguments, sampled
515 * while a lock that the vnode's VOP_RECLAIM function acquires is
516 * held, to ensure that the vnode sampled with the lock held isn't
517 * recycled (VOP_RECLAIMed) or deallocated between the release of the lock
518 * and the subsequent vn_get or vn_purge.
519 */
520
521/*
522 * vnode_map structures _must_ match vn_epoch and vnode structure sizes. 493 * vnode_map structures _must_ match vn_epoch and vnode structure sizes.
523 */ 494 */
524typedef struct vnode_map { 495typedef struct vnode_map {
@@ -531,11 +502,11 @@ typedef struct vnode_map {
531 (vmap).v_number = (vp)->v_number, \ 502 (vmap).v_number = (vp)->v_number, \
532 (vmap).v_ino = (vp)->v_inode.i_ino; } 503 (vmap).v_ino = (vp)->v_inode.i_ino; }
533 504
534extern void vn_purge(struct vnode *, vmap_t *);
535extern vnode_t *vn_get(struct vnode *, vmap_t *);
536extern int vn_revalidate(struct vnode *); 505extern int vn_revalidate(struct vnode *);
537extern void vn_revalidate_core(struct vnode *, vattr_t *); 506extern void vn_revalidate_core(struct vnode *, vattr_t *);
538extern void vn_remove(struct vnode *); 507
508extern void vn_iowait(struct vnode *vp);
509extern void vn_iowake(struct vnode *vp);
539 510
540static inline int vn_count(struct vnode *vp) 511static inline int vn_count(struct vnode *vp)
541{ 512{
@@ -546,7 +517,6 @@ static inline int vn_count(struct vnode *vp)
546 * Vnode reference counting functions (and macros for compatibility). 517 * Vnode reference counting functions (and macros for compatibility).
547 */ 518 */
548extern vnode_t *vn_hold(struct vnode *); 519extern vnode_t *vn_hold(struct vnode *);
549extern void vn_rele(struct vnode *);
550 520
551#if defined(XFS_VNODE_TRACE) 521#if defined(XFS_VNODE_TRACE)
552#define VN_HOLD(vp) \ 522#define VN_HOLD(vp) \
@@ -560,6 +530,12 @@ extern void vn_rele(struct vnode *);
560#define VN_RELE(vp) (iput(LINVFS_GET_IP(vp))) 530#define VN_RELE(vp) (iput(LINVFS_GET_IP(vp)))
561#endif 531#endif
562 532
533static inline struct vnode *vn_grab(struct vnode *vp)
534{
535 struct inode *inode = igrab(LINVFS_GET_IP(vp));
536 return inode ? LINVFS_GET_VP(inode) : NULL;
537}
538
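vn_grab() wraps igrab(): it returns NULL when the underlying inode is already being torn down, giving callers either a reference they can rely on or a clear failure, and replacing the removed vmap/vn_get scheme. Usage sketch (illustrative):

    struct vnode *nvp = vn_grab(vp);

    if (nvp) {
            /* we now hold an extra i_count reference on the inode */
            VN_RELE(nvp);   /* drop it again via iput() */
    }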
563/* 539/*
564 * Vname handling macros. 540 * Vname handling macros.
565 */ 541 */