Diffstat (limited to 'fs/xfs/linux-2.6/xfs_aops.c')
-rw-r--r--	fs/xfs/linux-2.6/xfs_aops.c	259
1 file changed, 166 insertions(+), 93 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a3a4b5aaf5d9..c6c077978fe3 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -104,66 +104,114 @@ xfs_page_trace(
 #define xfs_page_trace(tag, inode, page, mask)
 #endif
 
-void
-linvfs_unwritten_done(
-	struct buffer_head	*bh,
-	int			uptodate)
+/*
+ * Schedule IO completion handling on a xfsdatad if this was
+ * the final hold on this ioend.
+ */
+STATIC void
+xfs_finish_ioend(
+	xfs_ioend_t		*ioend)
 {
-	xfs_buf_t		*pb = (xfs_buf_t *)bh->b_private;
+	if (atomic_dec_and_test(&ioend->io_remaining))
+		queue_work(xfsdatad_workqueue, &ioend->io_work);
+}
 
-	ASSERT(buffer_unwritten(bh));
-	bh->b_end_io = NULL;
-	clear_buffer_unwritten(bh);
-	if (!uptodate)
-		pagebuf_ioerror(pb, EIO);
-	if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
-		pagebuf_iodone(pb, 1, 1);
-	}
-	end_buffer_async_write(bh, uptodate);
+STATIC void
+xfs_destroy_ioend(
+	xfs_ioend_t		*ioend)
+{
+	vn_iowake(ioend->io_vnode);
+	mempool_free(ioend, xfs_ioend_pool);
 }
 
 /*
  * Issue transactions to convert a buffer range from unwritten
- * to written extents (buffered IO).
+ * to written extents.
  */
 STATIC void
-linvfs_unwritten_convert(
-	xfs_buf_t	*bp)
+xfs_end_bio_unwritten(
+	void			*data)
 {
-	vnode_t		*vp = XFS_BUF_FSPRIVATE(bp, vnode_t *);
-	int		error;
+	xfs_ioend_t		*ioend = data;
+	vnode_t			*vp = ioend->io_vnode;
+	xfs_off_t		offset = ioend->io_offset;
+	size_t			size = ioend->io_size;
+	struct buffer_head	*bh, *next;
+	int			error;
+
+	if (ioend->io_uptodate)
+		VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
+
+	/* ioend->io_buffer_head is only non-NULL for buffered I/O */
+	for (bh = ioend->io_buffer_head; bh; bh = next) {
+		next = bh->b_private;
+
+		bh->b_end_io = NULL;
+		clear_buffer_unwritten(bh);
+		end_buffer_async_write(bh, ioend->io_uptodate);
+	}
 
-	BUG_ON(atomic_read(&bp->pb_hold) < 1);
-	VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp),
-			BMAPI_UNWRITTEN, NULL, NULL, error);
-	XFS_BUF_SET_FSPRIVATE(bp, NULL);
-	XFS_BUF_CLR_IODONE_FUNC(bp);
-	XFS_BUF_UNDATAIO(bp);
-	iput(LINVFS_GET_IP(vp));
-	pagebuf_iodone(bp, 0, 0);
+	xfs_destroy_ioend(ioend);
 }
 
 /*
- * Issue transactions to convert a buffer range from unwritten
- * to written extents (direct IO).
+ * Allocate and initialise an IO completion structure.
+ * We need to track unwritten extent write completion here initially.
+ * We'll need to extend this for updating the ondisk inode size later
+ * (vs. incore size).
  */
-STATIC void
-linvfs_unwritten_convert_direct(
-	struct kiocb	*iocb,
-	loff_t		offset,
-	ssize_t		size,
-	void		*private)
+STATIC xfs_ioend_t *
+xfs_alloc_ioend(
+	struct inode		*inode)
 {
-	struct inode	*inode = iocb->ki_filp->f_dentry->d_inode;
-	ASSERT(!private || inode == (struct inode *)private);
+	xfs_ioend_t		*ioend;
 
-	/* private indicates an unwritten extent lay beneath this IO */
-	if (private && size > 0) {
-		vnode_t	*vp = LINVFS_GET_VP(inode);
-		int	error;
+	ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
 
-		VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
-	}
+	/*
+	 * Set the count to 1 initially, which will prevent an I/O
+	 * completion callback that happens before we have started
+	 * all the I/O from calling the completion routine too early.
+	 */
+	atomic_set(&ioend->io_remaining, 1);
+	ioend->io_uptodate = 1;	/* cleared if any I/O fails */
+	ioend->io_vnode = LINVFS_GET_VP(inode);
+	ioend->io_buffer_head = NULL;
+	atomic_inc(&ioend->io_vnode->v_iocount);
+	ioend->io_offset = 0;
+	ioend->io_size = 0;
+
+	INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+
+	return ioend;
+}
+
+void
+linvfs_unwritten_done(
+	struct buffer_head	*bh,
+	int			uptodate)
+{
+	xfs_ioend_t		*ioend = bh->b_private;
+	static spinlock_t	unwritten_done_lock = SPIN_LOCK_UNLOCKED;
+	unsigned long		flags;
+
+	ASSERT(buffer_unwritten(bh));
+	bh->b_end_io = NULL;
+
+	if (!uptodate)
+		ioend->io_uptodate = 0;
+
+	/*
+	 * Deep magic here. We reuse b_private in the buffer_heads to build
+	 * a chain for completing the I/O from user context after we've issued
+	 * a transaction to convert the unwritten extent.
+	 */
+	spin_lock_irqsave(&unwritten_done_lock, flags);
+	bh->b_private = ioend->io_buffer_head;
+	ioend->io_buffer_head = bh;
+	spin_unlock_irqrestore(&unwritten_done_lock, flags);
+
+	xfs_finish_ioend(ioend);
 }
 
 STATIC int
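The counting scheme introduced above is worth spelling out: xfs_alloc_ioend() biases io_remaining to 1 so that per-buffer completions racing with submission can never drop the count to zero early; the submitter drops that initial reference only after all I/O has been issued. A minimal standalone sketch of the same pattern, in userspace C11 with hypothetical mock_* stand-ins for the mempool and workqueue machinery (not kernel code):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct mock_ioend {
	atomic_int	io_remaining;	/* one ref per in-flight buffer, plus 1 */
};

static void mock_queue_work(struct mock_ioend *ioend)
{
	/* in the patch this is queue_work(xfsdatad_workqueue, ...) */
	printf("completion handed off to worker\n");
	free(ioend);
}

static void mock_finish_ioend(struct mock_ioend *ioend)
{
	/* only the LAST dropped reference schedules completion */
	if (atomic_fetch_sub(&ioend->io_remaining, 1) == 1)
		mock_queue_work(ioend);
}

int main(void)
{
	struct mock_ioend *ioend = malloc(sizeof(*ioend));

	/* bias of 1 held by the submitter, as in xfs_alloc_ioend() */
	atomic_init(&ioend->io_remaining, 1);

	for (int i = 0; i < 3; i++)	/* "submit" three buffers */
		atomic_fetch_add(&ioend->io_remaining, 1);
	for (int i = 0; i < 3; i++)	/* the buffers "complete" */
		mock_finish_ioend(ioend);

	/* submission done: drop the initial reference; this fires the work */
	mock_finish_ioend(ioend);
	return 0;
}

Even if every buffer completes before the loop that submitted it returns, the count bottoms out at 1 until the submitter's own mock_finish_ioend() call, which is exactly the race the comment in xfs_alloc_ioend() describes.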
@@ -255,7 +303,7 @@ xfs_probe_unwritten_page(
 	struct address_space	*mapping,
 	pgoff_t			index,
 	xfs_iomap_t		*iomapp,
-	xfs_buf_t		*pb,
+	xfs_ioend_t		*ioend,
 	unsigned long		max_offset,
 	unsigned long		*fsbs,
 	unsigned int		bbits)
@@ -283,7 +331,7 @@ xfs_probe_unwritten_page(
 			break;
 		xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
 		set_buffer_unwritten_io(bh);
-		bh->b_private = pb;
+		bh->b_private = ioend;
 		p_offset += bh->b_size;
 		(*fsbs)++;
 	} while ((bh = bh->b_this_page) != head);
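Note that bh->b_private does double duty in this scheme: during submission it points at the owning ioend, as set here, and at completion linvfs_unwritten_done() reuses the same field as the link of a LIFO chain that xfs_end_bio_unwritten() later walks. A standalone sketch of that pointer reuse, with hypothetical mock_* names and the spinlock reduced to a comment (not kernel code):

#include <stddef.h>
#include <stdio.h>

struct mock_bh {
	int	id;
	void	*b_private;	/* first the ioend, then the chain link */
};

static struct mock_bh *chain_head;	/* plays ioend->io_buffer_head */

static void push_completed(struct mock_bh *bh)
{
	/* in the patch this push happens under unwritten_done_lock */
	bh->b_private = chain_head;
	chain_head = bh;
}

int main(void)
{
	struct mock_bh bufs[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };

	for (int i = 0; i < 3; i++)
		push_completed(&bufs[i]);

	/* mirrors xfs_end_bio_unwritten(): save next before reusing the field */
	for (struct mock_bh *bh = chain_head, *next; bh; bh = next) {
		next = bh->b_private;
		bh->b_private = NULL;
		printf("completing buffer %d\n", bh->id);
	}
	return 0;
}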
@@ -434,34 +482,15 @@ xfs_map_unwritten(
 {
 	struct buffer_head	*bh = curr;
 	xfs_iomap_t		*tmp;
-	xfs_buf_t		*pb;
-	loff_t			offset, size;
+	xfs_ioend_t		*ioend;
+	loff_t			offset;
 	unsigned long		nblocks = 0;
 
 	offset = start_page->index;
 	offset <<= PAGE_CACHE_SHIFT;
 	offset += p_offset;
 
-	/* get an "empty" pagebuf to manage IO completion
-	 * Proper values will be set before returning */
-	pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0);
-	if (!pb)
-		return -EAGAIN;
-
-	/* Take a reference to the inode to prevent it from
-	 * being reclaimed while we have outstanding unwritten
-	 * extent IO on it.
-	 */
-	if ((igrab(inode)) != inode) {
-		pagebuf_free(pb);
-		return -EAGAIN;
-	}
-
-	/* Set the count to 1 initially, this will stop an I/O
-	 * completion callout which happens before we have started
-	 * all the I/O from calling pagebuf_iodone too early.
-	 */
-	atomic_set(&pb->pb_io_remaining, 1);
+	ioend = xfs_alloc_ioend(inode);
 
 	/* First map forwards in the page consecutive buffers
 	 * covering this unwritten extent
@@ -474,12 +503,12 @@ xfs_map_unwritten(
 			break;
 		xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
 		set_buffer_unwritten_io(bh);
-		bh->b_private = pb;
+		bh->b_private = ioend;
 		p_offset += bh->b_size;
 		nblocks++;
 	} while ((bh = bh->b_this_page) != head);
 
-	atomic_add(nblocks, &pb->pb_io_remaining);
+	atomic_add(nblocks, &ioend->io_remaining);
 
 	/* If we reached the end of the page, map forwards in any
 	 * following pages which are also covered by this extent.
@@ -496,13 +525,13 @@ xfs_map_unwritten(
 		tloff = min(tlast, tloff);
 		for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
 			page = xfs_probe_unwritten_page(mapping,
-						tindex, iomapp, pb,
+						tindex, iomapp, ioend,
 						PAGE_CACHE_SIZE, &bs, bbits);
 			if (!page)
 				break;
 			nblocks += bs;
-			atomic_add(bs, &pb->pb_io_remaining);
-			xfs_convert_page(inode, page, iomapp, wbc, pb,
+			atomic_add(bs, &ioend->io_remaining);
+			xfs_convert_page(inode, page, iomapp, wbc, ioend,
 					 startio, all_bh);
 			/* stop if converting the next page might add
 			 * enough blocks that the corresponding byte
@@ -514,12 +543,12 @@ xfs_map_unwritten(
 		if (tindex == tlast &&
 		    (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
 			page = xfs_probe_unwritten_page(mapping,
-						tindex, iomapp, pb,
+						tindex, iomapp, ioend,
 						pg_offset, &bs, bbits);
 			if (page) {
 				nblocks += bs;
-				atomic_add(bs, &pb->pb_io_remaining);
-				xfs_convert_page(inode, page, iomapp, wbc, pb,
+				atomic_add(bs, &ioend->io_remaining);
+				xfs_convert_page(inode, page, iomapp, wbc, ioend,
 						 startio, all_bh);
 				if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
 					goto enough;
@@ -528,21 +557,9 @@ xfs_map_unwritten(
 	}
 
 enough:
-	size = nblocks;		/* NB: using 64bit number here */
-	size <<= block_bits;	/* convert fsb's to byte range */
-
-	XFS_BUF_DATAIO(pb);
-	XFS_BUF_ASYNC(pb);
-	XFS_BUF_SET_SIZE(pb, size);
-	XFS_BUF_SET_COUNT(pb, size);
-	XFS_BUF_SET_OFFSET(pb, offset);
-	XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode));
-	XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert);
-
-	if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
-		pagebuf_iodone(pb, 1, 1);
-	}
-
+	ioend->io_size = (xfs_off_t)nblocks << block_bits;
+	ioend->io_offset = offset;
+	xfs_finish_ioend(ioend);
 	return 0;
 }
 
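The one-line replacement for the old two-step size computation folds the removed "NB: using 64bit number here" note into a cast: (xfs_off_t)nblocks is widened before the shift, because on a 32-bit machine shifting the narrower counter first can overflow for extents past 4GiB. A small standalone demonstration of the difference, using fixed-width types to make the overflow deterministic (not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t nblocks = 0x300000;	/* ~3M filesystem blocks */
	unsigned block_bits = 12;	/* 4k blocks -> 12GiB of data */

	uint32_t bad  = nblocks << block_bits;		 /* wraps modulo 2^32 */
	uint64_t good = (uint64_t)nblocks << block_bits; /* widen, then shift */

	printf("bad=%u good=%llu\n", bad, (unsigned long long)good);
	return 0;
}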
@@ -787,7 +804,7 @@ xfs_page_state_convert(
 				continue;
 			if (!iomp) {
 				err = xfs_map_blocks(inode, offset, len, &iomap,
-						BMAPI_READ|BMAPI_IGNSTATE);
+						BMAPI_WRITE|BMAPI_IGNSTATE);
 				if (err) {
 					goto error;
 				}
@@ -1028,6 +1045,44 @@ linvfs_get_blocks_direct(
 		create, 1, BMAPI_WRITE|BMAPI_DIRECT);
 }
 
+STATIC void
+linvfs_end_io_direct(
+	struct kiocb	*iocb,
+	loff_t		offset,
+	ssize_t		size,
+	void		*private)
+{
+	xfs_ioend_t	*ioend = iocb->private;
+
+	/*
+	 * Non-NULL private data means we need to issue a transaction to
+	 * convert a range from unwritten to written extents. This needs
+	 * to happen from process context but aio+dio I/O completion
+	 * happens from irq context so we need to defer it to a workqueue.
+	 * This is not necessary for synchronous direct I/O, but we do
+	 * it anyway to keep the code uniform and simpler.
+	 *
+	 * The core direct I/O code might be changed to always call the
+	 * completion handler in the future, in which case all this can
+	 * go away.
+	 */
+	if (private && size > 0) {
+		ioend->io_offset = offset;
+		ioend->io_size = size;
+		xfs_finish_ioend(ioend);
+	} else {
+		ASSERT(size >= 0);
+		xfs_destroy_ioend(ioend);
+	}
+
+	/*
+	 * blockdev_direct_IO can return an error even after the I/O
+	 * completion handler was called. Thus we need to protect
+	 * against double-freeing.
+	 */
+	iocb->private = NULL;
+}
+
 STATIC ssize_t
 linvfs_direct_IO(
 	int		rw,
@@ -1042,16 +1097,23 @@ linvfs_direct_IO(
 	xfs_iomap_t	iomap;
 	int		maps = 1;
 	int		error;
+	ssize_t		ret;
 
 	VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error);
 	if (error)
 		return -error;
 
-	return blockdev_direct_IO_own_locking(rw, iocb, inode,
+	iocb->private = xfs_alloc_ioend(inode);
+
+	ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
 		iomap.iomap_target->pbr_bdev,
 		iov, offset, nr_segs,
 		linvfs_get_blocks_direct,
-		linvfs_unwritten_convert_direct);
+		linvfs_end_io_direct);
+
+	if (unlikely(ret <= 0 && iocb->private))
+		xfs_destroy_ioend(iocb->private);
+	return ret;
 }
 
 
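The interplay between linvfs_direct_IO() and linvfs_end_io_direct() is an ownership hand-off: whichever path consumes iocb->private clears it, so the ret <= 0 error path can free whatever is still attached without risking a double free. A standalone sketch of that guard, with hypothetical mock_* names and malloc/free standing in for the ioend mempool (not kernel code):

#include <stdlib.h>

struct mock_iocb { void *private; };

static void mock_end_io(struct mock_iocb *iocb)
{
	free(iocb->private);
	iocb->private = NULL;	/* mark the completion as consumed */
}

static long mock_direct_io(struct mock_iocb *iocb, int fail_early)
{
	iocb->private = malloc(64);
	if (fail_early)
		return -1;	/* error before the completion handler ran */
	mock_end_io(iocb);	/* normal path consumes private */
	return 64;
}

int main(void)
{
	struct mock_iocb iocb = { 0 };
	long ret = mock_direct_io(&iocb, 1);

	/* mirrors: if (unlikely(ret <= 0 && iocb->private)) destroy it */
	if (ret <= 0 && iocb.private) {
		free(iocb.private);
		iocb.private = NULL;
	}
	return 0;
}

Because mock_end_io() nulls the pointer on the success path, the caller's cleanup is a no-op there and frees only when the completion handler genuinely never ran.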
@@ -1202,6 +1264,16 @@ out_unlock:
 	return error;
 }
 
+STATIC int
+linvfs_invalidate_page(
+	struct page		*page,
+	unsigned long		offset)
+{
+	xfs_page_trace(XFS_INVALIDPAGE_ENTER,
+			page->mapping->host, page, offset);
+	return block_invalidatepage(page, offset);
+}
+
 /*
  * Called to move a page into cleanable state - and from there
  * to be released. Possibly the page is already clean. We always
@@ -1279,6 +1351,7 @@ struct address_space_operations linvfs_aops = {
 	.writepage		= linvfs_writepage,
 	.sync_page		= block_sync_page,
 	.releasepage		= linvfs_release_page,
+	.invalidatepage		= linvfs_invalidate_page,
 	.prepare_write		= linvfs_prepare_write,
 	.commit_write		= generic_commit_write,
 	.bmap			= linvfs_bmap,
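Wiring the new handler in is a one-line change because linvfs_aops binds operations by name with designated initializers. A standalone illustration of the same ops-table pattern, with a hypothetical mock_aops struct in place of address_space_operations (not the kernel struct):

#include <stdio.h>

struct mock_aops {
	int (*writepage)(const char *what);
	int (*invalidatepage)(const char *what);
};

static int mock_writepage(const char *what)
{
	return printf("writepage: %s\n", what);
}

static int mock_invalidatepage(const char *what)
{
	/* like linvfs_invalidate_page: trace the event, then delegate */
	printf("trace invalidate: %s\n", what);
	return 0;
}

static const struct mock_aops aops = {
	.writepage	= mock_writepage,
	.invalidatepage	= mock_invalidatepage,	/* the newly added hook */
};

int main(void)
{
	aops.writepage("page 42");
	aops.invalidatepage("page 42");
	return 0;
}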