aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Christoph Hellwig <hch@sgi.com> 2005-09-02 02:58:49 -0400
committer Nathan Scott <nathans@sgi.com> 2005-09-02 02:58:49 -0400
commit 0829c3602f4df95898752c402ea90b92a3e33154 (patch)
tree 35c0efa4e1fe35b118165fea5812fe9248f30023
parent 51c91ed52b8a9a30fcb2a465b40c20a1f11735ba (diff)
[XFS] Add infrastructure for tracking I/O completions
SGI-PV: 934766
SGI-Modid: xfs-linux:xfs-kern:196856a
Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
-rw-r--r-- fs/xfs/linux-2.6/xfs_aops.c 156
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.c 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_linux.h 1
-rw-r--r-- fs/xfs/linux-2.6/xfs_super.c 58
4 files changed, 132 insertions, 85 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index b55cb7f02e88..ed98c7ac7cfd 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -104,22 +104,24 @@ xfs_page_trace(
104#define xfs_page_trace(tag, inode, page, mask) 104#define xfs_page_trace(tag, inode, page, mask)
105#endif 105#endif
106 106
107void 107/*
108linvfs_unwritten_done( 108 * Schedule IO completion handling on a xfsdatad if this was
109 struct buffer_head *bh, 109 * the final hold on this ioend.
110 int uptodate) 110 */
111STATIC void
112xfs_finish_ioend(
113 xfs_ioend_t *ioend)
111{ 114{
112 xfs_buf_t *pb = (xfs_buf_t *)bh->b_private; 115 if (atomic_dec_and_test(&ioend->io_remaining))
116 queue_work(xfsdatad_workqueue, &ioend->io_work);
117}
113 118
114 ASSERT(buffer_unwritten(bh)); 119STATIC void
115 bh->b_end_io = NULL; 120xfs_destroy_ioend(
116 clear_buffer_unwritten(bh); 121 xfs_ioend_t *ioend)
117 if (!uptodate) 122{
118 pagebuf_ioerror(pb, EIO); 123 vn_iowake(ioend->io_vnode);
119 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { 124 mempool_free(ioend, xfs_ioend_pool);
120 pagebuf_iodone(pb, 1, 1);
121 }
122 end_buffer_async_write(bh, uptodate);
123} 125}
124 126
125/* 127/*
@@ -127,20 +129,66 @@ linvfs_unwritten_done(
127 * to written extents (buffered IO). 129 * to written extents (buffered IO).
128 */ 130 */
129STATIC void 131STATIC void
130linvfs_unwritten_convert( 132xfs_end_bio_unwritten(
131 xfs_buf_t *bp) 133 void *data)
132{ 134{
133 vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *); 135 xfs_ioend_t *ioend = data;
134 int error; 136 vnode_t *vp = ioend->io_vnode;
137 xfs_off_t offset = ioend->io_offset;
138 size_t size = ioend->io_size;
139 int error;
140
141 if (ioend->io_uptodate)
142 VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
143 xfs_destroy_ioend(ioend);
144}
145
146/*
147 * Allocate and initialise an IO completion structure.
148 * We need to track unwritten extent write completion here initially.
149 * We'll need to extend this for updating the ondisk inode size later
150 * (vs. incore size).
151 */
152STATIC xfs_ioend_t *
153xfs_alloc_ioend(
154 struct inode *inode)
155{
156 xfs_ioend_t *ioend;
135 157
136 BUG_ON(atomic_read(&bp->pb_hold) < 1); 158 ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
137 VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp), 159
138 BMAPI_UNWRITTEN, NULL, NULL, error); 160 /*
139 XFS_BUF_SET_FSPRIVATE(bp, NULL); 161 * Set the count to 1 initially, which will prevent an I/O
140 XFS_BUF_CLR_IODONE_FUNC(bp); 162 * completion callback from happening before we have started
141 XFS_BUF_UNDATAIO(bp); 163 * all the I/O from calling the completion routine too early.
142 vn_iowake(vp); 164 */
143 pagebuf_iodone(bp, 0, 0); 165 atomic_set(&ioend->io_remaining, 1);
166 ioend->io_uptodate = 1; /* cleared if any I/O fails */
167 ioend->io_vnode = LINVFS_GET_VP(inode);
168 atomic_inc(&ioend->io_vnode->v_iocount);
169 ioend->io_offset = 0;
170 ioend->io_size = 0;
171
172 INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
173
174 return ioend;
175}
176
177void
178linvfs_unwritten_done(
179 struct buffer_head *bh,
180 int uptodate)
181{
182 xfs_ioend_t *ioend = bh->b_private;
183
184 ASSERT(buffer_unwritten(bh));
185 bh->b_end_io = NULL;
186 clear_buffer_unwritten(bh);
187 if (!uptodate)
188 ioend->io_uptodate = 0;
189
190 xfs_finish_ioend(ioend);
191 end_buffer_async_write(bh, uptodate);
144} 192}
145 193
146/* 194/*
@@ -255,7 +303,7 @@ xfs_probe_unwritten_page(
255 struct address_space *mapping, 303 struct address_space *mapping,
256 pgoff_t index, 304 pgoff_t index,
257 xfs_iomap_t *iomapp, 305 xfs_iomap_t *iomapp,
258 xfs_buf_t *pb, 306 xfs_ioend_t *ioend,
259 unsigned long max_offset, 307 unsigned long max_offset,
260 unsigned long *fsbs, 308 unsigned long *fsbs,
261 unsigned int bbits) 309 unsigned int bbits)
@@ -283,7 +331,7 @@ xfs_probe_unwritten_page(
283 break; 331 break;
284 xfs_map_at_offset(page, bh, p_offset, bbits, iomapp); 332 xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
285 set_buffer_unwritten_io(bh); 333 set_buffer_unwritten_io(bh);
286 bh->b_private = pb; 334 bh->b_private = ioend;
287 p_offset += bh->b_size; 335 p_offset += bh->b_size;
288 (*fsbs)++; 336 (*fsbs)++;
289 } while ((bh = bh->b_this_page) != head); 337 } while ((bh = bh->b_this_page) != head);
@@ -434,27 +482,15 @@ xfs_map_unwritten(
434{ 482{
435 struct buffer_head *bh = curr; 483 struct buffer_head *bh = curr;
436 xfs_iomap_t *tmp; 484 xfs_iomap_t *tmp;
437 xfs_buf_t *pb; 485 xfs_ioend_t *ioend;
438 loff_t offset, size; 486 loff_t offset;
439 unsigned long nblocks = 0; 487 unsigned long nblocks = 0;
440 488
441 offset = start_page->index; 489 offset = start_page->index;
442 offset <<= PAGE_CACHE_SHIFT; 490 offset <<= PAGE_CACHE_SHIFT;
443 offset += p_offset; 491 offset += p_offset;
444 492
445 /* get an "empty" pagebuf to manage IO completion 493 ioend = xfs_alloc_ioend(inode);
446 * Proper values will be set before returning */
447 pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0);
448 if (!pb)
449 return -EAGAIN;
450
451 atomic_inc(&LINVFS_GET_VP(inode)->v_iocount);
452
453 /* Set the count to 1 initially, this will stop an I/O
454 * completion callout which happens before we have started
455 * all the I/O from calling pagebuf_iodone too early.
456 */
457 atomic_set(&pb->pb_io_remaining, 1);
458 494
459 /* First map forwards in the page consecutive buffers 495 /* First map forwards in the page consecutive buffers
460 * covering this unwritten extent 496 * covering this unwritten extent
@@ -467,12 +503,12 @@ xfs_map_unwritten(
467 break; 503 break;
468 xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp); 504 xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
469 set_buffer_unwritten_io(bh); 505 set_buffer_unwritten_io(bh);
470 bh->b_private = pb; 506 bh->b_private = ioend;
471 p_offset += bh->b_size; 507 p_offset += bh->b_size;
472 nblocks++; 508 nblocks++;
473 } while ((bh = bh->b_this_page) != head); 509 } while ((bh = bh->b_this_page) != head);
474 510
475 atomic_add(nblocks, &pb->pb_io_remaining); 511 atomic_add(nblocks, &ioend->io_remaining);
476 512
477 /* If we reached the end of the page, map forwards in any 513 /* If we reached the end of the page, map forwards in any
478 * following pages which are also covered by this extent. 514 * following pages which are also covered by this extent.
@@ -489,13 +525,13 @@ xfs_map_unwritten(
489 tloff = min(tlast, tloff); 525 tloff = min(tlast, tloff);
490 for (tindex = start_page->index + 1; tindex < tloff; tindex++) { 526 for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
491 page = xfs_probe_unwritten_page(mapping, 527 page = xfs_probe_unwritten_page(mapping,
492 tindex, iomapp, pb, 528 tindex, iomapp, ioend,
493 PAGE_CACHE_SIZE, &bs, bbits); 529 PAGE_CACHE_SIZE, &bs, bbits);
494 if (!page) 530 if (!page)
495 break; 531 break;
496 nblocks += bs; 532 nblocks += bs;
497 atomic_add(bs, &pb->pb_io_remaining); 533 atomic_add(bs, &ioend->io_remaining);
498 xfs_convert_page(inode, page, iomapp, wbc, pb, 534 xfs_convert_page(inode, page, iomapp, wbc, ioend,
499 startio, all_bh); 535 startio, all_bh);
500 /* stop if converting the next page might add 536 /* stop if converting the next page might add
501 * enough blocks that the corresponding byte 537 * enough blocks that the corresponding byte
@@ -507,12 +543,12 @@ xfs_map_unwritten(
507 if (tindex == tlast && 543 if (tindex == tlast &&
508 (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) { 544 (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
509 page = xfs_probe_unwritten_page(mapping, 545 page = xfs_probe_unwritten_page(mapping,
510 tindex, iomapp, pb, 546 tindex, iomapp, ioend,
511 pg_offset, &bs, bbits); 547 pg_offset, &bs, bbits);
512 if (page) { 548 if (page) {
513 nblocks += bs; 549 nblocks += bs;
514 atomic_add(bs, &pb->pb_io_remaining); 550 atomic_add(bs, &ioend->io_remaining);
515 xfs_convert_page(inode, page, iomapp, wbc, pb, 551 xfs_convert_page(inode, page, iomapp, wbc, ioend,
516 startio, all_bh); 552 startio, all_bh);
517 if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) 553 if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
518 goto enough; 554 goto enough;
@@ -521,21 +557,9 @@ xfs_map_unwritten(
521 } 557 }
522 558
523enough: 559enough:
524 size = nblocks; /* NB: using 64bit number here */ 560 ioend->io_size = (xfs_off_t)nblocks << block_bits;
525 size <<= block_bits; /* convert fsb's to byte range */ 561 ioend->io_offset = offset;
526 562 xfs_finish_ioend(ioend);
527 XFS_BUF_DATAIO(pb);
528 XFS_BUF_ASYNC(pb);
529 XFS_BUF_SET_SIZE(pb, size);
530 XFS_BUF_SET_COUNT(pb, size);
531 XFS_BUF_SET_OFFSET(pb, offset);
532 XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode));
533 XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert);
534
535 if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
536 pagebuf_iodone(pb, 1, 1);
537 }
538
539 return 0; 563 return 0;
540} 564}
541 565
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 58286b1d733b..fba40cbdbcf1 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -67,7 +67,7 @@ STATIC int xfsbufd_wakeup(int, unsigned int);
67STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); 67STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
68 68
69STATIC struct workqueue_struct *xfslogd_workqueue; 69STATIC struct workqueue_struct *xfslogd_workqueue;
70STATIC struct workqueue_struct *xfsdatad_workqueue; 70struct workqueue_struct *xfsdatad_workqueue;
71 71
72/* 72/*
73 * Pagebuf debugging 73 * Pagebuf debugging
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 42dc5e4662ed..1c63fd3118d7 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -104,6 +104,7 @@
104#include <xfs_stats.h> 104#include <xfs_stats.h>
105#include <xfs_sysctl.h> 105#include <xfs_sysctl.h>
106#include <xfs_iops.h> 106#include <xfs_iops.h>
107#include <xfs_aops.h>
107#include <xfs_super.h> 108#include <xfs_super.h>
108#include <xfs_globals.h> 109#include <xfs_globals.h>
109#include <xfs_fs_subr.h> 110#include <xfs_fs_subr.h>
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index d2c8a11e22b8..1a0bcbbc0a86 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -70,11 +70,14 @@
70#include <linux/namei.h> 70#include <linux/namei.h>
71#include <linux/init.h> 71#include <linux/init.h>
72#include <linux/mount.h> 72#include <linux/mount.h>
73#include <linux/mempool.h>
73#include <linux/writeback.h> 74#include <linux/writeback.h>
74 75
75STATIC struct quotactl_ops linvfs_qops; 76STATIC struct quotactl_ops linvfs_qops;
76STATIC struct super_operations linvfs_sops; 77STATIC struct super_operations linvfs_sops;
77STATIC kmem_zone_t *linvfs_inode_zone; 78STATIC kmem_zone_t *xfs_vnode_zone;
79STATIC kmem_zone_t *xfs_ioend_zone;
80mempool_t *xfs_ioend_pool;
78 81
79STATIC struct xfs_mount_args * 82STATIC struct xfs_mount_args *
80xfs_args_allocate( 83xfs_args_allocate(
@@ -281,8 +284,7 @@ linvfs_alloc_inode(
281{ 284{
282 vnode_t *vp; 285 vnode_t *vp;
283 286
284 vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone, 287 vp = kmem_cache_alloc(xfs_vnode_zone, kmem_flags_convert(KM_SLEEP));
285 kmem_flags_convert(KM_SLEEP));
286 if (!vp) 288 if (!vp)
287 return NULL; 289 return NULL;
288 return LINVFS_GET_IP(vp); 290 return LINVFS_GET_IP(vp);
@@ -292,11 +294,11 @@ STATIC void
292linvfs_destroy_inode( 294linvfs_destroy_inode(
293 struct inode *inode) 295 struct inode *inode)
294{ 296{
295 kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode)); 297 kmem_zone_free(xfs_vnode_zone, LINVFS_GET_VP(inode));
296} 298}
297 299
298STATIC void 300STATIC void
299init_once( 301linvfs_inode_init_once(
300 void *data, 302 void *data,
301 kmem_cache_t *cachep, 303 kmem_cache_t *cachep,
302 unsigned long flags) 304 unsigned long flags)
@@ -309,21 +311,41 @@ init_once(
309} 311}
310 312
311STATIC int 313STATIC int
312init_inodecache( void ) 314linvfs_init_zones(void)
313{ 315{
314 linvfs_inode_zone = kmem_cache_create("linvfs_icache", 316 xfs_vnode_zone = kmem_cache_create("xfs_vnode",
315 sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT, 317 sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT,
316 init_once, NULL); 318 linvfs_inode_init_once, NULL);
317 if (linvfs_inode_zone == NULL) 319 if (!xfs_vnode_zone)
318 return -ENOMEM; 320 goto out;
321
322 xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
323 if (!xfs_ioend_zone)
324 goto out_destroy_vnode_zone;
325
326 xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE,
327 mempool_alloc_slab, mempool_free_slab,
328 xfs_ioend_zone);
329 if (!xfs_ioend_pool)
330 goto out_free_ioend_zone;
331
319 return 0; 332 return 0;
333
334
335 out_free_ioend_zone:
336 kmem_zone_destroy(xfs_ioend_zone);
337 out_destroy_vnode_zone:
338 kmem_zone_destroy(xfs_vnode_zone);
339 out:
340 return -ENOMEM;
320} 341}
321 342
322STATIC void 343STATIC void
323destroy_inodecache( void ) 344linvfs_destroy_zones(void)
324{ 345{
325 if (kmem_cache_destroy(linvfs_inode_zone)) 346 mempool_destroy(xfs_ioend_pool);
326 printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__); 347 kmem_zone_destroy(xfs_vnode_zone);
348 kmem_zone_destroy(xfs_ioend_zone);
327} 349}
328 350
329/* 351/*
@@ -873,9 +895,9 @@ init_xfs_fs( void )
873 895
874 ktrace_init(64); 896 ktrace_init(64);
875 897
876 error = init_inodecache(); 898 error = linvfs_init_zones();
877 if (error < 0) 899 if (error < 0)
878 goto undo_inodecache; 900 goto undo_zones;
879 901
880 error = pagebuf_init(); 902 error = pagebuf_init();
881 if (error < 0) 903 if (error < 0)
@@ -896,9 +918,9 @@ undo_register:
896 pagebuf_terminate(); 918 pagebuf_terminate();
897 919
898undo_pagebuf: 920undo_pagebuf:
899 destroy_inodecache(); 921 linvfs_destroy_zones();
900 922
901undo_inodecache: 923undo_zones:
902 return error; 924 return error;
903} 925}
904 926
@@ -910,7 +932,7 @@ exit_xfs_fs( void )
910 unregister_filesystem(&xfs_fs_type); 932 unregister_filesystem(&xfs_fs_type);
911 xfs_cleanup(); 933 xfs_cleanup();
912 pagebuf_terminate(); 934 pagebuf_terminate();
913 destroy_inodecache(); 935 linvfs_destroy_zones();
914 ktrace_uninit(); 936 ktrace_uninit();
915} 937}
916 938