diff options
author | Christoph Hellwig <hch@infradead.org> | 2007-05-14 04:23:50 -0400 |
---|---|---|
committer | Tim Shimmin <tes@chook.melbourne.sgi.com> | 2007-07-14 01:21:14 -0400 |
commit | 1fa40b01ae4d1b00e366d4949edcc230f5cd6d99 (patch) | |
tree | 66059d8f37bfafc6cce57509829ee253b3269252 | |
parent | 4eb6bf6bfb580afaf1e1a1d30cba17a078530cf4 (diff) |
[XFS] Only use refcounted pages for I/O
Many block drivers (aoe, iscsi) really want refcountable pages in bios,
which is what almost everyone sends down. XFS unfortunately has a few
places where it sends down buffers that may come from kmalloc, which
breaks them.
Fix the places that use kmalloc()d buffers.
SGI-PV: 964546
SGI-Modid: xfs-linux-melb:xfs-kern:28562a
Signed-Off-By: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.c | 49 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_log.c | 19 |
3 files changed, 32 insertions, 38 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index fe4f66a5af14..208daf58b826 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -314,7 +314,7 @@ xfs_buf_free( | |||
314 | 314 | ||
315 | ASSERT(list_empty(&bp->b_hash_list)); | 315 | ASSERT(list_empty(&bp->b_hash_list)); |
316 | 316 | ||
317 | if (bp->b_flags & _XBF_PAGE_CACHE) { | 317 | if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { |
318 | uint i; | 318 | uint i; |
319 | 319 | ||
320 | if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) | 320 | if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) |
@@ -323,18 +323,11 @@ xfs_buf_free( | |||
323 | for (i = 0; i < bp->b_page_count; i++) { | 323 | for (i = 0; i < bp->b_page_count; i++) { |
324 | struct page *page = bp->b_pages[i]; | 324 | struct page *page = bp->b_pages[i]; |
325 | 325 | ||
326 | ASSERT(!PagePrivate(page)); | 326 | if (bp->b_flags & _XBF_PAGE_CACHE) |
327 | ASSERT(!PagePrivate(page)); | ||
327 | page_cache_release(page); | 328 | page_cache_release(page); |
328 | } | 329 | } |
329 | _xfs_buf_free_pages(bp); | 330 | _xfs_buf_free_pages(bp); |
330 | } else if (bp->b_flags & _XBF_KMEM_ALLOC) { | ||
331 | /* | ||
332 | * XXX(hch): bp->b_count_desired might be incorrect (see | ||
333 | * xfs_buf_associate_memory for details), but fortunately | ||
334 | * the Linux version of kmem_free ignores the len argument.. | ||
335 | */ | ||
336 | kmem_free(bp->b_addr, bp->b_count_desired); | ||
337 | _xfs_buf_free_pages(bp); | ||
338 | } | 331 | } |
339 | 332 | ||
340 | xfs_buf_deallocate(bp); | 333 | xfs_buf_deallocate(bp); |
@@ -764,41 +757,41 @@ xfs_buf_get_noaddr( | |||
764 | size_t len, | 757 | size_t len, |
765 | xfs_buftarg_t *target) | 758 | xfs_buftarg_t *target) |
766 | { | 759 | { |
767 | size_t malloc_len = len; | 760 | unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; |
761 | int error, i; | ||
768 | xfs_buf_t *bp; | 762 | xfs_buf_t *bp; |
769 | void *data; | ||
770 | int error; | ||
771 | 763 | ||
772 | bp = xfs_buf_allocate(0); | 764 | bp = xfs_buf_allocate(0); |
773 | if (unlikely(bp == NULL)) | 765 | if (unlikely(bp == NULL)) |
774 | goto fail; | 766 | goto fail; |
775 | _xfs_buf_initialize(bp, target, 0, len, 0); | 767 | _xfs_buf_initialize(bp, target, 0, len, 0); |
776 | 768 | ||
777 | try_again: | 769 | error = _xfs_buf_get_pages(bp, page_count, 0); |
778 | data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL | KM_LARGE); | 770 | if (error) |
779 | if (unlikely(data == NULL)) | ||
780 | goto fail_free_buf; | 771 | goto fail_free_buf; |
781 | 772 | ||
782 | /* check whether alignment matches.. */ | 773 | for (i = 0; i < page_count; i++) { |
783 | if ((__psunsigned_t)data != | 774 | bp->b_pages[i] = alloc_page(GFP_KERNEL); |
784 | ((__psunsigned_t)data & ~target->bt_smask)) { | 775 | if (!bp->b_pages[i]) |
785 | /* .. else double the size and try again */ | 776 | goto fail_free_mem; |
786 | kmem_free(data, malloc_len); | ||
787 | malloc_len <<= 1; | ||
788 | goto try_again; | ||
789 | } | 777 | } |
778 | bp->b_flags |= _XBF_PAGES; | ||
790 | 779 | ||
791 | error = xfs_buf_associate_memory(bp, data, len); | 780 | error = _xfs_buf_map_pages(bp, XBF_MAPPED); |
792 | if (error) | 781 | if (unlikely(error)) { |
782 | printk(KERN_WARNING "%s: failed to map pages\n", | ||
783 | __FUNCTION__); | ||
793 | goto fail_free_mem; | 784 | goto fail_free_mem; |
794 | bp->b_flags |= _XBF_KMEM_ALLOC; | 785 | } |
795 | 786 | ||
796 | xfs_buf_unlock(bp); | 787 | xfs_buf_unlock(bp); |
797 | 788 | ||
798 | XB_TRACE(bp, "no_daddr", data); | 789 | XB_TRACE(bp, "no_daddr", len); |
799 | return bp; | 790 | return bp; |
791 | |||
800 | fail_free_mem: | 792 | fail_free_mem: |
801 | kmem_free(data, malloc_len); | 793 | while (--i >= 0) |
794 | __free_page(bp->b_pages[i]); | ||
802 | fail_free_buf: | 795 | fail_free_buf: |
803 | xfs_buf_free(bp); | 796 | xfs_buf_free(bp); |
804 | fail: | 797 | fail: |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index b6241f6201a5..b5908a34b15d 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -63,7 +63,7 @@ typedef enum { | |||
63 | 63 | ||
64 | /* flags used only internally */ | 64 | /* flags used only internally */ |
65 | _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ | 65 | _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ |
66 | _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ | 66 | _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ |
67 | _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ | 67 | _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ |
68 | _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ | 68 | _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ |
69 | } xfs_buf_flags_t; | 69 | } xfs_buf_flags_t; |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index c48bf61f17bd..635f99e6302f 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -1199,11 +1199,18 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1199 | *iclogp = (xlog_in_core_t *) | 1199 | *iclogp = (xlog_in_core_t *) |
1200 | kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP); | 1200 | kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP); |
1201 | iclog = *iclogp; | 1201 | iclog = *iclogp; |
1202 | iclog->hic_data = (xlog_in_core_2_t *) | ||
1203 | kmem_zalloc(iclogsize, KM_SLEEP | KM_LARGE); | ||
1204 | |||
1205 | iclog->ic_prev = prev_iclog; | 1202 | iclog->ic_prev = prev_iclog; |
1206 | prev_iclog = iclog; | 1203 | prev_iclog = iclog; |
1204 | |||
1205 | bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp); | ||
1206 | if (!XFS_BUF_CPSEMA(bp)) | ||
1207 | ASSERT(0); | ||
1208 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); | ||
1209 | XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); | ||
1210 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); | ||
1211 | iclog->ic_bp = bp; | ||
1212 | iclog->hic_data = bp->b_addr; | ||
1213 | |||
1207 | log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); | 1214 | log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); |
1208 | 1215 | ||
1209 | head = &iclog->ic_header; | 1216 | head = &iclog->ic_header; |
@@ -1216,11 +1223,6 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1216 | INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT); | 1223 | INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT); |
1217 | memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); | 1224 | memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); |
1218 | 1225 | ||
1219 | bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); | ||
1220 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); | ||
1221 | XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); | ||
1222 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); | ||
1223 | iclog->ic_bp = bp; | ||
1224 | 1226 | ||
1225 | iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; | 1227 | iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; |
1226 | iclog->ic_state = XLOG_STATE_ACTIVE; | 1228 | iclog->ic_state = XLOG_STATE_ACTIVE; |
@@ -1528,7 +1530,6 @@ xlog_dealloc_log(xlog_t *log) | |||
1528 | } | 1530 | } |
1529 | #endif | 1531 | #endif |
1530 | next_iclog = iclog->ic_next; | 1532 | next_iclog = iclog->ic_next; |
1531 | kmem_free(iclog->hic_data, log->l_iclog_size); | ||
1532 | kmem_free(iclog, sizeof(xlog_in_core_t)); | 1533 | kmem_free(iclog, sizeof(xlog_in_core_t)); |
1533 | iclog = next_iclog; | 1534 | iclog = next_iclog; |
1534 | } | 1535 | } |