aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Christoph Hellwig <hch@infradead.org> 2007-05-14 04:23:50 -0400
committer Tim Shimmin <tes@chook.melbourne.sgi.com> 2007-07-14 01:21:14 -0400
commit 1fa40b01ae4d1b00e366d4949edcc230f5cd6d99 (patch)
tree 66059d8f37bfafc6cce57509829ee253b3269252
parent 4eb6bf6bfb580afaf1e1a1d30cba17a078530cf4 (diff)
[XFS] Only use refcounted pages for I/O
Many block drivers (aoe, iscsi) really want refcountable pages in bios, which is what almost everyone sends down. XFS unfortunately has a few places where it sends down buffers that may come from kmalloc, which breaks them. Fix the places that use kmalloc()d buffers. SGI-PV: 964546 SGI-Modid: xfs-linux-melb:xfs-kern:28562a Signed-Off-By: Christoph Hellwig <hch@infradead.org> Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Tim Shimmin <tes@sgi.com>
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.c 49
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.h 2
-rw-r--r-- fs/xfs/xfs_log.c 19
3 files changed, 32 insertions, 38 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index fe4f66a5af14..208daf58b826 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -314,7 +314,7 @@ xfs_buf_free(
314 314
315 ASSERT(list_empty(&bp->b_hash_list)); 315 ASSERT(list_empty(&bp->b_hash_list));
316 316
317 if (bp->b_flags & _XBF_PAGE_CACHE) { 317 if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
318 uint i; 318 uint i;
319 319
320 if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) 320 if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
@@ -323,18 +323,11 @@ xfs_buf_free(
323 for (i = 0; i < bp->b_page_count; i++) { 323 for (i = 0; i < bp->b_page_count; i++) {
324 struct page *page = bp->b_pages[i]; 324 struct page *page = bp->b_pages[i];
325 325
326 ASSERT(!PagePrivate(page)); 326 if (bp->b_flags & _XBF_PAGE_CACHE)
327 ASSERT(!PagePrivate(page));
327 page_cache_release(page); 328 page_cache_release(page);
328 } 329 }
329 _xfs_buf_free_pages(bp); 330 _xfs_buf_free_pages(bp);
330 } else if (bp->b_flags & _XBF_KMEM_ALLOC) {
331 /*
332 * XXX(hch): bp->b_count_desired might be incorrect (see
333 * xfs_buf_associate_memory for details), but fortunately
334 * the Linux version of kmem_free ignores the len argument..
335 */
336 kmem_free(bp->b_addr, bp->b_count_desired);
337 _xfs_buf_free_pages(bp);
338 } 331 }
339 332
340 xfs_buf_deallocate(bp); 333 xfs_buf_deallocate(bp);
@@ -764,41 +757,41 @@ xfs_buf_get_noaddr(
764 size_t len, 757 size_t len,
765 xfs_buftarg_t *target) 758 xfs_buftarg_t *target)
766{ 759{
767 size_t malloc_len = len; 760 unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
761 int error, i;
768 xfs_buf_t *bp; 762 xfs_buf_t *bp;
769 void *data;
770 int error;
771 763
772 bp = xfs_buf_allocate(0); 764 bp = xfs_buf_allocate(0);
773 if (unlikely(bp == NULL)) 765 if (unlikely(bp == NULL))
774 goto fail; 766 goto fail;
775 _xfs_buf_initialize(bp, target, 0, len, 0); 767 _xfs_buf_initialize(bp, target, 0, len, 0);
776 768
777 try_again: 769 error = _xfs_buf_get_pages(bp, page_count, 0);
778 data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL | KM_LARGE); 770 if (error)
779 if (unlikely(data == NULL))
780 goto fail_free_buf; 771 goto fail_free_buf;
781 772
782 /* check whether alignment matches.. */ 773 for (i = 0; i < page_count; i++) {
783 if ((__psunsigned_t)data != 774 bp->b_pages[i] = alloc_page(GFP_KERNEL);
784 ((__psunsigned_t)data & ~target->bt_smask)) { 775 if (!bp->b_pages[i])
785 /* .. else double the size and try again */ 776 goto fail_free_mem;
786 kmem_free(data, malloc_len);
787 malloc_len <<= 1;
788 goto try_again;
789 } 777 }
778 bp->b_flags |= _XBF_PAGES;
790 779
791 error = xfs_buf_associate_memory(bp, data, len); 780 error = _xfs_buf_map_pages(bp, XBF_MAPPED);
792 if (error) 781 if (unlikely(error)) {
782 printk(KERN_WARNING "%s: failed to map pages\n",
783 __FUNCTION__);
793 goto fail_free_mem; 784 goto fail_free_mem;
794 bp->b_flags |= _XBF_KMEM_ALLOC; 785 }
795 786
796 xfs_buf_unlock(bp); 787 xfs_buf_unlock(bp);
797 788
798 XB_TRACE(bp, "no_daddr", data); 789 XB_TRACE(bp, "no_daddr", len);
799 return bp; 790 return bp;
791
800 fail_free_mem: 792 fail_free_mem:
801 kmem_free(data, malloc_len); 793 while (--i >= 0)
794 __free_page(bp->b_pages[i]);
802 fail_free_buf: 795 fail_free_buf:
803 xfs_buf_free(bp); 796 xfs_buf_free(bp);
804 fail: 797 fail:
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index b6241f6201a5..b5908a34b15d 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -63,7 +63,7 @@ typedef enum {
63 63
64 /* flags used only internally */ 64 /* flags used only internally */
65 _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ 65 _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */
66 _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ 66 _XBF_PAGES = (1 << 18), /* backed by refcounted pages */
67 _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ 67 _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
68 _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ 68 _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
69} xfs_buf_flags_t; 69} xfs_buf_flags_t;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c48bf61f17bd..635f99e6302f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1199,11 +1199,18 @@ xlog_alloc_log(xfs_mount_t *mp,
1199 *iclogp = (xlog_in_core_t *) 1199 *iclogp = (xlog_in_core_t *)
1200 kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP); 1200 kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP);
1201 iclog = *iclogp; 1201 iclog = *iclogp;
1202 iclog->hic_data = (xlog_in_core_2_t *)
1203 kmem_zalloc(iclogsize, KM_SLEEP | KM_LARGE);
1204
1205 iclog->ic_prev = prev_iclog; 1202 iclog->ic_prev = prev_iclog;
1206 prev_iclog = iclog; 1203 prev_iclog = iclog;
1204
1205 bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
1206 if (!XFS_BUF_CPSEMA(bp))
1207 ASSERT(0);
1208 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1209 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1210 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1211 iclog->ic_bp = bp;
1212 iclog->hic_data = bp->b_addr;
1213
1207 log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); 1214 log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
1208 1215
1209 head = &iclog->ic_header; 1216 head = &iclog->ic_header;
@@ -1216,11 +1223,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1216 INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT); 1223 INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT);
1217 memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); 1224 memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
1218 1225
1219 bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
1220 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1221 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1222 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1223 iclog->ic_bp = bp;
1224 1226
1225 iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; 1227 iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
1226 iclog->ic_state = XLOG_STATE_ACTIVE; 1228 iclog->ic_state = XLOG_STATE_ACTIVE;
@@ -1528,7 +1530,6 @@ xlog_dealloc_log(xlog_t *log)
1528 } 1530 }
1529#endif 1531#endif
1530 next_iclog = iclog->ic_next; 1532 next_iclog = iclog->ic_next;
1531 kmem_free(iclog->hic_data, log->l_iclog_size);
1532 kmem_free(iclog, sizeof(xlog_in_core_t)); 1533 kmem_free(iclog, sizeof(xlog_in_core_t));
1533 iclog = next_iclog; 1534 iclog = next_iclog;
1534 } 1535 }