Diffstat (limited to 'fs/xfs/linux-2.6/xfs_buf.c')
 -rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 401
 1 file changed, 278 insertions(+), 123 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 77b8be81c769..bd111b7e1daa 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -33,6 +33,7 @@
 #include <linux/migrate.h>
 #include <linux/backing-dev.h>
 #include <linux/freezer.h>
+#include <linux/list_sort.h>
 
 #include "xfs_sb.h"
 #include "xfs_inum.h"
@@ -76,6 +77,27 @@ struct workqueue_struct *xfsconvertd_workqueue;
 #define xfs_buf_deallocate(bp) \
 	kmem_zone_free(xfs_buf_zone, (bp));
 
+static inline int
+xfs_buf_is_vmapped(
+	struct xfs_buf	*bp)
+{
+	/*
+	 * Return true if the buffer is vmapped.
+	 *
+	 * The XBF_MAPPED flag is set if the buffer should be mapped, but the
+	 * code is clever enough to know it doesn't have to map a single page,
+	 * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1.
+	 */
+	return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1;
+}
+
+static inline int
+xfs_buf_vmap_len(
+	struct xfs_buf	*bp)
+{
+	return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
+}
+
 /*
  * Page Region interfaces.
  *
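The two helpers only treat a buffer as vmapped when it actually spans more than one page, and the mapped length is the whole page run minus the data offset into the first page. A quick, hypothetical check of that length arithmetic in plain C (the 4K page size and the field values are made-up sample numbers, not anything taken from the patch):

#include <stdio.h>

#define EXAMPLE_PAGE_SIZE 4096u		/* assumed 4K pages for the example */

int main(void)
{
	unsigned int page_count = 3;	/* stands in for bp->b_page_count */
	unsigned int offset = 512;	/* stands in for bp->b_offset */

	/* mirrors xfs_buf_vmap_len(): full page run minus the start offset */
	unsigned int vmap_len = page_count * EXAMPLE_PAGE_SIZE - offset;

	printf("%u pages, offset %u -> %u mapped bytes\n",
	       page_count, offset, vmap_len);
	return 0;
}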
@@ -146,75 +168,6 @@ test_page_region(
 }
 
 /*
- * Mapping of multi-page buffers into contiguous virtual space
- */
-
-typedef struct a_list {
-	void		*vm_addr;
-	struct a_list	*next;
-} a_list_t;
-
-static a_list_t		*as_free_head;
-static int		as_list_len;
-static DEFINE_SPINLOCK(as_lock);
-
-/*
- * Try to batch vunmaps because they are costly.
- */
-STATIC void
-free_address(
-	void		*addr)
-{
-	a_list_t	*aentry;
-
-#ifdef CONFIG_XEN
-	/*
-	 * Xen needs to be able to make sure it can get an exclusive
-	 * RO mapping of pages it wants to turn into a pagetable. If
-	 * a newly allocated page is also still being vmap()ed by xfs,
-	 * it will cause pagetable construction to fail. This is a
-	 * quick workaround to always eagerly unmap pages so that Xen
-	 * is happy.
-	 */
-	vunmap(addr);
-	return;
-#endif
-
-	aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT);
-	if (likely(aentry)) {
-		spin_lock(&as_lock);
-		aentry->next = as_free_head;
-		aentry->vm_addr = addr;
-		as_free_head = aentry;
-		as_list_len++;
-		spin_unlock(&as_lock);
-	} else {
-		vunmap(addr);
-	}
-}
-
-STATIC void
-purge_addresses(void)
-{
-	a_list_t	*aentry, *old;
-
-	if (as_free_head == NULL)
-		return;
-
-	spin_lock(&as_lock);
-	aentry = as_free_head;
-	as_free_head = NULL;
-	as_list_len = 0;
-	spin_unlock(&as_lock);
-
-	while ((old = aentry) != NULL) {
-		vunmap(aentry->vm_addr);
-		aentry = aentry->next;
-		kfree(old);
-	}
-}
-
-/*
  * Internal xfs_buf_t object manipulation
  */
 
@@ -314,8 +267,9 @@ xfs_buf_free(
 	if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
 		uint		i;
 
-		if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
-			free_address(bp->b_addr - bp->b_offset);
+		if (xfs_buf_is_vmapped(bp))
+			vm_unmap_ram(bp->b_addr - bp->b_offset,
+					bp->b_page_count);
 
 		for (i = 0; i < bp->b_page_count; i++) {
 			struct page	*page = bp->b_pages[i];
@@ -435,10 +389,8 @@ _xfs_buf_map_pages(
 		bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
 		bp->b_flags |= XBF_MAPPED;
 	} else if (flags & XBF_MAPPED) {
-		if (as_list_len > 64)
-			purge_addresses();
-		bp->b_addr = vmap(bp->b_pages, bp->b_page_count,
-				VM_MAP, PAGE_KERNEL);
+		bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
+					-1, PAGE_KERNEL);
 		if (unlikely(bp->b_addr == NULL))
 			return -ENOMEM;
 		bp->b_addr += bp->b_offset;
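vm_map_ram()/vm_unmap_ram() do their own lazy, batched unmapping inside the vmalloc code, which is why the hand-rolled free_address()/purge_addresses() batching further up can simply be deleted rather than replaced. A minimal kernel-style sketch of the map/unmap pairing as used here; example_map_and_unmap(), pages and nr_pages are hypothetical, only vm_map_ram()/vm_unmap_ram() and the -1 node argument come from the patch:

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

/* hypothetical helper: map a page array, use the mapping, tear it down */
static int example_map_and_unmap(struct page **pages, unsigned int nr_pages)
{
	void *addr;

	/* -1 means no NUMA node preference, as in _xfs_buf_map_pages() above */
	addr = vm_map_ram(pages, nr_pages, -1, PAGE_KERNEL);
	if (!addr)
		return -ENOMEM;

	/* ... access the virtually contiguous range at addr ... */

	/* the unmap must quote the same page count that was mapped */
	vm_unmap_ram(addr, nr_pages);
	return 0;
}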
@@ -1051,22 +1003,30 @@ xfs_buf_ioerror(
 }
 
 int
-xfs_bawrite(
-	void			*mp,
+xfs_bwrite(
+	struct xfs_mount	*mp,
 	struct xfs_buf		*bp)
 {
-	trace_xfs_buf_bawrite(bp, _RET_IP_);
+	int			iowait = (bp->b_flags & XBF_ASYNC) == 0;
+	int			error = 0;
 
-	ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+	bp->b_strat = xfs_bdstrat_cb;
+	bp->b_mount = mp;
+	bp->b_flags |= XBF_WRITE;
+	if (!iowait)
+		bp->b_flags |= _XBF_RUN_QUEUES;
 
 	xfs_buf_delwri_dequeue(bp);
+	xfs_buf_iostrategy(bp);
 
-	bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD);
-	bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
+	if (iowait) {
+		error = xfs_buf_iowait(bp);
+		if (error)
+			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+		xfs_buf_relse(bp);
+	}
 
-	bp->b_mount = mp;
-	bp->b_strat = xfs_bdstrat_cb;
-	return xfs_bdstrat_cb(bp);
+	return error;
 }
 
 void
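After this rework one entry point covers both cases: callers that leave XBF_ASYNC clear get a synchronous write with xfs_buf_iowait()/xfs_buf_relse() and a forced shutdown on error, while XBF_ASYNC callers just get the I/O issued. A hedged sketch of the two call shapes; the example_* wrappers are invented, only xfs_bwrite() and the XBF_ASYNC flag come from the patch:

/* hypothetical caller: synchronous metadata write, buffer released for us */
static int example_write_sync(struct xfs_mount *mp, struct xfs_buf *bp)
{
	bp->b_flags &= ~XBF_ASYNC;	/* wait for completion */
	return xfs_bwrite(mp, bp);	/* nonzero error also shuts the fs down */
}

/* hypothetical caller: fire-and-forget write, completion handled elsewhere */
static void example_write_async(struct xfs_mount *mp, struct xfs_buf *bp)
{
	bp->b_flags |= XBF_ASYNC;	/* skip the iowait/relse path */
	xfs_bwrite(mp, bp);		/* returns 0 immediately */
}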
@@ -1085,6 +1045,126 @@ xfs_bdwrite(
 	xfs_buf_delwri_queue(bp, 1);
 }
 
+/*
+ * Called when we want to stop a buffer from getting written or read.
+ * We attach the EIO error, muck with its flags, and call biodone
+ * so that the proper iodone callbacks get called.
+ */
+STATIC int
+xfs_bioerror(
+	xfs_buf_t *bp)
+{
+#ifdef XFSERRORDEBUG
+	ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
+#endif
+
+	/*
+	 * No need to wait until the buffer is unpinned, we aren't flushing it.
+	 */
+	XFS_BUF_ERROR(bp, EIO);
+
+	/*
+	 * We're calling biodone, so delete XBF_DONE flag.
+	 */
+	XFS_BUF_UNREAD(bp);
+	XFS_BUF_UNDELAYWRITE(bp);
+	XFS_BUF_UNDONE(bp);
+	XFS_BUF_STALE(bp);
+
+	XFS_BUF_CLR_BDSTRAT_FUNC(bp);
+	xfs_biodone(bp);
+
+	return EIO;
+}
+
+/*
+ * Same as xfs_bioerror, except that we are releasing the buffer
+ * here ourselves, and avoiding the biodone call.
+ * This is meant for userdata errors; metadata bufs come with
+ * iodone functions attached, so that we can track down errors.
+ */
+STATIC int
+xfs_bioerror_relse(
+	struct xfs_buf	*bp)
+{
+	int64_t		fl = XFS_BUF_BFLAGS(bp);
+	/*
+	 * No need to wait until the buffer is unpinned.
+	 * We aren't flushing it.
+	 *
+	 * chunkhold expects B_DONE to be set, whether
+	 * we actually finish the I/O or not. We don't want to
+	 * change that interface.
+	 */
+	XFS_BUF_UNREAD(bp);
+	XFS_BUF_UNDELAYWRITE(bp);
+	XFS_BUF_DONE(bp);
+	XFS_BUF_STALE(bp);
+	XFS_BUF_CLR_IODONE_FUNC(bp);
+	XFS_BUF_CLR_BDSTRAT_FUNC(bp);
+	if (!(fl & XBF_ASYNC)) {
+		/*
+		 * Mark b_error and B_ERROR _both_.
+		 * Lot's of chunkcache code assumes that.
+		 * There's no reason to mark error for
+		 * ASYNC buffers.
+		 */
+		XFS_BUF_ERROR(bp, EIO);
+		XFS_BUF_FINISH_IOWAIT(bp);
+	} else {
+		xfs_buf_relse(bp);
+	}
+
+	return EIO;
+}
+
+
+/*
+ * All xfs metadata buffers except log state machine buffers
+ * get this attached as their b_bdstrat callback function.
+ * This is so that we can catch a buffer
+ * after prematurely unpinning it to forcibly shutdown the filesystem.
+ */
+int
+xfs_bdstrat_cb(
+	struct xfs_buf	*bp)
+{
+	if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
+		trace_xfs_bdstrat_shut(bp, _RET_IP_);
+		/*
+		 * Metadata write that didn't get logged but
+		 * written delayed anyway. These aren't associated
+		 * with a transaction, and can be ignored.
+		 */
+		if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
+			return xfs_bioerror_relse(bp);
+		else
+			return xfs_bioerror(bp);
+	}
+
+	xfs_buf_iorequest(bp);
+	return 0;
+}
+
+/*
+ * Wrapper around bdstrat so that we can stop data from going to disk in case
+ * we are shutting down the filesystem. Typically user data goes thru this
+ * path; one of the exceptions is the superblock.
+ */
+void
+xfsbdstrat(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp)
+{
+	if (XFS_FORCED_SHUTDOWN(mp)) {
+		trace_xfs_bdstrat_shut(bp, _RET_IP_);
+		xfs_bioerror_relse(bp);
+		return;
+	}
+
+	xfs_buf_iorequest(bp);
+}
+
 STATIC void
 _xfs_buf_ioend(
 	xfs_buf_t		*bp,
@@ -1107,6 +1187,9 @@ xfs_buf_bio_end_io(
 
 	xfs_buf_ioerror(bp, -error);
 
+	if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
+		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
+
 	do {
 		struct page	*page = bvec->bv_page;
 
@@ -1216,6 +1299,10 @@ next_chunk:
 
 submit_io:
 	if (likely(bio->bi_size)) {
+		if (xfs_buf_is_vmapped(bp)) {
+			flush_kernel_vmap_range(bp->b_addr,
+						xfs_buf_vmap_len(bp));
+		}
 		submit_bio(rw, bio);
 		if (size)
 			goto next_chunk;
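Taken together with the read-completion hunk above, this gives the usual discipline for buffers accessed through a vmap alias on architectures with virtually indexed caches: write the alias back before the pages go to the block layer for a write, and invalidate it after a read completes (on x86 both calls compile away). A kernel-style sketch of that pairing; struct example_buf and the two functions are hypothetical, only the flush/invalidate helpers are real kernel interfaces:

#include <linux/highmem.h>

/* hypothetical buffer whose pages are mapped with vm_map_ram() */
struct example_buf {
	void	*vaddr;		/* alias returned by vm_map_ram() */
	int	len;		/* mapped length in bytes */
};

static void example_before_write_io(struct example_buf *eb)
{
	/* push dirty cache lines of the alias out so the device sees them */
	flush_kernel_vmap_range(eb->vaddr, eb->len);
	/* ... then submit the write bio against the underlying pages ... */
}

static void example_after_read_io(struct example_buf *eb)
{
	/* drop stale alias lines so reads through vaddr see the DMA'd data */
	invalidate_kernel_vmap_range(eb->vaddr, eb->len);
}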
@@ -1296,7 +1383,7 @@ xfs_buf_iomove(
 	xfs_buf_t		*bp,	/* buffer to process		*/
 	size_t			boff,	/* starting buffer offset	*/
 	size_t			bsize,	/* length to copy		*/
-	caddr_t			data,	/* data address			*/
+	void			*data,	/* data address			*/
 	xfs_buf_rw_t		mode)	/* read/write/zero flag		*/
 {
 	size_t			bend, cpoff, csize;
@@ -1378,8 +1465,8 @@ xfs_alloc_bufhash(
 
 	btp->bt_hashshift = external ? 3 : 8;	/* 8 or 256 buckets */
 	btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
-	btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
-				sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE);
+	btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
+					 sizeof(xfs_bufhash_t));
 	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
 		spin_lock_init(&btp->bt_hash[i].bh_lock);
 		INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
@@ -1390,7 +1477,7 @@ STATIC void
 xfs_free_bufhash(
 	xfs_buftarg_t		*btp)
 {
-	kmem_free(btp->bt_hash);
+	kmem_free_large(btp->bt_hash);
 	btp->bt_hash = NULL;
 }
 
@@ -1595,6 +1682,11 @@ xfs_buf_delwri_queue(
 		list_del(&bp->b_list);
 	}
 
+	if (list_empty(dwq)) {
+		/* start xfsbufd as it is about to have something to do */
+		wake_up_process(bp->b_target->bt_task);
+	}
+
 	bp->b_flags |= _XBF_DELWRI_Q;
 	list_add_tail(&bp->b_list, dwq);
 	bp->b_queuetime = jiffies;
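The wakeup is only issued on the empty-to-non-empty transition: if the queue already holds buffers, xfsbufd either is running or already has a finite timeout armed, so further wakeups would just be noise. A small kernel-style sketch of that convention with invented types (only list_empty()/wake_up_process() and the idea of kicking the worker before the first insert are taken from the patch):

#include <linux/list.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

/* hypothetical work queue with a dedicated worker thread */
struct example_target {
	spinlock_t		lock;
	struct list_head	queue;
	struct task_struct	*task;		/* worker to kick */
};

static void example_queue_item(struct example_target *tp, struct list_head *item)
{
	spin_lock(&tp->lock);
	if (list_empty(&tp->queue)) {
		/* only the first item needs to wake the possibly idle worker */
		wake_up_process(tp->task);
	}
	list_add_tail(item, &tp->queue);
	spin_unlock(&tp->lock);
}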
@@ -1626,6 +1718,35 @@ xfs_buf_delwri_dequeue(
 	trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
 }
 
+/*
+ * If a delwri buffer needs to be pushed before it has aged out, then promote
+ * it to the head of the delwri queue so that it will be flushed on the next
+ * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
+ * than the age currently needed to flush the buffer. Hence the next time the
+ * xfsbufd sees it is guaranteed to be considered old enough to flush.
+ */
+void
+xfs_buf_delwri_promote(
+	struct xfs_buf	*bp)
+{
+	struct xfs_buftarg *btp = bp->b_target;
+	long		age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
+
+	ASSERT(bp->b_flags & XBF_DELWRI);
+	ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+
+	/*
+	 * Check the buffer age before locking the delayed write queue as we
+	 * don't need to promote buffers that are already past the flush age.
+	 */
+	if (bp->b_queuetime < jiffies - age)
+		return;
+	bp->b_queuetime = jiffies - age;
+	spin_lock(&btp->bt_delwrite_lock);
+	list_move(&bp->b_list, &btp->bt_delwrite_queue);
+	spin_unlock(&btp->bt_delwrite_lock);
+}
+
 STATIC void
 xfs_buf_runall_queues(
 	struct workqueue_struct	*queue)
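The promotion is pure arithmetic: the age used here is one tick larger than the threshold xfsbufd compares against, so once b_queuetime is pulled back, "jiffies - b_queuetime" is guaranteed to exceed the flush age on the very next pass. A runnable toy calculation with made-up numbers (the HZ value, the 15 second default age and the jiffies value are illustrative only):

#include <stdio.h>

int main(void)
{
	unsigned long hz = 100;			/* pretend CONFIG_HZ=100 */
	unsigned long flush_age = 15 * hz;	/* threshold used when splitting the queue */
	unsigned long promote_age = flush_age + 1; /* what the promote path subtracts */
	unsigned long now = 1000000;		/* pretend current jiffies */

	unsigned long queuetime = now - promote_age; /* b_queuetime after promotion */

	printf("age seen by next xfsbufd pass: %lu ticks, threshold %lu -> %s\n",
	       now - queuetime, flush_age,
	       now - queuetime > flush_age ? "written" : "skipped");
	return 0;
}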
@@ -1644,6 +1765,8 @@ xfsbufd_wakeup(
 	list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
 		if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
 			continue;
+		if (list_empty(&btp->bt_delwrite_queue))
+			continue;
 		set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
 		wake_up_process(btp->bt_task);
 	}
@@ -1694,20 +1817,53 @@ xfs_buf_delwri_split(
 
 }
 
+/*
+ * Compare function is more complex than it needs to be because
+ * the return value is only 32 bits and we are doing comparisons
+ * on 64 bit values
+ */
+static int
+xfs_buf_cmp(
+	void		*priv,
+	struct list_head *a,
+	struct list_head *b)
+{
+	struct xfs_buf	*ap = container_of(a, struct xfs_buf, b_list);
+	struct xfs_buf	*bp = container_of(b, struct xfs_buf, b_list);
+	xfs_daddr_t	diff;
+
+	diff = ap->b_bn - bp->b_bn;
+	if (diff < 0)
+		return -1;
+	if (diff > 0)
+		return 1;
+	return 0;
+}
+
+void
+xfs_buf_delwri_sort(
+	xfs_buftarg_t	*target,
+	struct list_head *list)
+{
+	list_sort(NULL, list, xfs_buf_cmp);
+}
+
 STATIC int
 xfsbufd(
 	void			*data)
 {
-	struct list_head tmp;
-	xfs_buftarg_t		*target = (xfs_buftarg_t *)data;
-	int			count;
-	xfs_buf_t		*bp;
+	xfs_buftarg_t		*target = (xfs_buftarg_t *)data;
 
 	current->flags |= PF_MEMALLOC;
 
 	set_freezable();
 
 	do {
+		long	age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
+		long	tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
+		int	count = 0;
+		struct list_head tmp;
+
 		if (unlikely(freezing(current))) {
 			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
 			refrigerator();
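The comment on xfs_buf_cmp() is worth spelling out: the natural "return ap->b_bn - bp->b_bn" would truncate a 64-bit block-number difference into the 32-bit int that the comparison callback returns, which can zero out or flip the sign of the result, so the difference is collapsed to -1/0/1 instead. A runnable illustration of the failure mode with made-up block numbers:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int64_t a_bn = 0x200000000LL;	/* hypothetical 64-bit disk addresses */
	int64_t b_bn = 0x100000000LL;	/* a_bn is clearly the larger one */

	int64_t diff = a_bn - b_bn;		/* 0x100000000, positive */
	int naive = (int)diff;			/* keeps only the low 32 bits -> 0 */
	int safe = (diff > 0) - (diff < 0);	/* -1 / 0 / 1, sign preserved */

	printf("naive cmp says %d (\"equal\"), safe cmp says %d\n", naive, safe);
	return 0;
}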
@@ -1715,24 +1871,20 @@ xfsbufd(
 			clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
 		}
 
-		schedule_timeout_interruptible(
-			xfs_buf_timer_centisecs * msecs_to_jiffies(10));
+		/* sleep for a long time if there is nothing to do. */
+		if (list_empty(&target->bt_delwrite_queue))
+			tout = MAX_SCHEDULE_TIMEOUT;
+		schedule_timeout_interruptible(tout);
 
-		xfs_buf_delwri_split(target, &tmp,
-			xfs_buf_age_centisecs * msecs_to_jiffies(10));
-
-		count = 0;
+		xfs_buf_delwri_split(target, &tmp, age);
+		list_sort(NULL, &tmp, xfs_buf_cmp);
 		while (!list_empty(&tmp)) {
-			bp = list_entry(tmp.next, xfs_buf_t, b_list);
-			ASSERT(target == bp->b_target);
-
+			struct xfs_buf *bp;
+			bp = list_first_entry(&tmp, struct xfs_buf, b_list);
 			list_del_init(&bp->b_list);
 			xfs_buf_iostrategy(bp);
 			count++;
 		}
-
-		if (as_list_len > 0)
-			purge_addresses();
 		if (count)
 			blk_run_address_space(target->bt_mapping);
 
@@ -1751,42 +1903,45 @@ xfs_flush_buftarg(
 	xfs_buftarg_t	*target,
 	int		wait)
 {
-	struct list_head tmp;
-	xfs_buf_t	*bp, *n;
+	xfs_buf_t	*bp;
 	int		pincount = 0;
+	LIST_HEAD(tmp_list);
+	LIST_HEAD(wait_list);
 
 	xfs_buf_runall_queues(xfsconvertd_workqueue);
 	xfs_buf_runall_queues(xfsdatad_workqueue);
 	xfs_buf_runall_queues(xfslogd_workqueue);
 
 	set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-	pincount = xfs_buf_delwri_split(target, &tmp, 0);
+	pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
 
 	/*
-	 * Dropped the delayed write list lock, now walk the temporary list
+	 * Dropped the delayed write list lock, now walk the temporary list.
+	 * All I/O is issued async and then if we need to wait for completion
+	 * we do that after issuing all the IO.
 	 */
-	list_for_each_entry_safe(bp, n, &tmp, b_list) {
+	list_sort(NULL, &tmp_list, xfs_buf_cmp);
+	while (!list_empty(&tmp_list)) {
+		bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
 		ASSERT(target == bp->b_target);
-		if (wait)
+		list_del_init(&bp->b_list);
+		if (wait) {
 			bp->b_flags &= ~XBF_ASYNC;
-		else
-			list_del_init(&bp->b_list);
-
+			list_add(&bp->b_list, &wait_list);
+		}
 		xfs_buf_iostrategy(bp);
 	}
 
-	if (wait)
+	if (wait) {
+		/* Expedite and wait for IO to complete. */
 		blk_run_address_space(target->bt_mapping);
+		while (!list_empty(&wait_list)) {
+			bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
 
-	/*
-	 * Remaining list items must be flushed before returning
-	 */
-	while (!list_empty(&tmp)) {
-		bp = list_entry(tmp.next, xfs_buf_t, b_list);
-
-		list_del_init(&bp->b_list);
-		xfs_iowait(bp);
-		xfs_buf_relse(bp);
+			list_del_init(&bp->b_list);
+			xfs_iowait(bp);
+			xfs_buf_relse(bp);
+		}
 	}
 
 	return pincount;
