diff options
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_aops.c')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_aops.c | 540 |
1 files changed, 268 insertions, 272 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 74d8be87f983..6cbbd165c60d 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -43,7 +43,29 @@ | |||
43 | #include <linux/pagevec.h> | 43 | #include <linux/pagevec.h> |
44 | #include <linux/writeback.h> | 44 | #include <linux/writeback.h> |
45 | 45 | ||
46 | STATIC void xfs_count_page_state(struct page *, int *, int *, int *); | 46 | STATIC void |
47 | xfs_count_page_state( | ||
48 | struct page *page, | ||
49 | int *delalloc, | ||
50 | int *unmapped, | ||
51 | int *unwritten) | ||
52 | { | ||
53 | struct buffer_head *bh, *head; | ||
54 | |||
55 | *delalloc = *unmapped = *unwritten = 0; | ||
56 | |||
57 | bh = head = page_buffers(page); | ||
58 | do { | ||
59 | if (buffer_uptodate(bh) && !buffer_mapped(bh)) | ||
60 | (*unmapped) = 1; | ||
61 | else if (buffer_unwritten(bh) && !buffer_delay(bh)) | ||
62 | clear_buffer_unwritten(bh); | ||
63 | else if (buffer_unwritten(bh)) | ||
64 | (*unwritten) = 1; | ||
65 | else if (buffer_delay(bh)) | ||
66 | (*delalloc) = 1; | ||
67 | } while ((bh = bh->b_this_page) != head); | ||
68 | } | ||
47 | 69 | ||
48 | #if defined(XFS_RW_TRACE) | 70 | #if defined(XFS_RW_TRACE) |
49 | void | 71 | void |
@@ -54,7 +76,7 @@ xfs_page_trace( | |||
54 | int mask) | 76 | int mask) |
55 | { | 77 | { |
56 | xfs_inode_t *ip; | 78 | xfs_inode_t *ip; |
57 | vnode_t *vp = LINVFS_GET_VP(inode); | 79 | vnode_t *vp = vn_from_inode(inode); |
58 | loff_t isize = i_size_read(inode); | 80 | loff_t isize = i_size_read(inode); |
59 | loff_t offset = page_offset(page); | 81 | loff_t offset = page_offset(page); |
60 | int delalloc = -1, unmapped = -1, unwritten = -1; | 82 | int delalloc = -1, unmapped = -1, unwritten = -1; |
@@ -81,7 +103,7 @@ xfs_page_trace( | |||
81 | (void *)((unsigned long)delalloc), | 103 | (void *)((unsigned long)delalloc), |
82 | (void *)((unsigned long)unmapped), | 104 | (void *)((unsigned long)unmapped), |
83 | (void *)((unsigned long)unwritten), | 105 | (void *)((unsigned long)unwritten), |
84 | (void *)NULL, | 106 | (void *)((unsigned long)current_pid()), |
85 | (void *)NULL); | 107 | (void *)NULL); |
86 | } | 108 | } |
87 | #else | 109 | #else |
@@ -192,7 +214,7 @@ xfs_alloc_ioend( | |||
192 | ioend->io_uptodate = 1; /* cleared if any I/O fails */ | 214 | ioend->io_uptodate = 1; /* cleared if any I/O fails */ |
193 | ioend->io_list = NULL; | 215 | ioend->io_list = NULL; |
194 | ioend->io_type = type; | 216 | ioend->io_type = type; |
195 | ioend->io_vnode = LINVFS_GET_VP(inode); | 217 | ioend->io_vnode = vn_from_inode(inode); |
196 | ioend->io_buffer_head = NULL; | 218 | ioend->io_buffer_head = NULL; |
197 | ioend->io_buffer_tail = NULL; | 219 | ioend->io_buffer_tail = NULL; |
198 | atomic_inc(&ioend->io_vnode->v_iocount); | 220 | atomic_inc(&ioend->io_vnode->v_iocount); |
@@ -217,7 +239,7 @@ xfs_map_blocks( | |||
217 | xfs_iomap_t *mapp, | 239 | xfs_iomap_t *mapp, |
218 | int flags) | 240 | int flags) |
219 | { | 241 | { |
220 | vnode_t *vp = LINVFS_GET_VP(inode); | 242 | vnode_t *vp = vn_from_inode(inode); |
221 | int error, nmaps = 1; | 243 | int error, nmaps = 1; |
222 | 244 | ||
223 | VOP_BMAP(vp, offset, count, flags, mapp, &nmaps, error); | 245 | VOP_BMAP(vp, offset, count, flags, mapp, &nmaps, error); |
@@ -350,7 +372,7 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh) | |||
350 | * assumes that all buffers on the page are started at the same time. | 372 | * assumes that all buffers on the page are started at the same time. |
351 | * | 373 | * |
352 | * The fix is two passes across the ioend list - one to start writeback on the | 374 | * The fix is two passes across the ioend list - one to start writeback on the |
353 | * bufferheads, and then the second one submit them for I/O. | 375 | * buffer_heads, and then submit them for I/O on the second pass. |
354 | */ | 376 | */ |
355 | STATIC void | 377 | STATIC void |
356 | xfs_submit_ioend( | 378 | xfs_submit_ioend( |
@@ -462,28 +484,37 @@ xfs_add_to_ioend( | |||
462 | } | 484 | } |
463 | 485 | ||
464 | STATIC void | 486 | STATIC void |
487 | xfs_map_buffer( | ||
488 | struct buffer_head *bh, | ||
489 | xfs_iomap_t *mp, | ||
490 | xfs_off_t offset, | ||
491 | uint block_bits) | ||
492 | { | ||
493 | sector_t bn; | ||
494 | |||
495 | ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL); | ||
496 | |||
497 | bn = (mp->iomap_bn >> (block_bits - BBSHIFT)) + | ||
498 | ((offset - mp->iomap_offset) >> block_bits); | ||
499 | |||
500 | ASSERT(bn || (mp->iomap_flags & IOMAP_REALTIME)); | ||
501 | |||
502 | bh->b_blocknr = bn; | ||
503 | set_buffer_mapped(bh); | ||
504 | } | ||
505 | |||
506 | STATIC void | ||
465 | xfs_map_at_offset( | 507 | xfs_map_at_offset( |
466 | struct buffer_head *bh, | 508 | struct buffer_head *bh, |
467 | loff_t offset, | 509 | loff_t offset, |
468 | int block_bits, | 510 | int block_bits, |
469 | xfs_iomap_t *iomapp) | 511 | xfs_iomap_t *iomapp) |
470 | { | 512 | { |
471 | xfs_daddr_t bn; | ||
472 | int sector_shift; | ||
473 | |||
474 | ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); | 513 | ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); |
475 | ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY)); | 514 | ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY)); |
476 | ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL); | ||
477 | |||
478 | sector_shift = block_bits - BBSHIFT; | ||
479 | bn = (iomapp->iomap_bn >> sector_shift) + | ||
480 | ((offset - iomapp->iomap_offset) >> block_bits); | ||
481 | |||
482 | ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME)); | ||
483 | ASSERT((bn << sector_shift) >= iomapp->iomap_bn); | ||
484 | 515 | ||
485 | lock_buffer(bh); | 516 | lock_buffer(bh); |
486 | bh->b_blocknr = bn; | 517 | xfs_map_buffer(bh, iomapp, offset, block_bits); |
487 | bh->b_bdev = iomapp->iomap_target->bt_bdev; | 518 | bh->b_bdev = iomapp->iomap_target->bt_bdev; |
488 | set_buffer_mapped(bh); | 519 | set_buffer_mapped(bh); |
489 | clear_buffer_delay(bh); | 520 | clear_buffer_delay(bh); |
@@ -616,7 +647,7 @@ xfs_is_delayed_page( | |||
616 | acceptable = (type == IOMAP_UNWRITTEN); | 647 | acceptable = (type == IOMAP_UNWRITTEN); |
617 | else if (buffer_delay(bh)) | 648 | else if (buffer_delay(bh)) |
618 | acceptable = (type == IOMAP_DELAY); | 649 | acceptable = (type == IOMAP_DELAY); |
619 | else if (buffer_mapped(bh)) | 650 | else if (buffer_dirty(bh) && buffer_mapped(bh)) |
620 | acceptable = (type == 0); | 651 | acceptable = (type == 0); |
621 | else | 652 | else |
622 | break; | 653 | break; |
@@ -668,7 +699,7 @@ xfs_convert_page( | |||
668 | 699 | ||
669 | /* | 700 | /* |
670 | * page_dirty is initially a count of buffers on the page before | 701 | * page_dirty is initially a count of buffers on the page before |
671 | * EOF and is decrememted as we move each into a cleanable state. | 702 | * EOF and is decremented as we move each into a cleanable state. |
672 | * | 703 | * |
673 | * Derivation: | 704 | * Derivation: |
674 | * | 705 | * |
@@ -811,7 +842,7 @@ xfs_cluster_write( | |||
811 | * page if possible. | 842 | * page if possible. |
812 | * The bh->b_state's cannot know if any of the blocks or which block for | 843 | * The bh->b_state's cannot know if any of the blocks or which block for |
813 | * that matter are dirty due to mmap writes, and therefore bh uptodate is | 844 | * that matter are dirty due to mmap writes, and therefore bh uptodate is |
814 | * only vaild if the page itself isn't completely uptodate. Some layers | 845 | * only valid if the page itself isn't completely uptodate. Some layers |
815 | * may clear the page dirty flag prior to calling write page, under the | 846 | * may clear the page dirty flag prior to calling write page, under the |
816 | * assumption the entire page will be written out; by not writing out the | 847 | * assumption the entire page will be written out; by not writing out the |
817 | * whole page the page can be reused before all valid dirty data is | 848 | * whole page the page can be reused before all valid dirty data is |
@@ -861,7 +892,7 @@ xfs_page_state_convert( | |||
861 | 892 | ||
862 | /* | 893 | /* |
863 | * page_dirty is initially a count of buffers on the page before | 894 | * page_dirty is initially a count of buffers on the page before |
864 | * EOF and is decrememted as we move each into a cleanable state. | 895 | * EOF and is decremented as we move each into a cleanable state. |
865 | * | 896 | * |
866 | * Derivation: | 897 | * Derivation: |
867 | * | 898 | * |
@@ -1040,54 +1071,191 @@ error: | |||
1040 | return err; | 1071 | return err; |
1041 | } | 1072 | } |
1042 | 1073 | ||
1074 | /* | ||
1075 | * writepage: Called from one of two places: | ||
1076 | * | ||
1077 | * 1. we are flushing a delalloc buffer head. | ||
1078 | * | ||
1079 | * 2. we are writing out a dirty page. Typically the page dirty | ||
1080 | * state is cleared before we get here. In this case it is | ||
1081 | * conceivable we have no buffer heads. | ||
1082 | * | ||
1083 | * For delalloc space on the page we need to allocate space and | ||
1084 | * flush it. For unmapped buffer heads on the page we should | ||
1085 | * allocate space if the page is uptodate. For any other dirty | ||
1086 | * buffer heads on the page we should flush them. | ||
1087 | * | ||
1088 | * If we detect that a transaction would be required to flush | ||
1089 | * the page, we have to check the process flags first, if we | ||
1090 | * are already in a transaction or disk I/O during allocations | ||
1091 | * is off, we need to fail the writepage and redirty the page. | ||
1092 | */ | ||
1093 | |||
1043 | STATIC int | 1094 | STATIC int |
1044 | __linvfs_get_block( | 1095 | xfs_vm_writepage( |
1096 | struct page *page, | ||
1097 | struct writeback_control *wbc) | ||
1098 | { | ||
1099 | int error; | ||
1100 | int need_trans; | ||
1101 | int delalloc, unmapped, unwritten; | ||
1102 | struct inode *inode = page->mapping->host; | ||
1103 | |||
1104 | xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0); | ||
1105 | |||
1106 | /* | ||
1107 | * We need a transaction if: | ||
1108 | * 1. There are delalloc buffers on the page | ||
1109 | * 2. The page is uptodate and we have unmapped buffers | ||
1110 | * 3. The page is uptodate and we have no buffers | ||
1111 | * 4. There are unwritten buffers on the page | ||
1112 | */ | ||
1113 | |||
1114 | if (!page_has_buffers(page)) { | ||
1115 | unmapped = 1; | ||
1116 | need_trans = 1; | ||
1117 | } else { | ||
1118 | xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); | ||
1119 | if (!PageUptodate(page)) | ||
1120 | unmapped = 0; | ||
1121 | need_trans = delalloc + unmapped + unwritten; | ||
1122 | } | ||
1123 | |||
1124 | /* | ||
1125 | * If we need a transaction and the process flags say | ||
1126 | * we are already in a transaction, or no IO is allowed | ||
1127 | * then mark the page dirty again and leave the page | ||
1128 | * as is. | ||
1129 | */ | ||
1130 | if (PFLAGS_TEST_FSTRANS() && need_trans) | ||
1131 | goto out_fail; | ||
1132 | |||
1133 | /* | ||
1134 | * Delay hooking up buffer heads until we have | ||
1135 | * made our go/no-go decision. | ||
1136 | */ | ||
1137 | if (!page_has_buffers(page)) | ||
1138 | create_empty_buffers(page, 1 << inode->i_blkbits, 0); | ||
1139 | |||
1140 | /* | ||
1141 | * Convert delayed allocate, unwritten or unmapped space | ||
1142 | * to real space and flush out to disk. | ||
1143 | */ | ||
1144 | error = xfs_page_state_convert(inode, page, wbc, 1, unmapped); | ||
1145 | if (error == -EAGAIN) | ||
1146 | goto out_fail; | ||
1147 | if (unlikely(error < 0)) | ||
1148 | goto out_unlock; | ||
1149 | |||
1150 | return 0; | ||
1151 | |||
1152 | out_fail: | ||
1153 | redirty_page_for_writepage(wbc, page); | ||
1154 | unlock_page(page); | ||
1155 | return 0; | ||
1156 | out_unlock: | ||
1157 | unlock_page(page); | ||
1158 | return error; | ||
1159 | } | ||
1160 | |||
1161 | /* | ||
1162 | * Called to move a page into cleanable state - and from there | ||
1163 | * to be released. Possibly the page is already clean. We always | ||
1164 | * have buffer heads in this call. | ||
1165 | * | ||
1166 | * Returns the result of try_to_free_buffers() when the buffers can be dropped, 0 otherwise. | ||
1167 | * | ||
1168 | * Possible scenarios are: | ||
1169 | * | ||
1170 | * 1. We are being called to release a page which has been written | ||
1171 | * to via regular I/O. buffer heads will be dirty and possibly | ||
1172 | * delalloc. If no delalloc buffer heads in this case then we | ||
1173 | * can just return zero. | ||
1174 | * | ||
1175 | * 2. We are called to release a page which has been written via | ||
1176 | * mmap, all we need to do is ensure there is no delalloc | ||
1177 | * state in the buffer heads, if not we can let the caller | ||
1178 | * free them and we should come back later via writepage. | ||
1179 | */ | ||
1180 | STATIC int | ||
1181 | xfs_vm_releasepage( | ||
1182 | struct page *page, | ||
1183 | gfp_t gfp_mask) | ||
1184 | { | ||
1185 | struct inode *inode = page->mapping->host; | ||
1186 | int dirty, delalloc, unmapped, unwritten; | ||
1187 | struct writeback_control wbc = { | ||
1188 | .sync_mode = WB_SYNC_ALL, | ||
1189 | .nr_to_write = 1, | ||
1190 | }; | ||
1191 | |||
1192 | xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask); | ||
1193 | |||
1194 | if (!page_has_buffers(page)) | ||
1195 | return 0; | ||
1196 | |||
1197 | xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); | ||
1198 | if (!delalloc && !unwritten) | ||
1199 | goto free_buffers; | ||
1200 | |||
1201 | if (!(gfp_mask & __GFP_FS)) | ||
1202 | return 0; | ||
1203 | |||
1204 | /* If we are already inside a transaction or the thread cannot | ||
1205 | * do I/O, we cannot release this page. | ||
1206 | */ | ||
1207 | if (PFLAGS_TEST_FSTRANS()) | ||
1208 | return 0; | ||
1209 | |||
1210 | /* | ||
1211 | * Convert delalloc space to real space, do not flush the | ||
1212 | * data out to disk, that will be done by the caller. | ||
1213 | * Never need to allocate space here - we will always | ||
1214 | * come back to writepage in that case. | ||
1215 | */ | ||
1216 | dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0); | ||
1217 | if (dirty == 0 && !unwritten) | ||
1218 | goto free_buffers; | ||
1219 | return 0; | ||
1220 | |||
1221 | free_buffers: | ||
1222 | return try_to_free_buffers(page); | ||
1223 | } | ||
1224 | |||
1225 | STATIC int | ||
1226 | __xfs_get_blocks( | ||
1045 | struct inode *inode, | 1227 | struct inode *inode, |
1046 | sector_t iblock, | 1228 | sector_t iblock, |
1047 | unsigned long blocks, | ||
1048 | struct buffer_head *bh_result, | 1229 | struct buffer_head *bh_result, |
1049 | int create, | 1230 | int create, |
1050 | int direct, | 1231 | int direct, |
1051 | bmapi_flags_t flags) | 1232 | bmapi_flags_t flags) |
1052 | { | 1233 | { |
1053 | vnode_t *vp = LINVFS_GET_VP(inode); | 1234 | vnode_t *vp = vn_from_inode(inode); |
1054 | xfs_iomap_t iomap; | 1235 | xfs_iomap_t iomap; |
1055 | xfs_off_t offset; | 1236 | xfs_off_t offset; |
1056 | ssize_t size; | 1237 | ssize_t size; |
1057 | int retpbbm = 1; | 1238 | int niomap = 1; |
1058 | int error; | 1239 | int error; |
1059 | 1240 | ||
1060 | offset = (xfs_off_t)iblock << inode->i_blkbits; | 1241 | offset = (xfs_off_t)iblock << inode->i_blkbits; |
1061 | if (blocks) | 1242 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); |
1062 | size = (ssize_t) min_t(xfs_off_t, LONG_MAX, | 1243 | size = bh_result->b_size; |
1063 | (xfs_off_t)blocks << inode->i_blkbits); | ||
1064 | else | ||
1065 | size = 1 << inode->i_blkbits; | ||
1066 | |||
1067 | VOP_BMAP(vp, offset, size, | 1244 | VOP_BMAP(vp, offset, size, |
1068 | create ? flags : BMAPI_READ, &iomap, &retpbbm, error); | 1245 | create ? flags : BMAPI_READ, &iomap, &niomap, error); |
1069 | if (error) | 1246 | if (error) |
1070 | return -error; | 1247 | return -error; |
1071 | 1248 | if (niomap == 0) | |
1072 | if (retpbbm == 0) | ||
1073 | return 0; | 1249 | return 0; |
1074 | 1250 | ||
1075 | if (iomap.iomap_bn != IOMAP_DADDR_NULL) { | 1251 | if (iomap.iomap_bn != IOMAP_DADDR_NULL) { |
1076 | xfs_daddr_t bn; | 1252 | /* |
1077 | xfs_off_t delta; | 1253 | * For unwritten extents do not report a disk address on |
1078 | |||
1079 | /* For unwritten extents do not report a disk address on | ||
1080 | * the read case (treat as if we're reading into a hole). | 1254 | * the read case (treat as if we're reading into a hole). |
1081 | */ | 1255 | */ |
1082 | if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) { | 1256 | if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) { |
1083 | delta = offset - iomap.iomap_offset; | 1257 | xfs_map_buffer(bh_result, &iomap, offset, |
1084 | delta >>= inode->i_blkbits; | 1258 | inode->i_blkbits); |
1085 | |||
1086 | bn = iomap.iomap_bn >> (inode->i_blkbits - BBSHIFT); | ||
1087 | bn += delta; | ||
1088 | BUG_ON(!bn && !(iomap.iomap_flags & IOMAP_REALTIME)); | ||
1089 | bh_result->b_blocknr = bn; | ||
1090 | set_buffer_mapped(bh_result); | ||
1091 | } | 1259 | } |
1092 | if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) { | 1260 | if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) { |
1093 | if (direct) | 1261 | if (direct) |
@@ -1097,12 +1265,16 @@ __linvfs_get_block( | |||
1097 | } | 1265 | } |
1098 | } | 1266 | } |
1099 | 1267 | ||
1100 | /* If this is a realtime file, data might be on a new device */ | 1268 | /* |
1269 | * If this is a realtime file, data may be on a different device. | ||
1270 | * to that pointed to from the buffer_head b_bdev currently. | ||
1271 | */ | ||
1101 | bh_result->b_bdev = iomap.iomap_target->bt_bdev; | 1272 | bh_result->b_bdev = iomap.iomap_target->bt_bdev; |
1102 | 1273 | ||
1103 | /* If we previously allocated a block out beyond eof and | 1274 | /* |
1104 | * we are now coming back to use it then we will need to | 1275 | * If we previously allocated a block out beyond eof and we are |
1105 | * flag it as new even if it has a disk address. | 1276 | * now coming back to use it then we will need to flag it as new |
1277 | * even if it has a disk address. | ||
1106 | */ | 1278 | */ |
1107 | if (create && | 1279 | if (create && |
1108 | ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || | 1280 | ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || |
@@ -1118,42 +1290,40 @@ __linvfs_get_block( | |||
1118 | } | 1290 | } |
1119 | } | 1291 | } |
1120 | 1292 | ||
1121 | if (blocks) { | 1293 | if (direct || size > (1 << inode->i_blkbits)) { |
1122 | ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0); | 1294 | ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0); |
1123 | offset = min_t(xfs_off_t, | 1295 | offset = min_t(xfs_off_t, |
1124 | iomap.iomap_bsize - iomap.iomap_delta, | 1296 | iomap.iomap_bsize - iomap.iomap_delta, size); |
1125 | (xfs_off_t)blocks << inode->i_blkbits); | 1297 | bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset); |
1126 | bh_result->b_size = (u32) min_t(xfs_off_t, UINT_MAX, offset); | ||
1127 | } | 1298 | } |
1128 | 1299 | ||
1129 | return 0; | 1300 | return 0; |
1130 | } | 1301 | } |
1131 | 1302 | ||
1132 | int | 1303 | int |
1133 | linvfs_get_block( | 1304 | xfs_get_blocks( |
1134 | struct inode *inode, | 1305 | struct inode *inode, |
1135 | sector_t iblock, | 1306 | sector_t iblock, |
1136 | struct buffer_head *bh_result, | 1307 | struct buffer_head *bh_result, |
1137 | int create) | 1308 | int create) |
1138 | { | 1309 | { |
1139 | return __linvfs_get_block(inode, iblock, 0, bh_result, | 1310 | return __xfs_get_blocks(inode, iblock, |
1140 | create, 0, BMAPI_WRITE); | 1311 | bh_result, create, 0, BMAPI_WRITE); |
1141 | } | 1312 | } |
1142 | 1313 | ||
1143 | STATIC int | 1314 | STATIC int |
1144 | linvfs_get_blocks_direct( | 1315 | xfs_get_blocks_direct( |
1145 | struct inode *inode, | 1316 | struct inode *inode, |
1146 | sector_t iblock, | 1317 | sector_t iblock, |
1147 | unsigned long max_blocks, | ||
1148 | struct buffer_head *bh_result, | 1318 | struct buffer_head *bh_result, |
1149 | int create) | 1319 | int create) |
1150 | { | 1320 | { |
1151 | return __linvfs_get_block(inode, iblock, max_blocks, bh_result, | 1321 | return __xfs_get_blocks(inode, iblock, |
1152 | create, 1, BMAPI_WRITE|BMAPI_DIRECT); | 1322 | bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT); |
1153 | } | 1323 | } |
1154 | 1324 | ||
1155 | STATIC void | 1325 | STATIC void |
1156 | linvfs_end_io_direct( | 1326 | xfs_end_io_direct( |
1157 | struct kiocb *iocb, | 1327 | struct kiocb *iocb, |
1158 | loff_t offset, | 1328 | loff_t offset, |
1159 | ssize_t size, | 1329 | ssize_t size, |
@@ -1164,9 +1334,9 @@ linvfs_end_io_direct( | |||
1164 | /* | 1334 | /* |
1165 | * Non-NULL private data means we need to issue a transaction to | 1335 | * Non-NULL private data means we need to issue a transaction to |
1166 | * convert a range from unwritten to written extents. This needs | 1336 | * convert a range from unwritten to written extents. This needs |
1167 | * to happen from process contect but aio+dio I/O completion | 1337 | * to happen from process context but aio+dio I/O completion |
1168 | * happens from irq context so we need to defer it to a workqueue. | 1338 | * happens from irq context so we need to defer it to a workqueue. |
1169 | * This is not nessecary for synchronous direct I/O, but we do | 1339 | * This is not necessary for synchronous direct I/O, but we do |
1170 | * it anyway to keep the code uniform and simpler. | 1340 | * it anyway to keep the code uniform and simpler. |
1171 | * | 1341 | * |
1172 | * The core direct I/O code might be changed to always call the | 1342 | * The core direct I/O code might be changed to always call the |
@@ -1183,7 +1353,7 @@ linvfs_end_io_direct( | |||
1183 | } | 1353 | } |
1184 | 1354 | ||
1185 | /* | 1355 | /* |
1186 | * blockdev_direct_IO can return an error even afer the I/O | 1356 | * blockdev_direct_IO can return an error even after the I/O |
1187 | * completion handler was called. Thus we need to protect | 1357 | * completion handler was called. Thus we need to protect |
1188 | * against double-freeing. | 1358 | * against double-freeing. |
1189 | */ | 1359 | */ |
@@ -1191,7 +1361,7 @@ linvfs_end_io_direct( | |||
1191 | } | 1361 | } |
1192 | 1362 | ||
1193 | STATIC ssize_t | 1363 | STATIC ssize_t |
1194 | linvfs_direct_IO( | 1364 | xfs_vm_direct_IO( |
1195 | int rw, | 1365 | int rw, |
1196 | struct kiocb *iocb, | 1366 | struct kiocb *iocb, |
1197 | const struct iovec *iov, | 1367 | const struct iovec *iov, |
@@ -1200,7 +1370,7 @@ linvfs_direct_IO( | |||
1200 | { | 1370 | { |
1201 | struct file *file = iocb->ki_filp; | 1371 | struct file *file = iocb->ki_filp; |
1202 | struct inode *inode = file->f_mapping->host; | 1372 | struct inode *inode = file->f_mapping->host; |
1203 | vnode_t *vp = LINVFS_GET_VP(inode); | 1373 | vnode_t *vp = vn_from_inode(inode); |
1204 | xfs_iomap_t iomap; | 1374 | xfs_iomap_t iomap; |
1205 | int maps = 1; | 1375 | int maps = 1; |
1206 | int error; | 1376 | int error; |
@@ -1215,253 +1385,79 @@ linvfs_direct_IO( | |||
1215 | ret = blockdev_direct_IO_own_locking(rw, iocb, inode, | 1385 | ret = blockdev_direct_IO_own_locking(rw, iocb, inode, |
1216 | iomap.iomap_target->bt_bdev, | 1386 | iomap.iomap_target->bt_bdev, |
1217 | iov, offset, nr_segs, | 1387 | iov, offset, nr_segs, |
1218 | linvfs_get_blocks_direct, | 1388 | xfs_get_blocks_direct, |
1219 | linvfs_end_io_direct); | 1389 | xfs_end_io_direct); |
1220 | 1390 | ||
1221 | if (unlikely(ret <= 0 && iocb->private)) | 1391 | if (unlikely(ret <= 0 && iocb->private)) |
1222 | xfs_destroy_ioend(iocb->private); | 1392 | xfs_destroy_ioend(iocb->private); |
1223 | return ret; | 1393 | return ret; |
1224 | } | 1394 | } |
1225 | 1395 | ||
1396 | STATIC int | ||
1397 | xfs_vm_prepare_write( | ||
1398 | struct file *file, | ||
1399 | struct page *page, | ||
1400 | unsigned int from, | ||
1401 | unsigned int to) | ||
1402 | { | ||
1403 | return block_prepare_write(page, from, to, xfs_get_blocks); | ||
1404 | } | ||
1226 | 1405 | ||
1227 | STATIC sector_t | 1406 | STATIC sector_t |
1228 | linvfs_bmap( | 1407 | xfs_vm_bmap( |
1229 | struct address_space *mapping, | 1408 | struct address_space *mapping, |
1230 | sector_t block) | 1409 | sector_t block) |
1231 | { | 1410 | { |
1232 | struct inode *inode = (struct inode *)mapping->host; | 1411 | struct inode *inode = (struct inode *)mapping->host; |
1233 | vnode_t *vp = LINVFS_GET_VP(inode); | 1412 | vnode_t *vp = vn_from_inode(inode); |
1234 | int error; | 1413 | int error; |
1235 | 1414 | ||
1236 | vn_trace_entry(vp, "linvfs_bmap", (inst_t *)__return_address); | 1415 | vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); |
1237 | 1416 | ||
1238 | VOP_RWLOCK(vp, VRWLOCK_READ); | 1417 | VOP_RWLOCK(vp, VRWLOCK_READ); |
1239 | VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error); | 1418 | VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error); |
1240 | VOP_RWUNLOCK(vp, VRWLOCK_READ); | 1419 | VOP_RWUNLOCK(vp, VRWLOCK_READ); |
1241 | return generic_block_bmap(mapping, block, linvfs_get_block); | 1420 | return generic_block_bmap(mapping, block, xfs_get_blocks); |
1242 | } | 1421 | } |
1243 | 1422 | ||
1244 | STATIC int | 1423 | STATIC int |
1245 | linvfs_readpage( | 1424 | xfs_vm_readpage( |
1246 | struct file *unused, | 1425 | struct file *unused, |
1247 | struct page *page) | 1426 | struct page *page) |
1248 | { | 1427 | { |
1249 | return mpage_readpage(page, linvfs_get_block); | 1428 | return mpage_readpage(page, xfs_get_blocks); |
1250 | } | 1429 | } |
1251 | 1430 | ||
1252 | STATIC int | 1431 | STATIC int |
1253 | linvfs_readpages( | 1432 | xfs_vm_readpages( |
1254 | struct file *unused, | 1433 | struct file *unused, |
1255 | struct address_space *mapping, | 1434 | struct address_space *mapping, |
1256 | struct list_head *pages, | 1435 | struct list_head *pages, |
1257 | unsigned nr_pages) | 1436 | unsigned nr_pages) |
1258 | { | 1437 | { |
1259 | return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block); | 1438 | return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); |
1260 | } | 1439 | } |
1261 | 1440 | ||
1262 | STATIC void | 1441 | STATIC void |
1263 | xfs_count_page_state( | 1442 | xfs_vm_invalidatepage( |
1264 | struct page *page, | ||
1265 | int *delalloc, | ||
1266 | int *unmapped, | ||
1267 | int *unwritten) | ||
1268 | { | ||
1269 | struct buffer_head *bh, *head; | ||
1270 | |||
1271 | *delalloc = *unmapped = *unwritten = 0; | ||
1272 | |||
1273 | bh = head = page_buffers(page); | ||
1274 | do { | ||
1275 | if (buffer_uptodate(bh) && !buffer_mapped(bh)) | ||
1276 | (*unmapped) = 1; | ||
1277 | else if (buffer_unwritten(bh) && !buffer_delay(bh)) | ||
1278 | clear_buffer_unwritten(bh); | ||
1279 | else if (buffer_unwritten(bh)) | ||
1280 | (*unwritten) = 1; | ||
1281 | else if (buffer_delay(bh)) | ||
1282 | (*delalloc) = 1; | ||
1283 | } while ((bh = bh->b_this_page) != head); | ||
1284 | } | ||
1285 | |||
1286 | |||
1287 | /* | ||
1288 | * writepage: Called from one of two places: | ||
1289 | * | ||
1290 | * 1. we are flushing a delalloc buffer head. | ||
1291 | * | ||
1292 | * 2. we are writing out a dirty page. Typically the page dirty | ||
1293 | * state is cleared before we get here. In this case is it | ||
1294 | * conceivable we have no buffer heads. | ||
1295 | * | ||
1296 | * For delalloc space on the page we need to allocate space and | ||
1297 | * flush it. For unmapped buffer heads on the page we should | ||
1298 | * allocate space if the page is uptodate. For any other dirty | ||
1299 | * buffer heads on the page we should flush them. | ||
1300 | * | ||
1301 | * If we detect that a transaction would be required to flush | ||
1302 | * the page, we have to check the process flags first, if we | ||
1303 | * are already in a transaction or disk I/O during allocations | ||
1304 | * is off, we need to fail the writepage and redirty the page. | ||
1305 | */ | ||
1306 | |||
1307 | STATIC int | ||
1308 | linvfs_writepage( | ||
1309 | struct page *page, | ||
1310 | struct writeback_control *wbc) | ||
1311 | { | ||
1312 | int error; | ||
1313 | int need_trans; | ||
1314 | int delalloc, unmapped, unwritten; | ||
1315 | struct inode *inode = page->mapping->host; | ||
1316 | |||
1317 | xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0); | ||
1318 | |||
1319 | /* | ||
1320 | * We need a transaction if: | ||
1321 | * 1. There are delalloc buffers on the page | ||
1322 | * 2. The page is uptodate and we have unmapped buffers | ||
1323 | * 3. The page is uptodate and we have no buffers | ||
1324 | * 4. There are unwritten buffers on the page | ||
1325 | */ | ||
1326 | |||
1327 | if (!page_has_buffers(page)) { | ||
1328 | unmapped = 1; | ||
1329 | need_trans = 1; | ||
1330 | } else { | ||
1331 | xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); | ||
1332 | if (!PageUptodate(page)) | ||
1333 | unmapped = 0; | ||
1334 | need_trans = delalloc + unmapped + unwritten; | ||
1335 | } | ||
1336 | |||
1337 | /* | ||
1338 | * If we need a transaction and the process flags say | ||
1339 | * we are already in a transaction, or no IO is allowed | ||
1340 | * then mark the page dirty again and leave the page | ||
1341 | * as is. | ||
1342 | */ | ||
1343 | if (PFLAGS_TEST_FSTRANS() && need_trans) | ||
1344 | goto out_fail; | ||
1345 | |||
1346 | /* | ||
1347 | * Delay hooking up buffer heads until we have | ||
1348 | * made our go/no-go decision. | ||
1349 | */ | ||
1350 | if (!page_has_buffers(page)) | ||
1351 | create_empty_buffers(page, 1 << inode->i_blkbits, 0); | ||
1352 | |||
1353 | /* | ||
1354 | * Convert delayed allocate, unwritten or unmapped space | ||
1355 | * to real space and flush out to disk. | ||
1356 | */ | ||
1357 | error = xfs_page_state_convert(inode, page, wbc, 1, unmapped); | ||
1358 | if (error == -EAGAIN) | ||
1359 | goto out_fail; | ||
1360 | if (unlikely(error < 0)) | ||
1361 | goto out_unlock; | ||
1362 | |||
1363 | return 0; | ||
1364 | |||
1365 | out_fail: | ||
1366 | redirty_page_for_writepage(wbc, page); | ||
1367 | unlock_page(page); | ||
1368 | return 0; | ||
1369 | out_unlock: | ||
1370 | unlock_page(page); | ||
1371 | return error; | ||
1372 | } | ||
1373 | |||
1374 | STATIC int | ||
1375 | linvfs_invalidate_page( | ||
1376 | struct page *page, | 1443 | struct page *page, |
1377 | unsigned long offset) | 1444 | unsigned long offset) |
1378 | { | 1445 | { |
1379 | xfs_page_trace(XFS_INVALIDPAGE_ENTER, | 1446 | xfs_page_trace(XFS_INVALIDPAGE_ENTER, |
1380 | page->mapping->host, page, offset); | 1447 | page->mapping->host, page, offset); |
1381 | return block_invalidatepage(page, offset); | 1448 | block_invalidatepage(page, offset); |
1382 | } | ||
1383 | |||
1384 | /* | ||
1385 | * Called to move a page into cleanable state - and from there | ||
1386 | * to be released. Possibly the page is already clean. We always | ||
1387 | * have buffer heads in this call. | ||
1388 | * | ||
1389 | * Returns 0 if the page is ok to release, 1 otherwise. | ||
1390 | * | ||
1391 | * Possible scenarios are: | ||
1392 | * | ||
1393 | * 1. We are being called to release a page which has been written | ||
1394 | * to via regular I/O. buffer heads will be dirty and possibly | ||
1395 | * delalloc. If no delalloc buffer heads in this case then we | ||
1396 | * can just return zero. | ||
1397 | * | ||
1398 | * 2. We are called to release a page which has been written via | ||
1399 | * mmap, all we need to do is ensure there is no delalloc | ||
1400 | * state in the buffer heads, if not we can let the caller | ||
1401 | * free them and we should come back later via writepage. | ||
1402 | */ | ||
1403 | STATIC int | ||
1404 | linvfs_release_page( | ||
1405 | struct page *page, | ||
1406 | gfp_t gfp_mask) | ||
1407 | { | ||
1408 | struct inode *inode = page->mapping->host; | ||
1409 | int dirty, delalloc, unmapped, unwritten; | ||
1410 | struct writeback_control wbc = { | ||
1411 | .sync_mode = WB_SYNC_ALL, | ||
1412 | .nr_to_write = 1, | ||
1413 | }; | ||
1414 | |||
1415 | xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask); | ||
1416 | |||
1417 | xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); | ||
1418 | if (!delalloc && !unwritten) | ||
1419 | goto free_buffers; | ||
1420 | |||
1421 | if (!(gfp_mask & __GFP_FS)) | ||
1422 | return 0; | ||
1423 | |||
1424 | /* If we are already inside a transaction or the thread cannot | ||
1425 | * do I/O, we cannot release this page. | ||
1426 | */ | ||
1427 | if (PFLAGS_TEST_FSTRANS()) | ||
1428 | return 0; | ||
1429 | |||
1430 | /* | ||
1431 | * Convert delalloc space to real space, do not flush the | ||
1432 | * data out to disk, that will be done by the caller. | ||
1433 | * Never need to allocate space here - we will always | ||
1434 | * come back to writepage in that case. | ||
1435 | */ | ||
1436 | dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0); | ||
1437 | if (dirty == 0 && !unwritten) | ||
1438 | goto free_buffers; | ||
1439 | return 0; | ||
1440 | |||
1441 | free_buffers: | ||
1442 | return try_to_free_buffers(page); | ||
1443 | } | ||
1444 | |||
1445 | STATIC int | ||
1446 | linvfs_prepare_write( | ||
1447 | struct file *file, | ||
1448 | struct page *page, | ||
1449 | unsigned int from, | ||
1450 | unsigned int to) | ||
1451 | { | ||
1452 | return block_prepare_write(page, from, to, linvfs_get_block); | ||
1453 | } | 1449 | } |
1454 | 1450 | ||
1455 | struct address_space_operations linvfs_aops = { | 1451 | struct address_space_operations xfs_address_space_operations = { |
1456 | .readpage = linvfs_readpage, | 1452 | .readpage = xfs_vm_readpage, |
1457 | .readpages = linvfs_readpages, | 1453 | .readpages = xfs_vm_readpages, |
1458 | .writepage = linvfs_writepage, | 1454 | .writepage = xfs_vm_writepage, |
1459 | .sync_page = block_sync_page, | 1455 | .sync_page = block_sync_page, |
1460 | .releasepage = linvfs_release_page, | 1456 | .releasepage = xfs_vm_releasepage, |
1461 | .invalidatepage = linvfs_invalidate_page, | 1457 | .invalidatepage = xfs_vm_invalidatepage, |
1462 | .prepare_write = linvfs_prepare_write, | 1458 | .prepare_write = xfs_vm_prepare_write, |
1463 | .commit_write = generic_commit_write, | 1459 | .commit_write = generic_commit_write, |
1464 | .bmap = linvfs_bmap, | 1460 | .bmap = xfs_vm_bmap, |
1465 | .direct_IO = linvfs_direct_IO, | 1461 | .direct_IO = xfs_vm_direct_IO, |
1466 | .migratepage = buffer_migrate_page, | 1462 | .migratepage = buffer_migrate_page, |
1467 | }; | 1463 | }; |