 fs/xfs/linux-2.6/xfs_aops.c | 354 ++++++++++-----------------------
 1 file changed, 107 insertions(+), 247 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index bd5e1cf5428d..7744a3b630e0 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -754,7 +754,6 @@ xfs_convert_page(
         struct xfs_bmbt_irec    *imap,
         xfs_ioend_t             **ioendp,
         struct writeback_control *wbc,
-        int                     startio,
         int                     all_bh)
 {
         struct buffer_head      *bh, *head;
@@ -825,19 +824,14 @@ xfs_convert_page(
                         ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
 
                         xfs_map_at_offset(inode, bh, imap, offset);
-                        if (startio) {
-                                xfs_add_to_ioend(inode, bh, offset,
-                                                 type, ioendp, done);
-                        } else {
-                                set_buffer_dirty(bh);
-                                unlock_buffer(bh);
-                                mark_buffer_dirty(bh);
-                        }
+                        xfs_add_to_ioend(inode, bh, offset, type,
+                                         ioendp, done);
+
                         page_dirty--;
                         count++;
                 } else {
                         type = IO_NEW;
-                        if (buffer_mapped(bh) && all_bh && startio) {
+                        if (buffer_mapped(bh) && all_bh) {
                                 lock_buffer(bh);
                                 xfs_add_to_ioend(inode, bh, offset,
                                                  type, ioendp, done);
@@ -852,14 +846,12 @@ xfs_convert_page(
         if (uptodate && bh == head)
                 SetPageUptodate(page);
 
-        if (startio) {
-                if (count) {
-                        wbc->nr_to_write--;
-                        if (wbc->nr_to_write <= 0)
-                                done = 1;
-                }
-                xfs_start_page_writeback(page, !page_dirty, count);
+        if (count) {
+                wbc->nr_to_write--;
+                if (wbc->nr_to_write <= 0)
+                        done = 1;
         }
+        xfs_start_page_writeback(page, !page_dirty, count);
 
         return done;
 fail_unlock_page:
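[Editor's note: not part of the patch. The hunk above hoists the nr_to_write accounting out of the old startio branch; the following standalone C sketch models that accounting, with a simplified stand-in for the kernel's writeback_control.]

#include <stdio.h>

struct writeback_control { long nr_to_write; };

/*
 * After converting a page, charge it against the writeback budget only
 * if buffers were actually added to an ioend (count != 0); returns 1
 * when the budget is exhausted and the caller should stop clustering.
 */
static int charge_page(struct writeback_control *wbc, int count)
{
        int done = 0;

        if (count) {
                wbc->nr_to_write--;
                if (wbc->nr_to_write <= 0)
                        done = 1;
        }
        return done;
}

int main(void)
{
        struct writeback_control wbc = { .nr_to_write = 2 };

        printf("%d\n", charge_page(&wbc, 1));   /* 0: budget remains */
        printf("%d\n", charge_page(&wbc, 0));   /* 0: nothing was queued */
        printf("%d\n", charge_page(&wbc, 1));   /* 1: budget exhausted */
        return 0;
}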
@@ -879,7 +871,6 @@ xfs_cluster_write(
         struct xfs_bmbt_irec    *imap,
         xfs_ioend_t             **ioendp,
         struct writeback_control *wbc,
-        int                     startio,
         int                     all_bh,
         pgoff_t                 tlast)
 {
@@ -895,7 +886,7 @@ xfs_cluster_write(
 
                 for (i = 0; i < pagevec_count(&pvec); i++) {
                         done = xfs_convert_page(inode, pvec.pages[i], tindex++,
-                                        imap, ioendp, wbc, startio, all_bh);
+                                        imap, ioendp, wbc, all_bh);
                         if (done)
                                 break;
                 }
@@ -1025,51 +1016,94 @@ out_invalidate:
 }
 
 /*
- * Calling this without startio set means we are being asked to make a dirty
- * page ready for freeing it's buffers.  When called with startio set then
- * we are coming from writepage.
+ * Write out a dirty page.
+ *
+ * For delalloc space on the page we need to allocate space and flush it.
+ * For unwritten space on the page we need to start the conversion to
+ * regular allocated space.
+ * For unmapped buffer heads on the page we should allocate space if the
+ * page is uptodate.
+ * For any other dirty buffer heads on the page we should flush them.
  *
- * When called with startio set it is important that we write the WHOLE
- * page if possible.
- * The bh->b_state's cannot know if any of the blocks or which block for
- * that matter are dirty due to mmap writes, and therefore bh uptodate is
- * only valid if the page itself isn't completely uptodate.  Some layers
- * may clear the page dirty flag prior to calling write page, under the
- * assumption the entire page will be written out; by not writing out the
- * whole page the page can be reused before all valid dirty data is
- * written out.  Note: in the case of a page that has been dirty'd by
- * mapwrite and but partially setup by block_prepare_write the
- * bh->b_states's will not agree and only ones setup by BPW/BCW will have
- * valid state, thus the whole page must be written out thing.
+ * If we detect that a transaction would be required to flush the page, we
+ * have to check the process flags first, if we are already in a transaction
+ * or disk I/O during allocations is off, we need to fail the writepage and
+ * redirty the page.
+ *
+ * The bh->b_state's cannot know if any of the blocks or which block for that
+ * matter are dirty due to mmap writes, and therefore bh uptodate is only
+ * valid if the page itself isn't completely uptodate.
  */
-
 STATIC int
-xfs_page_state_convert(
-        struct inode            *inode,
-        struct page             *page,
-        struct writeback_control *wbc,
-        int                     startio,
-        int                     unmapped) /* also implies page uptodate */
+xfs_vm_writepage(
+        struct page             *page,
+        struct writeback_control *wbc)
 {
+        struct inode            *inode = page->mapping->host;
+        int                     need_trans;
+        int                     delalloc, unmapped, unwritten;
         struct buffer_head      *bh, *head;
         struct xfs_bmbt_irec    imap;
         xfs_ioend_t             *ioend = NULL, *iohead = NULL;
         loff_t                  offset;
-        unsigned long           p_offset = 0;
         unsigned int            type;
         __uint64_t              end_offset;
         pgoff_t                 end_index, last_index;
         ssize_t                 size, len;
         int                     flags, err, imap_valid = 0, uptodate = 1;
-        int                     page_dirty, count = 0;
-        int                     trylock = 0;
-        int                     all_bh = unmapped;
+        int                     count = 0;
+        int                     all_bh;
+
+        trace_xfs_writepage(inode, page, 0);
+
+        /*
+         * Refuse to write the page out if we are called from reclaim context.
+         *
+         * This is primarily to avoid stack overflows when called from deep
+         * used stacks in random callers for direct reclaim, but disabling
+         * reclaim for kswap is a nice side-effect as kswapd causes rather
+         * suboptimal I/O patters, too.
+         *
+         * This should really be done by the core VM, but until that happens
+         * filesystems like XFS, btrfs and ext4 have to take care of this
+         * by themselves.
+         */
+        if (current->flags & PF_MEMALLOC)
+                goto out_fail;
 
-        if (startio) {
-                if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
-                        trylock |= BMAPI_TRYLOCK;
+        /*
+         * We need a transaction if:
+         * 1. There are delalloc buffers on the page
+         * 2. The page is uptodate and we have unmapped buffers
+         * 3. The page is uptodate and we have no buffers
+         * 4. There are unwritten buffers on the page
+         */
+        if (!page_has_buffers(page)) {
+                unmapped = 1;
+                need_trans = 1;
+        } else {
+                xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
+                if (!PageUptodate(page))
+                        unmapped = 0;
+                need_trans = delalloc + unmapped + unwritten;
         }
 
+        /*
+         * If we need a transaction and the process flags say
+         * we are already in a transaction, or no IO is allowed
+         * then mark the page dirty again and leave the page
+         * as is.
+         */
+        if (current_test_flags(PF_FSTRANS) && need_trans)
+                goto out_fail;
+
+        /*
+         * Delay hooking up buffer heads until we have
+         * made our go/no-go decision.
+         */
+        if (!page_has_buffers(page))
+                create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+
         /* Is this page beyond the end of the file? */
         offset = i_size_read(inode);
         end_index = offset >> PAGE_CACHE_SHIFT;
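[Editor's note: not part of the patch. The go/no-go transaction check that this hunk inlines into xfs_vm_writepage() can be modeled in isolation. The struct and helper below are the editor's simplified stand-ins for the kernel types, not kernel APIs.]

#include <stdio.h>

struct page_state {
        int has_buffers;        /* page_has_buffers(page) */
        int uptodate;           /* PageUptodate(page) */
        int delalloc;           /* delalloc buffers counted on the page */
        int unmapped;           /* unmapped buffers counted on the page */
        int unwritten;          /* unwritten buffers counted on the page */
};

/*
 * Mirrors the need_trans computation above: a page with no buffers at
 * all always needs a transaction; otherwise unmapped buffers only
 * count when the page is uptodate.
 */
static int needs_transaction(const struct page_state *ps)
{
        int unmapped;

        if (!ps->has_buffers)
                return 1;

        unmapped = ps->uptodate ? ps->unmapped : 0;
        return (ps->delalloc + unmapped + ps->unwritten) != 0;
}

int main(void)
{
        struct page_state clean    = { 1, 1, 0, 0, 0 };
        struct page_state delalloc = { 1, 1, 2, 0, 0 };

        printf("clean mapped page needs transaction: %d\n",
               needs_transaction(&clean));
        printf("delalloc page needs transaction:     %d\n",
               needs_transaction(&delalloc));
        return 0;
}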
@@ -1077,53 +1111,27 @@ xfs_page_state_convert(
         if (page->index >= end_index) {
                 if ((page->index >= end_index + 1) ||
                     !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
-                        if (startio)
-                                unlock_page(page);
+                        unlock_page(page);
                         return 0;
                 }
         }
 
-        /*
-         * page_dirty is initially a count of buffers on the page before
-         * EOF and is decremented as we move each into a cleanable state.
-         *
-         * Derivation:
-         *
-         * End offset is the highest offset that this page should represent.
-         * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
-         * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
-         * hence give us the correct page_dirty count. On any other page,
-         * it will be zero and in that case we need page_dirty to be the
-         * count of buffers on the page.
-         */
         end_offset = min_t(unsigned long long,
                         (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
         len = 1 << inode->i_blkbits;
-        p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
-                        PAGE_CACHE_SIZE);
-        p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
-        page_dirty = p_offset / len;
 
         bh = head = page_buffers(page);
         offset = page_offset(page);
         flags = BMAPI_READ;
         type = IO_NEW;
 
-        /* TODO: cleanup count and page_dirty */
+        all_bh = unmapped;
 
         do {
                 if (offset >= end_offset)
                         break;
                 if (!buffer_uptodate(bh))
                         uptodate = 0;
-                if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
-                        /*
-                         * the iomap is actually still valid, but the ioend
-                         * isn't.  shouldn't happen too often.
-                         */
-                        imap_valid = 0;
-                        continue;
-                }
 
                 /*
                  * A hole may still be marked uptodate because discard_buffer
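[Editor's note: not part of the patch. The EOF handling kept by this hunk is easy to get wrong, so here is a standalone model of it: skip a page wholly beyond EOF, otherwise clamp the write span to the smaller of the page end and i_size. It assumes 4096-byte pages, and the helper name is the editor's.]

#include <stdio.h>
#include <stdint.h>

#define PAGE_CACHE_SHIFT        12
#define PAGE_CACHE_SIZE         (1ULL << PAGE_CACHE_SHIFT)

/*
 * Returns 1 if the page at @index lies entirely beyond @isize and can
 * be skipped; otherwise stores the end of the write span, clamped to
 * EOF on a partial last page, in *end_offset.
 */
static int page_beyond_eof(uint64_t isize, uint64_t index,
                           uint64_t *end_offset)
{
        uint64_t end_index = isize >> PAGE_CACHE_SHIFT;
        uint64_t page_end = (index + 1) << PAGE_CACHE_SHIFT;

        if (index >= end_index &&
            (index >= end_index + 1 || !(isize & (PAGE_CACHE_SIZE - 1))))
                return 1;

        *end_offset = page_end < isize ? page_end : isize;
        return 0;
}

int main(void)
{
        uint64_t end;

        /* A 10000-byte file: page 2 is the partial EOF page. */
        if (!page_beyond_eof(10000, 2, &end))
                printf("page 2 writes up to offset %llu\n",
                       (unsigned long long)end);
        printf("page 3 skipped: %d\n", page_beyond_eof(10000, 3, &end));
        return 0;
}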
@@ -1150,7 +1158,7 @@ xfs_page_state_convert(
                  */
                 if (buffer_unwritten(bh) || buffer_delay(bh) ||
                     ((buffer_uptodate(bh) || PageUptodate(page)) &&
-                     !buffer_mapped(bh) && (unmapped || startio))) {
+                     !buffer_mapped(bh))) {
                         int new_ioend = 0;
 
                         /*
@@ -1164,7 +1172,11 @@ xfs_page_state_convert(
                                 flags = BMAPI_WRITE | BMAPI_IGNSTATE;
                         } else if (buffer_delay(bh)) {
                                 type = IO_DELAY;
-                                flags = BMAPI_ALLOCATE | trylock;
+                                flags = BMAPI_ALLOCATE;
+
+                                if (wbc->sync_mode == WB_SYNC_NONE &&
+                                    wbc->nonblocking)
+                                        flags |= BMAPI_TRYLOCK;
                         } else {
                                 type = IO_NEW;
                                 flags = BMAPI_WRITE | BMAPI_MMAP;
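[Editor's note: not part of the patch. The old code precomputed a trylock variable once per call; the hunk above moves that decision to the use site. A standalone sketch of the decision follows, with illustrative flag values rather than the kernel's.]

#include <stdio.h>

#define BMAPI_ALLOCATE  0x1     /* illustrative values, not the kernel's */
#define BMAPI_TRYLOCK   0x2

enum sync_mode { WB_SYNC_NONE, WB_SYNC_ALL };

/*
 * Non-blocking background writeback must not wait for extent
 * allocation locks, so it opts into trylock semantics; data-integrity
 * sync writeback is allowed to block.
 */
static int delalloc_bmapi_flags(enum sync_mode mode, int nonblocking)
{
        int flags = BMAPI_ALLOCATE;

        if (mode == WB_SYNC_NONE && nonblocking)
                flags |= BMAPI_TRYLOCK;
        return flags;
}

int main(void)
{
        printf("background writeback: %#x\n",
               delalloc_bmapi_flags(WB_SYNC_NONE, 1));
        printf("data integrity sync:  %#x\n",
               delalloc_bmapi_flags(WB_SYNC_ALL, 0));
        return 0;
}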
@@ -1196,19 +1208,11 @@ xfs_page_state_convert(
                         }
                         if (imap_valid) {
                                 xfs_map_at_offset(inode, bh, &imap, offset);
-                                if (startio) {
-                                        xfs_add_to_ioend(inode, bh, offset,
-                                                         type, &ioend,
-                                                         new_ioend);
-                                } else {
-                                        set_buffer_dirty(bh);
-                                        unlock_buffer(bh);
-                                        mark_buffer_dirty(bh);
-                                }
-                                page_dirty--;
+                                xfs_add_to_ioend(inode, bh, offset, type,
+                                                 &ioend, new_ioend);
                                 count++;
                         }
-                } else if (buffer_uptodate(bh) && startio) {
+                } else if (buffer_uptodate(bh)) {
                         /*
                          * we got here because the buffer is already mapped.
                          * That means it must already have extents allocated
@@ -1241,13 +1245,11 @@ xfs_page_state_convert(
                                 all_bh = 1;
                                 xfs_add_to_ioend(inode, bh, offset, type,
                                                 &ioend, !imap_valid);
-                                page_dirty--;
                                 count++;
                         } else {
                                 imap_valid = 0;
                         }
-                } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
-                           (unmapped || startio)) {
+                } else if (PageUptodate(page)) {
                         imap_valid = 0;
                 }
 
@@ -1259,8 +1261,7 @@ xfs_page_state_convert(
         if (uptodate && bh == head)
                 SetPageUptodate(page);
 
-        if (startio)
-                xfs_start_page_writeback(page, 1, count);
+        xfs_start_page_writeback(page, 1, count);
 
         if (ioend && imap_valid) {
                 xfs_off_t               end_index;
@@ -1278,131 +1279,28 @@ xfs_page_state_convert(
                         end_index = last_index;
 
                 xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
-                                        wbc, startio, all_bh, end_index);
+                                        wbc, all_bh, end_index);
         }
 
         if (iohead)
                 xfs_submit_ioend(wbc, iohead);
 
-        return page_dirty;
+        return 0;
 
 error:
         if (iohead)
                 xfs_cancel_ioend(iohead);
 
-        /*
-         * If it's delalloc and we have nowhere to put it,
-         * throw it away, unless the lower layers told
-         * us to try again.
-         */
-        if (err != -EAGAIN) {
-                if (!unmapped)
-                        xfs_aops_discard_page(page);
-                ClearPageUptodate(page);
-        }
+        if (!unmapped)
+                xfs_aops_discard_page(page);
+        ClearPageUptodate(page);
+        unlock_page(page);
         return err;
-}
-
-/*
- * writepage: Called from one of two places:
- *
- * 1. we are flushing a delalloc buffer head.
- *
- * 2. we are writing out a dirty page. Typically the page dirty
- *    state is cleared before we get here. In this case is it
- *    conceivable we have no buffer heads.
- *
- * For delalloc space on the page we need to allocate space and
- * flush it. For unmapped buffer heads on the page we should
- * allocate space if the page is uptodate. For any other dirty
- * buffer heads on the page we should flush them.
- *
- * If we detect that a transaction would be required to flush
- * the page, we have to check the process flags first, if we
- * are already in a transaction or disk I/O during allocations
- * is off, we need to fail the writepage and redirty the page.
- */
-
-STATIC int
-xfs_vm_writepage(
-        struct page             *page,
-        struct writeback_control *wbc)
-{
-        int                     error;
-        int                     need_trans;
-        int                     delalloc, unmapped, unwritten;
-        struct inode            *inode = page->mapping->host;
-
-        trace_xfs_writepage(inode, page, 0);
-
-        /*
-         * Refuse to write the page out if we are called from reclaim context.
-         *
-         * This is primarily to avoid stack overflows when called from deep
-         * used stacks in random callers for direct reclaim, but disabling
-         * reclaim for kswap is a nice side-effect as kswapd causes rather
-         * suboptimal I/O patters, too.
-         *
-         * This should really be done by the core VM, but until that happens
-         * filesystems like XFS, btrfs and ext4 have to take care of this
-         * by themselves.
-         */
-        if (current->flags & PF_MEMALLOC)
-                goto out_fail;
-
-        /*
-         * We need a transaction if:
-         * 1. There are delalloc buffers on the page
-         * 2. The page is uptodate and we have unmapped buffers
-         * 3. The page is uptodate and we have no buffers
-         * 4. There are unwritten buffers on the page
-         */
-
-        if (!page_has_buffers(page)) {
-                unmapped = 1;
-                need_trans = 1;
-        } else {
-                xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
-                if (!PageUptodate(page))
-                        unmapped = 0;
-                need_trans = delalloc + unmapped + unwritten;
-        }
-
-        /*
-         * If we need a transaction and the process flags say
-         * we are already in a transaction, or no IO is allowed
-         * then mark the page dirty again and leave the page
-         * as is.
-         */
-        if (current_test_flags(PF_FSTRANS) && need_trans)
-                goto out_fail;
-
-        /*
-         * Delay hooking up buffer heads until we have
-         * made our go/no-go decision.
-         */
-        if (!page_has_buffers(page))
-                create_empty_buffers(page, 1 << inode->i_blkbits, 0);
-
-        /*
-         * Convert delayed allocate, unwritten or unmapped space
-         * to real space and flush out to disk.
-         */
-        error = xfs_page_state_convert(inode, page, wbc, 1, unmapped);
-        if (error == -EAGAIN)
-                goto out_fail;
-        if (unlikely(error < 0))
-                goto out_unlock;
-
-        return 0;
 
 out_fail:
         redirty_page_for_writepage(wbc, page);
         unlock_page(page);
         return 0;
-out_unlock:
-        unlock_page(page);
-        return error;
 }
 
 STATIC int
@@ -1416,65 +1314,27 @@ xfs_vm_writepages(
 
 /*
  * Called to move a page into cleanable state - and from there
- * to be released. Possibly the page is already clean. We always
+ * to be released. The page should already be clean. We always
  * have buffer heads in this call.
  *
- * Returns 0 if the page is ok to release, 1 otherwise.
- *
- * Possible scenarios are:
- *
- * 1. We are being called to release a page which has been written
- *    to via regular I/O. buffer heads will be dirty and possibly
- *    delalloc. If no delalloc buffer heads in this case then we
- *    can just return zero.
- *
- * 2. We are called to release a page which has been written via
- *    mmap, all we need to do is ensure there is no delalloc
- *    state in the buffer heads, if not we can let the caller
- *    free them and we should come back later via writepage.
+ * Returns 1 if the page is ok to release, 0 otherwise.
  */
 STATIC int
 xfs_vm_releasepage(
         struct page             *page,
         gfp_t                   gfp_mask)
 {
-        struct inode            *inode = page->mapping->host;
-        int                     dirty, delalloc, unmapped, unwritten;
-        struct writeback_control wbc = {
-                .sync_mode = WB_SYNC_ALL,
-                .nr_to_write = 1,
-        };
-
-        trace_xfs_releasepage(inode, page, 0);
+        int                     delalloc, unmapped, unwritten;
 
-        if (!page_has_buffers(page))
-                return 0;
+        trace_xfs_releasepage(page->mapping->host, page, 0);
 
         xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
-        if (!delalloc && !unwritten)
-                goto free_buffers;
 
-        if (!(gfp_mask & __GFP_FS))
+        if (WARN_ON(delalloc))
                 return 0;
-
-        /* If we are already inside a transaction or the thread cannot
-         * do I/O, we cannot release this page.
-         */
-        if (current_test_flags(PF_FSTRANS))
+        if (WARN_ON(unwritten))
                 return 0;
 
-        /*
-         * Convert delalloc space to real space, do not flush the
-         * data out to disk, that will be done by the caller.
-         * Never need to allocate space here - we will always
-         * come back to writepage in that case.
-         */
-        dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0);
-        if (dirty == 0 && !unwritten)
-                goto free_buffers;
-        return 0;
-
-free_buffers:
         return try_to_free_buffers(page);
 }
 
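[Editor's note: not part of the patch. After this change, writeback is the only place that converts delalloc or unwritten extents, so finding either state in ->releasepage is a bug: hence the WARN_ONs, and the page is refused. A minimal model of the new return convention, with try_to_free_buffers() replaced by a trivial stand-in:]

#include <stdio.h>

/*
 * Returns 1 if the page may be released, 0 otherwise, matching the
 * convention the corrected comment above documents. Delalloc or
 * unwritten state should already have been converted by writepage;
 * seeing it here corresponds to the kernel's WARN_ON()s.
 */
static int release_page(int delalloc, int unwritten)
{
        if (delalloc)
                return 0;
        if (unwritten)
                return 0;
        return 1;       /* stand-in for try_to_free_buffers() success */
}

int main(void)
{
        printf("clean page:    %d\n", release_page(0, 0));
        printf("delalloc page: %d\n", release_page(1, 0));
        return 0;
}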