Diffstat (limited to 'fs/xfs/linux-2.6/xfs_aops.c')
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c  611
1 file changed, 215 insertions(+), 396 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 34640d6dbdcb..d24e78f32f3e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
| @@ -21,19 +21,12 @@ | |||
| 21 | #include "xfs_inum.h" | 21 | #include "xfs_inum.h" |
| 22 | #include "xfs_sb.h" | 22 | #include "xfs_sb.h" |
| 23 | #include "xfs_ag.h" | 23 | #include "xfs_ag.h" |
| 24 | #include "xfs_dir2.h" | ||
| 25 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
| 26 | #include "xfs_dmapi.h" | ||
| 27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
| 28 | #include "xfs_bmap_btree.h" | 26 | #include "xfs_bmap_btree.h" |
| 29 | #include "xfs_alloc_btree.h" | ||
| 30 | #include "xfs_ialloc_btree.h" | ||
| 31 | #include "xfs_dir2_sf.h" | ||
| 32 | #include "xfs_attr_sf.h" | ||
| 33 | #include "xfs_dinode.h" | 27 | #include "xfs_dinode.h" |
| 34 | #include "xfs_inode.h" | 28 | #include "xfs_inode.h" |
| 35 | #include "xfs_alloc.h" | 29 | #include "xfs_alloc.h" |
| 36 | #include "xfs_btree.h" | ||
| 37 | #include "xfs_error.h" | 30 | #include "xfs_error.h" |
| 38 | #include "xfs_rw.h" | 31 | #include "xfs_rw.h" |
| 39 | #include "xfs_iomap.h" | 32 | #include "xfs_iomap.h" |
| @@ -92,18 +85,15 @@ void | |||
| 92 | xfs_count_page_state( | 85 | xfs_count_page_state( |
| 93 | struct page *page, | 86 | struct page *page, |
| 94 | int *delalloc, | 87 | int *delalloc, |
| 95 | int *unmapped, | ||
| 96 | int *unwritten) | 88 | int *unwritten) |
| 97 | { | 89 | { |
| 98 | struct buffer_head *bh, *head; | 90 | struct buffer_head *bh, *head; |
| 99 | 91 | ||
| 100 | *delalloc = *unmapped = *unwritten = 0; | 92 | *delalloc = *unwritten = 0; |
| 101 | 93 | ||
| 102 | bh = head = page_buffers(page); | 94 | bh = head = page_buffers(page); |
| 103 | do { | 95 | do { |
| 104 | if (buffer_uptodate(bh) && !buffer_mapped(bh)) | 96 | if (buffer_unwritten(bh)) |
| 105 | (*unmapped) = 1; | ||
| 106 | else if (buffer_unwritten(bh)) | ||
| 107 | (*unwritten) = 1; | 97 | (*unwritten) = 1; |
| 108 | else if (buffer_delay(bh)) | 98 | else if (buffer_delay(bh)) |
| 109 | (*delalloc) = 1; | 99 | (*delalloc) = 1; |
| @@ -212,23 +202,17 @@ xfs_setfilesize( | |||
| 212 | } | 202 | } |
| 213 | 203 | ||
| 214 | /* | 204 | /* |
| 215 | * Schedule IO completion handling on a xfsdatad if this was | 205 | * Schedule IO completion handling on the final put of an ioend. |
| 216 | * the final hold on this ioend. If we are asked to wait, | ||
| 217 | * flush the workqueue. | ||
| 218 | */ | 206 | */ |
| 219 | STATIC void | 207 | STATIC void |
| 220 | xfs_finish_ioend( | 208 | xfs_finish_ioend( |
| 221 | xfs_ioend_t *ioend, | 209 | struct xfs_ioend *ioend) |
| 222 | int wait) | ||
| 223 | { | 210 | { |
| 224 | if (atomic_dec_and_test(&ioend->io_remaining)) { | 211 | if (atomic_dec_and_test(&ioend->io_remaining)) { |
| 225 | struct workqueue_struct *wq; | 212 | if (ioend->io_type == IO_UNWRITTEN) |
| 226 | 213 | queue_work(xfsconvertd_workqueue, &ioend->io_work); | |
| 227 | wq = (ioend->io_type == IO_UNWRITTEN) ? | 214 | else |
| 228 | xfsconvertd_workqueue : xfsdatad_workqueue; | 215 | queue_work(xfsdatad_workqueue, &ioend->io_work); |
| 229 | queue_work(wq, &ioend->io_work); | ||
| 230 | if (wait) | ||
| 231 | flush_workqueue(wq); | ||
| 232 | } | 216 | } |
| 233 | } | 217 | } |
| 234 | 218 | ||
| @@ -272,11 +256,25 @@ xfs_end_io( | |||
| 272 | */ | 256 | */ |
| 273 | if (error == EAGAIN) { | 257 | if (error == EAGAIN) { |
| 274 | atomic_inc(&ioend->io_remaining); | 258 | atomic_inc(&ioend->io_remaining); |
| 275 | xfs_finish_ioend(ioend, 0); | 259 | xfs_finish_ioend(ioend); |
| 276 | /* ensure we don't spin on blocked ioends */ | 260 | /* ensure we don't spin on blocked ioends */ |
| 277 | delay(1); | 261 | delay(1); |
| 278 | } else | 262 | } else { |
| 263 | if (ioend->io_iocb) | ||
| 264 | aio_complete(ioend->io_iocb, ioend->io_result, 0); | ||
| 279 | xfs_destroy_ioend(ioend); | 265 | xfs_destroy_ioend(ioend); |
| 266 | } | ||
| 267 | } | ||
| 268 | |||
| 269 | /* | ||
| 270 | * Call IO completion handling in caller context on the final put of an ioend. | ||
| 271 | */ | ||
| 272 | STATIC void | ||
| 273 | xfs_finish_ioend_sync( | ||
| 274 | struct xfs_ioend *ioend) | ||
| 275 | { | ||
| 276 | if (atomic_dec_and_test(&ioend->io_remaining)) | ||
| 277 | xfs_end_io(&ioend->io_work); | ||
| 280 | } | 278 | } |
| 281 | 279 | ||
| 282 | /* | 280 | /* |
| @@ -309,6 +307,8 @@ xfs_alloc_ioend( | |||
| 309 | atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); | 307 | atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); |
| 310 | ioend->io_offset = 0; | 308 | ioend->io_offset = 0; |
| 311 | ioend->io_size = 0; | 309 | ioend->io_size = 0; |
| 310 | ioend->io_iocb = NULL; | ||
| 311 | ioend->io_result = 0; | ||
| 312 | 312 | ||
| 313 | INIT_WORK(&ioend->io_work, xfs_end_io); | 313 | INIT_WORK(&ioend->io_work, xfs_end_io); |
| 314 | return ioend; | 314 | return ioend; |
| @@ -358,7 +358,7 @@ xfs_end_bio( | |||
| 358 | bio->bi_end_io = NULL; | 358 | bio->bi_end_io = NULL; |
| 359 | bio_put(bio); | 359 | bio_put(bio); |
| 360 | 360 | ||
| 361 | xfs_finish_ioend(ioend, 0); | 361 | xfs_finish_ioend(ioend); |
| 362 | } | 362 | } |
| 363 | 363 | ||
| 364 | STATIC void | 364 | STATIC void |
| @@ -500,7 +500,7 @@ xfs_submit_ioend( | |||
| 500 | } | 500 | } |
| 501 | if (bio) | 501 | if (bio) |
| 502 | xfs_submit_ioend_bio(wbc, ioend, bio); | 502 | xfs_submit_ioend_bio(wbc, ioend, bio); |
| 503 | xfs_finish_ioend(ioend, 0); | 503 | xfs_finish_ioend(ioend); |
| 504 | } while ((ioend = next) != NULL); | 504 | } while ((ioend = next) != NULL); |
| 505 | } | 505 | } |
| 506 | 506 | ||
| @@ -614,31 +614,30 @@ xfs_map_at_offset( | |||
| 614 | STATIC unsigned int | 614 | STATIC unsigned int |
| 615 | xfs_probe_page( | 615 | xfs_probe_page( |
| 616 | struct page *page, | 616 | struct page *page, |
| 617 | unsigned int pg_offset, | 617 | unsigned int pg_offset) |
| 618 | int mapped) | ||
| 619 | { | 618 | { |
| 619 | struct buffer_head *bh, *head; | ||
| 620 | int ret = 0; | 620 | int ret = 0; |
| 621 | 621 | ||
| 622 | if (PageWriteback(page)) | 622 | if (PageWriteback(page)) |
| 623 | return 0; | 623 | return 0; |
| 624 | if (!PageDirty(page)) | ||
| 625 | return 0; | ||
| 626 | if (!page->mapping) | ||
| 627 | return 0; | ||
| 628 | if (!page_has_buffers(page)) | ||
| 629 | return 0; | ||
| 624 | 630 | ||
| 625 | if (page->mapping && PageDirty(page)) { | 631 | bh = head = page_buffers(page); |
| 626 | if (page_has_buffers(page)) { | 632 | do { |
| 627 | struct buffer_head *bh, *head; | 633 | if (!buffer_uptodate(bh)) |
| 628 | 634 | break; | |
| 629 | bh = head = page_buffers(page); | 635 | if (!buffer_mapped(bh)) |
| 630 | do { | 636 | break; |
| 631 | if (!buffer_uptodate(bh)) | 637 | ret += bh->b_size; |
| 632 | break; | 638 | if (ret >= pg_offset) |
| 633 | if (mapped != buffer_mapped(bh)) | 639 | break; |
| 634 | break; | 640 | } while ((bh = bh->b_this_page) != head); |
| 635 | ret += bh->b_size; | ||
| 636 | if (ret >= pg_offset) | ||
| 637 | break; | ||
| 638 | } while ((bh = bh->b_this_page) != head); | ||
| 639 | } else | ||
| 640 | ret = mapped ? 0 : PAGE_CACHE_SIZE; | ||
| 641 | } | ||
| 642 | 641 | ||
| 643 | return ret; | 642 | return ret; |
| 644 | } | 643 | } |
| @@ -648,8 +647,7 @@ xfs_probe_cluster( | |||
| 648 | struct inode *inode, | 647 | struct inode *inode, |
| 649 | struct page *startpage, | 648 | struct page *startpage, |
| 650 | struct buffer_head *bh, | 649 | struct buffer_head *bh, |
| 651 | struct buffer_head *head, | 650 | struct buffer_head *head) |
| 652 | int mapped) | ||
| 653 | { | 651 | { |
| 654 | struct pagevec pvec; | 652 | struct pagevec pvec; |
| 655 | pgoff_t tindex, tlast, tloff; | 653 | pgoff_t tindex, tlast, tloff; |
| @@ -658,7 +656,7 @@ xfs_probe_cluster( | |||
| 658 | 656 | ||
| 659 | /* First sum forwards in this page */ | 657 | /* First sum forwards in this page */ |
| 660 | do { | 658 | do { |
| 661 | if (!buffer_uptodate(bh) || (mapped != buffer_mapped(bh))) | 659 | if (!buffer_uptodate(bh) || !buffer_mapped(bh)) |
| 662 | return total; | 660 | return total; |
| 663 | total += bh->b_size; | 661 | total += bh->b_size; |
| 664 | } while ((bh = bh->b_this_page) != head); | 662 | } while ((bh = bh->b_this_page) != head); |
| @@ -692,7 +690,7 @@ xfs_probe_cluster( | |||
| 692 | pg_offset = PAGE_CACHE_SIZE; | 690 | pg_offset = PAGE_CACHE_SIZE; |
| 693 | 691 | ||
| 694 | if (page->index == tindex && trylock_page(page)) { | 692 | if (page->index == tindex && trylock_page(page)) { |
| 695 | pg_len = xfs_probe_page(page, pg_offset, mapped); | 693 | pg_len = xfs_probe_page(page, pg_offset); |
| 696 | unlock_page(page); | 694 | unlock_page(page); |
| 697 | } | 695 | } |
| 698 | 696 | ||
| @@ -761,7 +759,6 @@ xfs_convert_page( | |||
| 761 | struct xfs_bmbt_irec *imap, | 759 | struct xfs_bmbt_irec *imap, |
| 762 | xfs_ioend_t **ioendp, | 760 | xfs_ioend_t **ioendp, |
| 763 | struct writeback_control *wbc, | 761 | struct writeback_control *wbc, |
| 764 | int startio, | ||
| 765 | int all_bh) | 762 | int all_bh) |
| 766 | { | 763 | { |
| 767 | struct buffer_head *bh, *head; | 764 | struct buffer_head *bh, *head; |
| @@ -832,19 +829,14 @@ xfs_convert_page( | |||
| 832 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); | 829 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); |
| 833 | 830 | ||
| 834 | xfs_map_at_offset(inode, bh, imap, offset); | 831 | xfs_map_at_offset(inode, bh, imap, offset); |
| 835 | if (startio) { | 832 | xfs_add_to_ioend(inode, bh, offset, type, |
| 836 | xfs_add_to_ioend(inode, bh, offset, | 833 | ioendp, done); |
| 837 | type, ioendp, done); | 834 | |
| 838 | } else { | ||
| 839 | set_buffer_dirty(bh); | ||
| 840 | unlock_buffer(bh); | ||
| 841 | mark_buffer_dirty(bh); | ||
| 842 | } | ||
| 843 | page_dirty--; | 835 | page_dirty--; |
| 844 | count++; | 836 | count++; |
| 845 | } else { | 837 | } else { |
| 846 | type = IO_NEW; | 838 | type = IO_NEW; |
| 847 | if (buffer_mapped(bh) && all_bh && startio) { | 839 | if (buffer_mapped(bh) && all_bh) { |
| 848 | lock_buffer(bh); | 840 | lock_buffer(bh); |
| 849 | xfs_add_to_ioend(inode, bh, offset, | 841 | xfs_add_to_ioend(inode, bh, offset, |
| 850 | type, ioendp, done); | 842 | type, ioendp, done); |
| @@ -859,14 +851,12 @@ xfs_convert_page( | |||
| 859 | if (uptodate && bh == head) | 851 | if (uptodate && bh == head) |
| 860 | SetPageUptodate(page); | 852 | SetPageUptodate(page); |
| 861 | 853 | ||
| 862 | if (startio) { | 854 | if (count) { |
| 863 | if (count) { | 855 | wbc->nr_to_write--; |
| 864 | wbc->nr_to_write--; | 856 | if (wbc->nr_to_write <= 0) |
| 865 | if (wbc->nr_to_write <= 0) | 857 | done = 1; |
| 866 | done = 1; | ||
| 867 | } | ||
| 868 | xfs_start_page_writeback(page, !page_dirty, count); | ||
| 869 | } | 858 | } |
| 859 | xfs_start_page_writeback(page, !page_dirty, count); | ||
| 870 | 860 | ||
| 871 | return done; | 861 | return done; |
| 872 | fail_unlock_page: | 862 | fail_unlock_page: |
| @@ -886,7 +876,6 @@ xfs_cluster_write( | |||
| 886 | struct xfs_bmbt_irec *imap, | 876 | struct xfs_bmbt_irec *imap, |
| 887 | xfs_ioend_t **ioendp, | 877 | xfs_ioend_t **ioendp, |
| 888 | struct writeback_control *wbc, | 878 | struct writeback_control *wbc, |
| 889 | int startio, | ||
| 890 | int all_bh, | 879 | int all_bh, |
| 891 | pgoff_t tlast) | 880 | pgoff_t tlast) |
| 892 | { | 881 | { |
| @@ -902,7 +891,7 @@ xfs_cluster_write( | |||
| 902 | 891 | ||
| 903 | for (i = 0; i < pagevec_count(&pvec); i++) { | 892 | for (i = 0; i < pagevec_count(&pvec); i++) { |
| 904 | done = xfs_convert_page(inode, pvec.pages[i], tindex++, | 893 | done = xfs_convert_page(inode, pvec.pages[i], tindex++, |
| 905 | imap, ioendp, wbc, startio, all_bh); | 894 | imap, ioendp, wbc, all_bh); |
| 906 | if (done) | 895 | if (done) |
| 907 | break; | 896 | break; |
| 908 | } | 897 | } |
| @@ -981,7 +970,7 @@ xfs_aops_discard_page( | |||
| 981 | */ | 970 | */ |
| 982 | error = xfs_bmapi(NULL, ip, offset_fsb, 1, | 971 | error = xfs_bmapi(NULL, ip, offset_fsb, 1, |
| 983 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, | 972 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, |
| 984 | &nimaps, NULL, NULL); | 973 | &nimaps, NULL); |
| 985 | 974 | ||
| 986 | if (error) { | 975 | if (error) { |
| 987 | /* something screwed, just bail */ | 976 | /* something screwed, just bail */ |
| @@ -1009,7 +998,7 @@ xfs_aops_discard_page( | |||
| 1009 | */ | 998 | */ |
| 1010 | xfs_bmap_init(&flist, &firstblock); | 999 | xfs_bmap_init(&flist, &firstblock); |
| 1011 | error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock, | 1000 | error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock, |
| 1012 | &flist, NULL, &done); | 1001 | &flist, &done); |
| 1013 | 1002 | ||
| 1014 | ASSERT(!flist.xbf_count && !flist.xbf_first); | 1003 | ASSERT(!flist.xbf_count && !flist.xbf_first); |
| 1015 | if (error) { | 1004 | if (error) { |
| @@ -1032,50 +1021,66 @@ out_invalidate: | |||
| 1032 | } | 1021 | } |
| 1033 | 1022 | ||
| 1034 | /* | 1023 | /* |
| 1035 | * Calling this without startio set means we are being asked to make a dirty | 1024 | * Write out a dirty page. |
| 1036 | * page ready for freeing it's buffers. When called with startio set then | 1025 | * |
| 1037 | * we are coming from writepage. | 1026 | * For delalloc space on the page we need to allocate space and flush it. |
| 1027 | * For unwritten space on the page we need to start the conversion to | ||
| 1028 | * regular allocated space. | ||
| 1029 | * For any other dirty buffer heads on the page we should flush them. | ||
| 1038 | * | 1030 | * |
| 1039 | * When called with startio set it is important that we write the WHOLE | 1031 | * If we detect that a transaction would be required to flush the page, we |
| 1040 | * page if possible. | 1032 | * have to check the process flags first, if we are already in a transaction |
| 1041 | * The bh->b_state's cannot know if any of the blocks or which block for | 1033 | * or disk I/O during allocations is off, we need to fail the writepage and |
| 1042 | * that matter are dirty due to mmap writes, and therefore bh uptodate is | 1034 | * redirty the page. |
| 1043 | * only valid if the page itself isn't completely uptodate. Some layers | ||
| 1044 | * may clear the page dirty flag prior to calling write page, under the | ||
| 1045 | * assumption the entire page will be written out; by not writing out the | ||
| 1046 | * whole page the page can be reused before all valid dirty data is | ||
| 1047 | * written out. Note: in the case of a page that has been dirty'd by | ||
| 1048 | * mapwrite and but partially setup by block_prepare_write the | ||
| 1049 | * bh->b_states's will not agree and only ones setup by BPW/BCW will have | ||
| 1050 | * valid state, thus the whole page must be written out thing. | ||
| 1051 | */ | 1035 | */ |
| 1052 | |||
| 1053 | STATIC int | 1036 | STATIC int |
| 1054 | xfs_page_state_convert( | 1037 | xfs_vm_writepage( |
| 1055 | struct inode *inode, | 1038 | struct page *page, |
| 1056 | struct page *page, | 1039 | struct writeback_control *wbc) |
| 1057 | struct writeback_control *wbc, | ||
| 1058 | int startio, | ||
| 1059 | int unmapped) /* also implies page uptodate */ | ||
| 1060 | { | 1040 | { |
| 1041 | struct inode *inode = page->mapping->host; | ||
| 1042 | int delalloc, unwritten; | ||
| 1061 | struct buffer_head *bh, *head; | 1043 | struct buffer_head *bh, *head; |
| 1062 | struct xfs_bmbt_irec imap; | 1044 | struct xfs_bmbt_irec imap; |
| 1063 | xfs_ioend_t *ioend = NULL, *iohead = NULL; | 1045 | xfs_ioend_t *ioend = NULL, *iohead = NULL; |
| 1064 | loff_t offset; | 1046 | loff_t offset; |
| 1065 | unsigned long p_offset = 0; | ||
| 1066 | unsigned int type; | 1047 | unsigned int type; |
| 1067 | __uint64_t end_offset; | 1048 | __uint64_t end_offset; |
| 1068 | pgoff_t end_index, last_index; | 1049 | pgoff_t end_index, last_index; |
| 1069 | ssize_t size, len; | 1050 | ssize_t size, len; |
| 1070 | int flags, err, imap_valid = 0, uptodate = 1; | 1051 | int flags, err, imap_valid = 0, uptodate = 1; |
| 1071 | int page_dirty, count = 0; | 1052 | int count = 0; |
| 1072 | int trylock = 0; | 1053 | int all_bh = 0; |
| 1073 | int all_bh = unmapped; | ||
| 1074 | 1054 | ||
| 1075 | if (startio) { | 1055 | trace_xfs_writepage(inode, page, 0); |
| 1076 | if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking) | 1056 | |
| 1077 | trylock |= BMAPI_TRYLOCK; | 1057 | ASSERT(page_has_buffers(page)); |
| 1078 | } | 1058 | |
| 1059 | /* | ||
| 1060 | * Refuse to write the page out if we are called from reclaim context. | ||
| 1061 | * | ||
| 1062 | * This avoids stack overflows when called from deeply used stacks in | ||
| 1063 | * random callers for direct reclaim or memcg reclaim. We explicitly | ||
| 1064 | * allow reclaim from kswapd as the stack usage there is relatively low. | ||
| 1065 | * | ||
| 1066 | * This should really be done by the core VM, but until that happens | ||
| 1067 | * filesystems like XFS, btrfs and ext4 have to take care of this | ||
| 1068 | * by themselves. | ||
| 1069 | */ | ||
| 1070 | if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) | ||
| 1071 | goto out_fail; | ||
| 1072 | |||
| 1073 | /* | ||
| 1074 | * We need a transaction if there are delalloc or unwritten buffers | ||
| 1075 | * on the page. | ||
| 1076 | * | ||
| 1077 | * If we need a transaction and the process flags say we are already | ||
| 1078 | * in a transaction, or no IO is allowed then mark the page dirty | ||
| 1079 | * again and leave the page as is. | ||
| 1080 | */ | ||
| 1081 | xfs_count_page_state(page, &delalloc, &unwritten); | ||
| 1082 | if ((current->flags & PF_FSTRANS) && (delalloc || unwritten)) | ||
| 1083 | goto out_fail; | ||
| 1079 | 1084 | ||
| 1080 | /* Is this page beyond the end of the file? */ | 1085 | /* Is this page beyond the end of the file? */ |
| 1081 | offset = i_size_read(inode); | 1086 | offset = i_size_read(inode); |
| @@ -1084,50 +1089,33 @@ xfs_page_state_convert( | |||
| 1084 | if (page->index >= end_index) { | 1089 | if (page->index >= end_index) { |
| 1085 | if ((page->index >= end_index + 1) || | 1090 | if ((page->index >= end_index + 1) || |
| 1086 | !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { | 1091 | !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { |
| 1087 | if (startio) | 1092 | unlock_page(page); |
| 1088 | unlock_page(page); | ||
| 1089 | return 0; | 1093 | return 0; |
| 1090 | } | 1094 | } |
| 1091 | } | 1095 | } |
| 1092 | 1096 | ||
| 1093 | /* | ||
| 1094 | * page_dirty is initially a count of buffers on the page before | ||
| 1095 | * EOF and is decremented as we move each into a cleanable state. | ||
| 1096 | * | ||
| 1097 | * Derivation: | ||
| 1098 | * | ||
| 1099 | * End offset is the highest offset that this page should represent. | ||
| 1100 | * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) | ||
| 1101 | * will evaluate non-zero and be less than PAGE_CACHE_SIZE and | ||
| 1102 | * hence give us the correct page_dirty count. On any other page, | ||
| 1103 | * it will be zero and in that case we need page_dirty to be the | ||
| 1104 | * count of buffers on the page. | ||
| 1105 | */ | ||
| 1106 | end_offset = min_t(unsigned long long, | 1097 | end_offset = min_t(unsigned long long, |
| 1107 | (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset); | 1098 | (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, |
| 1099 | offset); | ||
| 1108 | len = 1 << inode->i_blkbits; | 1100 | len = 1 << inode->i_blkbits; |
| 1109 | p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), | ||
| 1110 | PAGE_CACHE_SIZE); | ||
| 1111 | p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; | ||
| 1112 | page_dirty = p_offset / len; | ||
| 1113 | 1101 | ||
| 1114 | bh = head = page_buffers(page); | 1102 | bh = head = page_buffers(page); |
| 1115 | offset = page_offset(page); | 1103 | offset = page_offset(page); |
| 1116 | flags = BMAPI_READ; | 1104 | flags = BMAPI_READ; |
| 1117 | type = IO_NEW; | 1105 | type = IO_NEW; |
| 1118 | 1106 | ||
| 1119 | /* TODO: cleanup count and page_dirty */ | ||
| 1120 | |||
| 1121 | do { | 1107 | do { |
| 1122 | if (offset >= end_offset) | 1108 | if (offset >= end_offset) |
| 1123 | break; | 1109 | break; |
| 1124 | if (!buffer_uptodate(bh)) | 1110 | if (!buffer_uptodate(bh)) |
| 1125 | uptodate = 0; | 1111 | uptodate = 0; |
| 1126 | if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) { | 1112 | |
| 1127 | /* | 1113 | /* |
| 1128 | * the iomap is actually still valid, but the ioend | 1114 | * A hole may still be marked uptodate because discard_buffer |
| 1129 | * isn't. shouldn't happen too often. | 1115 | * leaves the flag set. |
| 1130 | */ | 1116 | */ |
| 1117 | if (!buffer_mapped(bh) && buffer_uptodate(bh)) { | ||
| 1118 | ASSERT(!buffer_dirty(bh)); | ||
| 1131 | imap_valid = 0; | 1119 | imap_valid = 0; |
| 1132 | continue; | 1120 | continue; |
| 1133 | } | 1121 | } |
| @@ -1135,19 +1123,7 @@ xfs_page_state_convert( | |||
| 1135 | if (imap_valid) | 1123 | if (imap_valid) |
| 1136 | imap_valid = xfs_imap_valid(inode, &imap, offset); | 1124 | imap_valid = xfs_imap_valid(inode, &imap, offset); |
| 1137 | 1125 | ||
| 1138 | /* | 1126 | if (buffer_unwritten(bh) || buffer_delay(bh)) { |
| 1139 | * First case, map an unwritten extent and prepare for | ||
| 1140 | * extent state conversion transaction on completion. | ||
| 1141 | * | ||
| 1142 | * Second case, allocate space for a delalloc buffer. | ||
| 1143 | * We can return EAGAIN here in the release page case. | ||
| 1144 | * | ||
| 1145 | * Third case, an unmapped buffer was found, and we are | ||
| 1146 | * in a path where we need to write the whole page out. | ||
| 1147 | */ | ||
| 1148 | if (buffer_unwritten(bh) || buffer_delay(bh) || | ||
| 1149 | ((buffer_uptodate(bh) || PageUptodate(page)) && | ||
| 1150 | !buffer_mapped(bh) && (unmapped || startio))) { | ||
| 1151 | int new_ioend = 0; | 1127 | int new_ioend = 0; |
| 1152 | 1128 | ||
| 1153 | /* | 1129 | /* |
| @@ -1161,15 +1137,16 @@ xfs_page_state_convert( | |||
| 1161 | flags = BMAPI_WRITE | BMAPI_IGNSTATE; | 1137 | flags = BMAPI_WRITE | BMAPI_IGNSTATE; |
| 1162 | } else if (buffer_delay(bh)) { | 1138 | } else if (buffer_delay(bh)) { |
| 1163 | type = IO_DELAY; | 1139 | type = IO_DELAY; |
| 1164 | flags = BMAPI_ALLOCATE | trylock; | 1140 | flags = BMAPI_ALLOCATE; |
| 1165 | } else { | 1141 | |
| 1166 | type = IO_NEW; | 1142 | if (wbc->sync_mode == WB_SYNC_NONE && |
| 1167 | flags = BMAPI_WRITE | BMAPI_MMAP; | 1143 | wbc->nonblocking) |
| 1144 | flags |= BMAPI_TRYLOCK; | ||
| 1168 | } | 1145 | } |
| 1169 | 1146 | ||
| 1170 | if (!imap_valid) { | 1147 | if (!imap_valid) { |
| 1171 | /* | 1148 | /* |
| 1172 | * if we didn't have a valid mapping then we | 1149 | * If we didn't have a valid mapping then we |
| 1173 | * need to ensure that we put the new mapping | 1150 | * need to ensure that we put the new mapping |
| 1174 | * in a new ioend structure. This needs to be | 1151 | * in a new ioend structure. This needs to be |
| 1175 | * done to ensure that the ioends correctly | 1152 | * done to ensure that the ioends correctly |
| @@ -1177,14 +1154,7 @@ xfs_page_state_convert( | |||
| 1177 | * for unwritten extent conversion. | 1154 | * for unwritten extent conversion. |
| 1178 | */ | 1155 | */ |
| 1179 | new_ioend = 1; | 1156 | new_ioend = 1; |
| 1180 | if (type == IO_NEW) { | 1157 | err = xfs_map_blocks(inode, offset, len, |
| 1181 | size = xfs_probe_cluster(inode, | ||
| 1182 | page, bh, head, 0); | ||
| 1183 | } else { | ||
| 1184 | size = len; | ||
| 1185 | } | ||
| 1186 | |||
| 1187 | err = xfs_map_blocks(inode, offset, size, | ||
| 1188 | &imap, flags); | 1158 | &imap, flags); |
| 1189 | if (err) | 1159 | if (err) |
| 1190 | goto error; | 1160 | goto error; |
| @@ -1193,19 +1163,11 @@ xfs_page_state_convert( | |||
| 1193 | } | 1163 | } |
| 1194 | if (imap_valid) { | 1164 | if (imap_valid) { |
| 1195 | xfs_map_at_offset(inode, bh, &imap, offset); | 1165 | xfs_map_at_offset(inode, bh, &imap, offset); |
| 1196 | if (startio) { | 1166 | xfs_add_to_ioend(inode, bh, offset, type, |
| 1197 | xfs_add_to_ioend(inode, bh, offset, | 1167 | &ioend, new_ioend); |
| 1198 | type, &ioend, | ||
| 1199 | new_ioend); | ||
| 1200 | } else { | ||
| 1201 | set_buffer_dirty(bh); | ||
| 1202 | unlock_buffer(bh); | ||
| 1203 | mark_buffer_dirty(bh); | ||
| 1204 | } | ||
| 1205 | page_dirty--; | ||
| 1206 | count++; | 1168 | count++; |
| 1207 | } | 1169 | } |
| 1208 | } else if (buffer_uptodate(bh) && startio) { | 1170 | } else if (buffer_uptodate(bh)) { |
| 1209 | /* | 1171 | /* |
| 1210 | * we got here because the buffer is already mapped. | 1172 | * we got here because the buffer is already mapped. |
| 1211 | * That means it must already have extents allocated | 1173 | * That means it must already have extents allocated |
| @@ -1213,8 +1175,7 @@ xfs_page_state_convert( | |||
| 1213 | */ | 1175 | */ |
| 1214 | if (!imap_valid || flags != BMAPI_READ) { | 1176 | if (!imap_valid || flags != BMAPI_READ) { |
| 1215 | flags = BMAPI_READ; | 1177 | flags = BMAPI_READ; |
| 1216 | size = xfs_probe_cluster(inode, page, bh, | 1178 | size = xfs_probe_cluster(inode, page, bh, head); |
| 1217 | head, 1); | ||
| 1218 | err = xfs_map_blocks(inode, offset, size, | 1179 | err = xfs_map_blocks(inode, offset, size, |
| 1219 | &imap, flags); | 1180 | &imap, flags); |
| 1220 | if (err) | 1181 | if (err) |
| @@ -1233,18 +1194,16 @@ xfs_page_state_convert( | |||
| 1233 | */ | 1194 | */ |
| 1234 | type = IO_NEW; | 1195 | type = IO_NEW; |
| 1235 | if (trylock_buffer(bh)) { | 1196 | if (trylock_buffer(bh)) { |
| 1236 | ASSERT(buffer_mapped(bh)); | ||
| 1237 | if (imap_valid) | 1197 | if (imap_valid) |
| 1238 | all_bh = 1; | 1198 | all_bh = 1; |
| 1239 | xfs_add_to_ioend(inode, bh, offset, type, | 1199 | xfs_add_to_ioend(inode, bh, offset, type, |
| 1240 | &ioend, !imap_valid); | 1200 | &ioend, !imap_valid); |
| 1241 | page_dirty--; | ||
| 1242 | count++; | 1201 | count++; |
| 1243 | } else { | 1202 | } else { |
| 1244 | imap_valid = 0; | 1203 | imap_valid = 0; |
| 1245 | } | 1204 | } |
| 1246 | } else if ((buffer_uptodate(bh) || PageUptodate(page)) && | 1205 | } else if (PageUptodate(page)) { |
| 1247 | (unmapped || startio)) { | 1206 | ASSERT(buffer_mapped(bh)); |
| 1248 | imap_valid = 0; | 1207 | imap_valid = 0; |
| 1249 | } | 1208 | } |
| 1250 | 1209 | ||
| @@ -1256,8 +1215,7 @@ xfs_page_state_convert( | |||
| 1256 | if (uptodate && bh == head) | 1215 | if (uptodate && bh == head) |
| 1257 | SetPageUptodate(page); | 1216 | SetPageUptodate(page); |
| 1258 | 1217 | ||
| 1259 | if (startio) | 1218 | xfs_start_page_writeback(page, 1, count); |
| 1260 | xfs_start_page_writeback(page, 1, count); | ||
| 1261 | 1219 | ||
| 1262 | if (ioend && imap_valid) { | 1220 | if (ioend && imap_valid) { |
| 1263 | xfs_off_t end_index; | 1221 | xfs_off_t end_index; |
| @@ -1275,131 +1233,27 @@ xfs_page_state_convert( | |||
| 1275 | end_index = last_index; | 1233 | end_index = last_index; |
| 1276 | 1234 | ||
| 1277 | xfs_cluster_write(inode, page->index + 1, &imap, &ioend, | 1235 | xfs_cluster_write(inode, page->index + 1, &imap, &ioend, |
| 1278 | wbc, startio, all_bh, end_index); | 1236 | wbc, all_bh, end_index); |
| 1279 | } | 1237 | } |
| 1280 | 1238 | ||
| 1281 | if (iohead) | 1239 | if (iohead) |
| 1282 | xfs_submit_ioend(wbc, iohead); | 1240 | xfs_submit_ioend(wbc, iohead); |
| 1283 | 1241 | ||
| 1284 | return page_dirty; | 1242 | return 0; |
| 1285 | 1243 | ||
| 1286 | error: | 1244 | error: |
| 1287 | if (iohead) | 1245 | if (iohead) |
| 1288 | xfs_cancel_ioend(iohead); | 1246 | xfs_cancel_ioend(iohead); |
| 1289 | 1247 | ||
| 1290 | /* | 1248 | xfs_aops_discard_page(page); |
| 1291 | * If it's delalloc and we have nowhere to put it, | 1249 | ClearPageUptodate(page); |
| 1292 | * throw it away, unless the lower layers told | 1250 | unlock_page(page); |
| 1293 | * us to try again. | ||
| 1294 | */ | ||
| 1295 | if (err != -EAGAIN) { | ||
| 1296 | if (!unmapped) | ||
| 1297 | xfs_aops_discard_page(page); | ||
| 1298 | ClearPageUptodate(page); | ||
| 1299 | } | ||
| 1300 | return err; | 1251 | return err; |
| 1301 | } | ||
| 1302 | |||
| 1303 | /* | ||
| 1304 | * writepage: Called from one of two places: | ||
| 1305 | * | ||
| 1306 | * 1. we are flushing a delalloc buffer head. | ||
| 1307 | * | ||
| 1308 | * 2. we are writing out a dirty page. Typically the page dirty | ||
| 1309 | * state is cleared before we get here. In this case is it | ||
| 1310 | * conceivable we have no buffer heads. | ||
| 1311 | * | ||
| 1312 | * For delalloc space on the page we need to allocate space and | ||
| 1313 | * flush it. For unmapped buffer heads on the page we should | ||
| 1314 | * allocate space if the page is uptodate. For any other dirty | ||
| 1315 | * buffer heads on the page we should flush them. | ||
| 1316 | * | ||
| 1317 | * If we detect that a transaction would be required to flush | ||
| 1318 | * the page, we have to check the process flags first, if we | ||
| 1319 | * are already in a transaction or disk I/O during allocations | ||
| 1320 | * is off, we need to fail the writepage and redirty the page. | ||
| 1321 | */ | ||
| 1322 | |||
| 1323 | STATIC int | ||
| 1324 | xfs_vm_writepage( | ||
| 1325 | struct page *page, | ||
| 1326 | struct writeback_control *wbc) | ||
| 1327 | { | ||
| 1328 | int error; | ||
| 1329 | int need_trans; | ||
| 1330 | int delalloc, unmapped, unwritten; | ||
| 1331 | struct inode *inode = page->mapping->host; | ||
| 1332 | |||
| 1333 | trace_xfs_writepage(inode, page, 0); | ||
| 1334 | |||
| 1335 | /* | ||
| 1336 | * Refuse to write the page out if we are called from reclaim context. | ||
| 1337 | * | ||
| 1338 | * This is primarily to avoid stack overflows when called from deep | ||
| 1339 | * used stacks in random callers for direct reclaim, but disabling | ||
| 1340 | * reclaim for kswap is a nice side-effect as kswapd causes rather | ||
| 1341 | * suboptimal I/O patters, too. | ||
| 1342 | * | ||
| 1343 | * This should really be done by the core VM, but until that happens | ||
| 1344 | * filesystems like XFS, btrfs and ext4 have to take care of this | ||
| 1345 | * by themselves. | ||
| 1346 | */ | ||
| 1347 | if (current->flags & PF_MEMALLOC) | ||
| 1348 | goto out_fail; | ||
| 1349 | |||
| 1350 | /* | ||
| 1351 | * We need a transaction if: | ||
| 1352 | * 1. There are delalloc buffers on the page | ||
| 1353 | * 2. The page is uptodate and we have unmapped buffers | ||
| 1354 | * 3. The page is uptodate and we have no buffers | ||
| 1355 | * 4. There are unwritten buffers on the page | ||
| 1356 | */ | ||
| 1357 | |||
| 1358 | if (!page_has_buffers(page)) { | ||
| 1359 | unmapped = 1; | ||
| 1360 | need_trans = 1; | ||
| 1361 | } else { | ||
| 1362 | xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); | ||
| 1363 | if (!PageUptodate(page)) | ||
| 1364 | unmapped = 0; | ||
| 1365 | need_trans = delalloc + unmapped + unwritten; | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | /* | ||
| 1369 | * If we need a transaction and the process flags say | ||
| 1370 | * we are already in a transaction, or no IO is allowed | ||
| 1371 | * then mark the page dirty again and leave the page | ||
| 1372 | * as is. | ||
| 1373 | */ | ||
| 1374 | if (current_test_flags(PF_FSTRANS) && need_trans) | ||
| 1375 | goto out_fail; | ||
| 1376 | |||
| 1377 | /* | ||
| 1378 | * Delay hooking up buffer heads until we have | ||
| 1379 | * made our go/no-go decision. | ||
| 1380 | */ | ||
| 1381 | if (!page_has_buffers(page)) | ||
| 1382 | create_empty_buffers(page, 1 << inode->i_blkbits, 0); | ||
| 1383 | |||
| 1384 | /* | ||
| 1385 | * Convert delayed allocate, unwritten or unmapped space | ||
| 1386 | * to real space and flush out to disk. | ||
| 1387 | */ | ||
| 1388 | error = xfs_page_state_convert(inode, page, wbc, 1, unmapped); | ||
| 1389 | if (error == -EAGAIN) | ||
| 1390 | goto out_fail; | ||
| 1391 | if (unlikely(error < 0)) | ||
| 1392 | goto out_unlock; | ||
| 1393 | |||
| 1394 | return 0; | ||
| 1395 | 1252 | ||
| 1396 | out_fail: | 1253 | out_fail: |
| 1397 | redirty_page_for_writepage(wbc, page); | 1254 | redirty_page_for_writepage(wbc, page); |
| 1398 | unlock_page(page); | 1255 | unlock_page(page); |
| 1399 | return 0; | 1256 | return 0; |
| 1400 | out_unlock: | ||
| 1401 | unlock_page(page); | ||
| 1402 | return error; | ||
| 1403 | } | 1257 | } |
| 1404 | 1258 | ||
| 1405 | STATIC int | 1259 | STATIC int |
| @@ -1413,65 +1267,27 @@ xfs_vm_writepages( | |||
| 1413 | 1267 | ||
| 1414 | /* | 1268 | /* |
| 1415 | * Called to move a page into cleanable state - and from there | 1269 | * Called to move a page into cleanable state - and from there |
| 1416 | * to be released. Possibly the page is already clean. We always | 1270 | * to be released. The page should already be clean. We always |
| 1417 | * have buffer heads in this call. | 1271 | * have buffer heads in this call. |
| 1418 | * | 1272 | * |
| 1419 | * Returns 0 if the page is ok to release, 1 otherwise. | 1273 | * Returns 1 if the page is ok to release, 0 otherwise. |
| 1420 | * | ||
| 1421 | * Possible scenarios are: | ||
| 1422 | * | ||
| 1423 | * 1. We are being called to release a page which has been written | ||
| 1424 | * to via regular I/O. buffer heads will be dirty and possibly | ||
| 1425 | * delalloc. If no delalloc buffer heads in this case then we | ||
| 1426 | * can just return zero. | ||
| 1427 | * | ||
| 1428 | * 2. We are called to release a page which has been written via | ||
| 1429 | * mmap, all we need to do is ensure there is no delalloc | ||
| 1430 | * state in the buffer heads, if not we can let the caller | ||
| 1431 | * free them and we should come back later via writepage. | ||
| 1432 | */ | 1274 | */ |
| 1433 | STATIC int | 1275 | STATIC int |
| 1434 | xfs_vm_releasepage( | 1276 | xfs_vm_releasepage( |
| 1435 | struct page *page, | 1277 | struct page *page, |
| 1436 | gfp_t gfp_mask) | 1278 | gfp_t gfp_mask) |
| 1437 | { | 1279 | { |
| 1438 | struct inode *inode = page->mapping->host; | 1280 | int delalloc, unwritten; |
| 1439 | int dirty, delalloc, unmapped, unwritten; | ||
| 1440 | struct writeback_control wbc = { | ||
| 1441 | .sync_mode = WB_SYNC_ALL, | ||
| 1442 | .nr_to_write = 1, | ||
| 1443 | }; | ||
| 1444 | 1281 | ||
| 1445 | trace_xfs_releasepage(inode, page, 0); | 1282 | trace_xfs_releasepage(page->mapping->host, page, 0); |
| 1446 | |||
| 1447 | if (!page_has_buffers(page)) | ||
| 1448 | return 0; | ||
| 1449 | 1283 | ||
| 1450 | xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); | 1284 | xfs_count_page_state(page, &delalloc, &unwritten); |
| 1451 | if (!delalloc && !unwritten) | ||
| 1452 | goto free_buffers; | ||
| 1453 | 1285 | ||
| 1454 | if (!(gfp_mask & __GFP_FS)) | 1286 | if (WARN_ON(delalloc)) |
| 1455 | return 0; | 1287 | return 0; |
| 1456 | 1288 | if (WARN_ON(unwritten)) | |
| 1457 | /* If we are already inside a transaction or the thread cannot | ||
| 1458 | * do I/O, we cannot release this page. | ||
| 1459 | */ | ||
| 1460 | if (current_test_flags(PF_FSTRANS)) | ||
| 1461 | return 0; | 1289 | return 0; |
| 1462 | 1290 | ||
| 1463 | /* | ||
| 1464 | * Convert delalloc space to real space, do not flush the | ||
| 1465 | * data out to disk, that will be done by the caller. | ||
| 1466 | * Never need to allocate space here - we will always | ||
| 1467 | * come back to writepage in that case. | ||
| 1468 | */ | ||
| 1469 | dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0); | ||
| 1470 | if (dirty == 0 && !unwritten) | ||
| 1471 | goto free_buffers; | ||
| 1472 | return 0; | ||
| 1473 | |||
| 1474 | free_buffers: | ||
| 1475 | return try_to_free_buffers(page); | 1291 | return try_to_free_buffers(page); |
| 1476 | } | 1292 | } |
| 1477 | 1293 | ||
| @@ -1481,9 +1297,9 @@ __xfs_get_blocks( | |||
| 1481 | sector_t iblock, | 1297 | sector_t iblock, |
| 1482 | struct buffer_head *bh_result, | 1298 | struct buffer_head *bh_result, |
| 1483 | int create, | 1299 | int create, |
| 1484 | int direct, | 1300 | int direct) |
| 1485 | bmapi_flags_t flags) | ||
| 1486 | { | 1301 | { |
| 1302 | int flags = create ? BMAPI_WRITE : BMAPI_READ; | ||
| 1487 | struct xfs_bmbt_irec imap; | 1303 | struct xfs_bmbt_irec imap; |
| 1488 | xfs_off_t offset; | 1304 | xfs_off_t offset; |
| 1489 | ssize_t size; | 1305 | ssize_t size; |
| @@ -1498,8 +1314,11 @@ __xfs_get_blocks( | |||
| 1498 | if (!create && direct && offset >= i_size_read(inode)) | 1314 | if (!create && direct && offset >= i_size_read(inode)) |
| 1499 | return 0; | 1315 | return 0; |
| 1500 | 1316 | ||
| 1501 | error = xfs_iomap(XFS_I(inode), offset, size, | 1317 | if (direct && create) |
| 1502 | create ? flags : BMAPI_READ, &imap, &nimap, &new); | 1318 | flags |= BMAPI_DIRECT; |
| 1319 | |||
| 1320 | error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap, | ||
| 1321 | &new); | ||
| 1503 | if (error) | 1322 | if (error) |
| 1504 | return -error; | 1323 | return -error; |
| 1505 | if (nimap == 0) | 1324 | if (nimap == 0) |
| @@ -1579,8 +1398,7 @@ xfs_get_blocks( | |||
| 1579 | struct buffer_head *bh_result, | 1398 | struct buffer_head *bh_result, |
| 1580 | int create) | 1399 | int create) |
| 1581 | { | 1400 | { |
| 1582 | return __xfs_get_blocks(inode, iblock, | 1401 | return __xfs_get_blocks(inode, iblock, bh_result, create, 0); |
| 1583 | bh_result, create, 0, BMAPI_WRITE); | ||
| 1584 | } | 1402 | } |
| 1585 | 1403 | ||
| 1586 | STATIC int | 1404 | STATIC int |
| @@ -1590,61 +1408,59 @@ xfs_get_blocks_direct( | |||
| 1590 | struct buffer_head *bh_result, | 1408 | struct buffer_head *bh_result, |
| 1591 | int create) | 1409 | int create) |
| 1592 | { | 1410 | { |
| 1593 | return __xfs_get_blocks(inode, iblock, | 1411 | return __xfs_get_blocks(inode, iblock, bh_result, create, 1); |
| 1594 | bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT); | ||
| 1595 | } | 1412 | } |
| 1596 | 1413 | ||
| 1414 | /* | ||
| 1415 | * Complete a direct I/O write request. | ||
| 1416 | * | ||
| 1417 | * If the private argument is non-NULL __xfs_get_blocks signals us that we | ||
| 1418 | * need to issue a transaction to convert the range from unwritten to written | ||
| 1419 | * extents. In case this is regular synchronous I/O we just call xfs_end_io | ||
| 1420 | * to do this and we are done. But in case this was a successfull AIO | ||
| 1421 | * request this handler is called from interrupt context, from which we | ||
| 1422 | * can't start transactions. In that case offload the I/O completion to | ||
| 1423 | * the workqueues we also use for buffered I/O completion. | ||
| 1424 | */ | ||
| 1597 | STATIC void | 1425 | STATIC void |
| 1598 | xfs_end_io_direct( | 1426 | xfs_end_io_direct_write( |
| 1599 | struct kiocb *iocb, | 1427 | struct kiocb *iocb, |
| 1600 | loff_t offset, | 1428 | loff_t offset, |
| 1601 | ssize_t size, | 1429 | ssize_t size, |
| 1602 | void *private) | 1430 | void *private, |
| 1431 | int ret, | ||
| 1432 | bool is_async) | ||
| 1603 | { | 1433 | { |
| 1604 | xfs_ioend_t *ioend = iocb->private; | 1434 | struct xfs_ioend *ioend = iocb->private; |
| 1605 | 1435 | ||
| 1606 | /* | 1436 | /* |
| 1607 | * Non-NULL private data means we need to issue a transaction to | 1437 | * blockdev_direct_IO can return an error even after the I/O |
| 1608 | * convert a range from unwritten to written extents. This needs | 1438 | * completion handler was called. Thus we need to protect |
| 1609 | * to happen from process context but aio+dio I/O completion | 1439 | * against double-freeing. |
| 1610 | * happens from irq context so we need to defer it to a workqueue. | ||
| 1611 | * This is not necessary for synchronous direct I/O, but we do | ||
| 1612 | * it anyway to keep the code uniform and simpler. | ||
| 1613 | * | ||
| 1614 | * Well, if only it were that simple. Because synchronous direct I/O | ||
| 1615 | * requires extent conversion to occur *before* we return to userspace, | ||
| 1616 | * we have to wait for extent conversion to complete. Look at the | ||
| 1617 | * iocb that has been passed to us to determine if this is AIO or | ||
| 1618 | * not. If it is synchronous, tell xfs_finish_ioend() to kick the | ||
| 1619 | * workqueue and wait for it to complete. | ||
| 1620 | * | ||
| 1621 | * The core direct I/O code might be changed to always call the | ||
| 1622 | * completion handler in the future, in which case all this can | ||
| 1623 | * go away. | ||
| 1624 | */ | 1440 | */ |
| 1441 | iocb->private = NULL; | ||
| 1442 | |||
| 1625 | ioend->io_offset = offset; | 1443 | ioend->io_offset = offset; |
| 1626 | ioend->io_size = size; | 1444 | ioend->io_size = size; |
| 1627 | if (ioend->io_type == IO_READ) { | 1445 | if (private && size > 0) |
| 1628 | xfs_finish_ioend(ioend, 0); | 1446 | ioend->io_type = IO_UNWRITTEN; |
| 1629 | } else if (private && size > 0) { | 1447 | |
| 1630 | xfs_finish_ioend(ioend, is_sync_kiocb(iocb)); | 1448 | if (is_async) { |
| 1631 | } else { | ||
| 1632 | /* | 1449 | /* |
| 1633 | * A direct I/O write ioend starts it's life in unwritten | 1450 | * If we are converting an unwritten extent we need to delay |
| 1634 | * state in case they map an unwritten extent. This write | 1451 | * the AIO completion until after the unwritten extent |
| 1635 | * didn't map an unwritten extent so switch it's completion | 1452 | * conversion has completed, otherwise do it ASAP. |
| 1636 | * handler. | ||
| 1637 | */ | 1453 | */ |
| 1638 | ioend->io_type = IO_NEW; | 1454 | if (ioend->io_type == IO_UNWRITTEN) { |
| 1639 | xfs_finish_ioend(ioend, 0); | 1455 | ioend->io_iocb = iocb; |
| 1456 | ioend->io_result = ret; | ||
| 1457 | } else { | ||
| 1458 | aio_complete(iocb, ret, 0); | ||
| 1459 | } | ||
| 1460 | xfs_finish_ioend(ioend); | ||
| 1461 | } else { | ||
| 1462 | xfs_finish_ioend_sync(ioend); | ||
| 1640 | } | 1463 | } |
| 1641 | |||
| 1642 | /* | ||
| 1643 | * blockdev_direct_IO can return an error even after the I/O | ||
| 1644 | * completion handler was called. Thus we need to protect | ||
| 1645 | * against double-freeing. | ||
| 1646 | */ | ||
| 1647 | iocb->private = NULL; | ||
| 1648 | } | 1464 | } |
| 1649 | 1465 | ||
| 1650 | STATIC ssize_t | 1466 | STATIC ssize_t |
| @@ -1655,23 +1471,26 @@ xfs_vm_direct_IO( | |||
| 1655 | loff_t offset, | 1471 | loff_t offset, |
| 1656 | unsigned long nr_segs) | 1472 | unsigned long nr_segs) |
| 1657 | { | 1473 | { |
| 1658 | struct file *file = iocb->ki_filp; | 1474 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
| 1659 | struct inode *inode = file->f_mapping->host; | 1475 | struct block_device *bdev = xfs_find_bdev_for_inode(inode); |
| 1660 | struct block_device *bdev; | 1476 | ssize_t ret; |
| 1661 | ssize_t ret; | 1477 | |
| 1662 | 1478 | if (rw & WRITE) { | |
| 1663 | bdev = xfs_find_bdev_for_inode(inode); | 1479 | iocb->private = xfs_alloc_ioend(inode, IO_NEW); |
| 1664 | 1480 | ||
| 1665 | iocb->private = xfs_alloc_ioend(inode, rw == WRITE ? | 1481 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, |
| 1666 | IO_UNWRITTEN : IO_READ); | 1482 | offset, nr_segs, |
| 1667 | 1483 | xfs_get_blocks_direct, | |
| 1668 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, | 1484 | xfs_end_io_direct_write); |
| 1669 | offset, nr_segs, | 1485 | if (ret != -EIOCBQUEUED && iocb->private) |
| 1670 | xfs_get_blocks_direct, | 1486 | xfs_destroy_ioend(iocb->private); |
| 1671 | xfs_end_io_direct); | 1487 | } else { |
| 1488 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, | ||
| 1489 | offset, nr_segs, | ||
| 1490 | xfs_get_blocks_direct, | ||
| 1491 | NULL); | ||
| 1492 | } | ||
| 1672 | 1493 | ||
| 1673 | if (unlikely(ret != -EIOCBQUEUED && iocb->private)) | ||
| 1674 | xfs_destroy_ioend(iocb->private); | ||
| 1675 | return ret; | 1494 | return ret; |
| 1676 | } | 1495 | } |
| 1677 | 1496 | ||
| @@ -1686,8 +1505,8 @@ xfs_vm_write_begin( | |||
| 1686 | void **fsdata) | 1505 | void **fsdata) |
| 1687 | { | 1506 | { |
| 1688 | *pagep = NULL; | 1507 | *pagep = NULL; |
| 1689 | return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 1508 | return block_write_begin(file, mapping, pos, len, flags | AOP_FLAG_NOFS, |
| 1690 | xfs_get_blocks); | 1509 | pagep, fsdata, xfs_get_blocks); |
| 1691 | } | 1510 | } |
| 1692 | 1511 | ||
| 1693 | STATIC sector_t | 1512 | STATIC sector_t |
| @@ -1698,7 +1517,7 @@ xfs_vm_bmap( | |||
| 1698 | struct inode *inode = (struct inode *)mapping->host; | 1517 | struct inode *inode = (struct inode *)mapping->host; |
| 1699 | struct xfs_inode *ip = XFS_I(inode); | 1518 | struct xfs_inode *ip = XFS_I(inode); |
| 1700 | 1519 | ||
| 1701 | xfs_itrace_entry(XFS_I(inode)); | 1520 | trace_xfs_vm_bmap(XFS_I(inode)); |
| 1702 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 1521 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
| 1703 | xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); | 1522 | xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); |
| 1704 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 1523 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
