Diffstat (limited to 'fs/xfs/linux-2.6/xfs_aops.c')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_aops.c | 611 |
1 file changed, 215 insertions(+), 396 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 34640d6dbdc..d24e78f32f3 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -21,19 +21,12 @@ | |||
21 | #include "xfs_inum.h" | 21 | #include "xfs_inum.h" |
22 | #include "xfs_sb.h" | 22 | #include "xfs_sb.h" |
23 | #include "xfs_ag.h" | 23 | #include "xfs_ag.h" |
24 | #include "xfs_dir2.h" | ||
25 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
26 | #include "xfs_dmapi.h" | ||
27 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
28 | #include "xfs_bmap_btree.h" | 26 | #include "xfs_bmap_btree.h" |
29 | #include "xfs_alloc_btree.h" | ||
30 | #include "xfs_ialloc_btree.h" | ||
31 | #include "xfs_dir2_sf.h" | ||
32 | #include "xfs_attr_sf.h" | ||
33 | #include "xfs_dinode.h" | 27 | #include "xfs_dinode.h" |
34 | #include "xfs_inode.h" | 28 | #include "xfs_inode.h" |
35 | #include "xfs_alloc.h" | 29 | #include "xfs_alloc.h" |
36 | #include "xfs_btree.h" | ||
37 | #include "xfs_error.h" | 30 | #include "xfs_error.h" |
38 | #include "xfs_rw.h" | 31 | #include "xfs_rw.h" |
39 | #include "xfs_iomap.h" | 32 | #include "xfs_iomap.h" |
@@ -92,18 +85,15 @@ void | |||
92 | xfs_count_page_state( | 85 | xfs_count_page_state( |
93 | struct page *page, | 86 | struct page *page, |
94 | int *delalloc, | 87 | int *delalloc, |
95 | int *unmapped, | ||
96 | int *unwritten) | 88 | int *unwritten) |
97 | { | 89 | { |
98 | struct buffer_head *bh, *head; | 90 | struct buffer_head *bh, *head; |
99 | 91 | ||
100 | *delalloc = *unmapped = *unwritten = 0; | 92 | *delalloc = *unwritten = 0; |
101 | 93 | ||
102 | bh = head = page_buffers(page); | 94 | bh = head = page_buffers(page); |
103 | do { | 95 | do { |
104 | if (buffer_uptodate(bh) && !buffer_mapped(bh)) | 96 | if (buffer_unwritten(bh)) |
105 | (*unmapped) = 1; | ||
106 | else if (buffer_unwritten(bh)) | ||
107 | (*unwritten) = 1; | 97 | (*unwritten) = 1; |
108 | else if (buffer_delay(bh)) | 98 | else if (buffer_delay(bh)) |
109 | (*delalloc) = 1; | 99 | (*delalloc) = 1; |
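
xfs_count_page_state() above walks the page's circular buffer_head list once and reports whether any delalloc or unwritten buffers remain; the old unmapped counter is gone. A minimal userspace sketch of the same do/while walk, with a simplified stand-in struct rather than the kernel's buffer_head and BH_* state bits:

#include <stdio.h>

/* simplified stand-in for struct buffer_head: each buffer points to the
 * next buffer on the page, and the last one points back to the first */
struct buf {
	int delay;		/* models buffer_delay()     */
	int unwritten;		/* models buffer_unwritten() */
	struct buf *next;
};

static void count_page_state(struct buf *head, int *delalloc, int *unwritten)
{
	struct buf *bh = head;

	*delalloc = *unwritten = 0;
	do {
		if (bh->unwritten)
			*unwritten = 1;
		else if (bh->delay)
			*delalloc = 1;
	} while ((bh = bh->next) != head);
}

int main(void)
{
	struct buf b[4] = { { .delay = 1 }, { 0 }, { .unwritten = 1 }, { 0 } };
	int i, delalloc, unwritten;

	for (i = 0; i < 4; i++)
		b[i].next = &b[(i + 1) % 4];

	count_page_state(&b[0], &delalloc, &unwritten);
	printf("delalloc=%d unwritten=%d\n", delalloc, unwritten);
	return 0;
}
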
@@ -212,23 +202,17 @@ xfs_setfilesize( | |||
212 | } | 202 | } |
213 | 203 | ||
214 | /* | 204 | /* |
215 | * Schedule IO completion handling on a xfsdatad if this was | 205 | * Schedule IO completion handling on the final put of an ioend. |
216 | * the final hold on this ioend. If we are asked to wait, | ||
217 | * flush the workqueue. | ||
218 | */ | 206 | */ |
219 | STATIC void | 207 | STATIC void |
220 | xfs_finish_ioend( | 208 | xfs_finish_ioend( |
221 | xfs_ioend_t *ioend, | 209 | struct xfs_ioend *ioend) |
222 | int wait) | ||
223 | { | 210 | { |
224 | if (atomic_dec_and_test(&ioend->io_remaining)) { | 211 | if (atomic_dec_and_test(&ioend->io_remaining)) { |
225 | struct workqueue_struct *wq; | 212 | if (ioend->io_type == IO_UNWRITTEN) |
226 | 213 | queue_work(xfsconvertd_workqueue, &ioend->io_work); | |
227 | wq = (ioend->io_type == IO_UNWRITTEN) ? | 214 | else |
228 | xfsconvertd_workqueue : xfsdatad_workqueue; | 215 | queue_work(xfsdatad_workqueue, &ioend->io_work); |
229 | queue_work(wq, &ioend->io_work); | ||
230 | if (wait) | ||
231 | flush_workqueue(wq); | ||
232 | } | 216 | } |
233 | } | 217 | } |
234 | 218 | ||
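
The reworked xfs_finish_ioend() above no longer takes a wait flag: when the last reference on the ioend is dropped, the completion work is simply queued to the conversion workqueue for unwritten extents or to the data workqueue otherwise. A rough userspace model of that drop-the-last-reference dispatch, using C11 atomics and printf stubs in place of the kernel's atomic_t and queue_work():

#include <stdatomic.h>
#include <stdio.h>

enum io_type { IO_NEW, IO_UNWRITTEN };

struct ioend {
	atomic_int remaining;	/* models ioend->io_remaining */
	enum io_type type;	/* models ioend->io_type      */
};

/* stand-ins for queue_work() on the xfsconvertd/xfsdatad workqueues */
static void queue_convert_work(struct ioend *io)
{
	(void)io;
	printf("conversion work queued\n");
}

static void queue_data_work(struct ioend *io)
{
	(void)io;
	printf("data completion queued\n");
}

static void finish_ioend(struct ioend *io)
{
	/* only the holder of the final reference schedules completion */
	if (atomic_fetch_sub(&io->remaining, 1) == 1) {
		if (io->type == IO_UNWRITTEN)
			queue_convert_work(io);
		else
			queue_data_work(io);
	}
}

int main(void)
{
	struct ioend io = { .type = IO_UNWRITTEN };

	atomic_init(&io.remaining, 2);
	finish_ioend(&io);	/* first put: nothing happens */
	finish_ioend(&io);	/* final put: work is queued  */
	return 0;
}
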
@@ -272,11 +256,25 @@ xfs_end_io( | |||
272 | */ | 256 | */ |
273 | if (error == EAGAIN) { | 257 | if (error == EAGAIN) { |
274 | atomic_inc(&ioend->io_remaining); | 258 | atomic_inc(&ioend->io_remaining); |
275 | xfs_finish_ioend(ioend, 0); | 259 | xfs_finish_ioend(ioend); |
276 | /* ensure we don't spin on blocked ioends */ | 260 | /* ensure we don't spin on blocked ioends */ |
277 | delay(1); | 261 | delay(1); |
278 | } else | 262 | } else { |
263 | if (ioend->io_iocb) | ||
264 | aio_complete(ioend->io_iocb, ioend->io_result, 0); | ||
279 | xfs_destroy_ioend(ioend); | 265 | xfs_destroy_ioend(ioend); |
266 | } | ||
267 | } | ||
268 | |||
269 | /* | ||
270 | * Call IO completion handling in caller context on the final put of an ioend. | ||
271 | */ | ||
272 | STATIC void | ||
273 | xfs_finish_ioend_sync( | ||
274 | struct xfs_ioend *ioend) | ||
275 | { | ||
276 | if (atomic_dec_and_test(&ioend->io_remaining)) | ||
277 | xfs_end_io(&ioend->io_work); | ||
280 | } | 278 | } |
281 | 279 | ||
282 | /* | 280 | /* |
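
Two smaller pieces in the hunk above are worth spelling out: on EAGAIN, xfs_end_io() takes a fresh reference and pushes the ioend back through xfs_finish_ioend() so the work is retried later instead of spinning, and the new xfs_finish_ioend_sync() runs the handler directly in the caller's context. A compilable toy version of the retry path, where the workqueue is reduced to a direct call and the delay(1) throttle is omitted:

#include <errno.h>
#include <stdio.h>

struct ioend {
	int remaining;		/* reference count on the ioend */
	int attempts;
};

static void end_io(struct ioend *io);

/* stand-in for queue_work(): just run the completion handler again */
static void queue_work(struct ioend *io)
{
	end_io(io);
}

static void finish_ioend(struct ioend *io)
{
	if (--io->remaining == 0)
		queue_work(io);
}

/* stand-in for the size update / extent conversion; fails twice with EAGAIN */
static int do_completion(struct ioend *io)
{
	return io->attempts++ < 2 ? -EAGAIN : 0;
}

static void end_io(struct ioend *io)
{
	if (do_completion(io) == -EAGAIN) {
		io->remaining++;	/* keep the ioend alive ...            */
		finish_ioend(io);	/* ... and requeue it through the same */
		return;			/* path; the real code also sleeps     */
	}
	printf("completed after %d attempts\n", io->attempts);
}

int main(void)
{
	struct ioend io = { .remaining = 1, .attempts = 0 };

	finish_ioend(&io);	/* final put runs the handler, retrying on EAGAIN */
	return 0;
}
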
@@ -309,6 +307,8 @@ xfs_alloc_ioend( | |||
309 | atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); | 307 | atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); |
310 | ioend->io_offset = 0; | 308 | ioend->io_offset = 0; |
311 | ioend->io_size = 0; | 309 | ioend->io_size = 0; |
310 | ioend->io_iocb = NULL; | ||
311 | ioend->io_result = 0; | ||
312 | 312 | ||
313 | INIT_WORK(&ioend->io_work, xfs_end_io); | 313 | INIT_WORK(&ioend->io_work, xfs_end_io); |
314 | return ioend; | 314 | return ioend; |
@@ -358,7 +358,7 @@ xfs_end_bio( | |||
358 | bio->bi_end_io = NULL; | 358 | bio->bi_end_io = NULL; |
359 | bio_put(bio); | 359 | bio_put(bio); |
360 | 360 | ||
361 | xfs_finish_ioend(ioend, 0); | 361 | xfs_finish_ioend(ioend); |
362 | } | 362 | } |
363 | 363 | ||
364 | STATIC void | 364 | STATIC void |
@@ -500,7 +500,7 @@ xfs_submit_ioend( | |||
500 | } | 500 | } |
501 | if (bio) | 501 | if (bio) |
502 | xfs_submit_ioend_bio(wbc, ioend, bio); | 502 | xfs_submit_ioend_bio(wbc, ioend, bio); |
503 | xfs_finish_ioend(ioend, 0); | 503 | xfs_finish_ioend(ioend); |
504 | } while ((ioend = next) != NULL); | 504 | } while ((ioend = next) != NULL); |
505 | } | 505 | } |
506 | 506 | ||
@@ -614,31 +614,30 @@ xfs_map_at_offset( | |||
614 | STATIC unsigned int | 614 | STATIC unsigned int |
615 | xfs_probe_page( | 615 | xfs_probe_page( |
616 | struct page *page, | 616 | struct page *page, |
617 | unsigned int pg_offset, | 617 | unsigned int pg_offset) |
618 | int mapped) | ||
619 | { | 618 | { |
619 | struct buffer_head *bh, *head; | ||
620 | int ret = 0; | 620 | int ret = 0; |
621 | 621 | ||
622 | if (PageWriteback(page)) | 622 | if (PageWriteback(page)) |
623 | return 0; | 623 | return 0; |
624 | if (!PageDirty(page)) | ||
625 | return 0; | ||
626 | if (!page->mapping) | ||
627 | return 0; | ||
628 | if (!page_has_buffers(page)) | ||
629 | return 0; | ||
624 | 630 | ||
625 | if (page->mapping && PageDirty(page)) { | 631 | bh = head = page_buffers(page); |
626 | if (page_has_buffers(page)) { | 632 | do { |
627 | struct buffer_head *bh, *head; | 633 | if (!buffer_uptodate(bh)) |
628 | 634 | break; | |
629 | bh = head = page_buffers(page); | 635 | if (!buffer_mapped(bh)) |
630 | do { | 636 | break; |
631 | if (!buffer_uptodate(bh)) | 637 | ret += bh->b_size; |
632 | break; | 638 | if (ret >= pg_offset) |
633 | if (mapped != buffer_mapped(bh)) | 639 | break; |
634 | break; | 640 | } while ((bh = bh->b_this_page) != head); |
635 | ret += bh->b_size; | ||
636 | if (ret >= pg_offset) | ||
637 | break; | ||
638 | } while ((bh = bh->b_this_page) != head); | ||
639 | } else | ||
640 | ret = mapped ? 0 : PAGE_CACHE_SIZE; | ||
641 | } | ||
642 | 641 | ||
643 | return ret; | 642 | return ret; |
644 | } | 643 | } |
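
With the unmapped case gone, xfs_probe_page() above reduces to a few early-exit guards followed by one accumulation loop: it returns how many bytes of contiguous uptodate, mapped buffers start the page, capped at pg_offset. A small userspace sketch of that accumulation, using a plain array in place of the circular b_this_page ring:

#include <stdio.h>

struct buf {
	int uptodate;
	int mapped;
	unsigned int size;
};

/*
 * Sum the leading run of uptodate, mapped buffers, capped at pg_offset --
 * the same early-exit accumulation xfs_probe_page() performs on a page.
 */
static unsigned int probe_page(const struct buf *bh, int nbufs,
			       unsigned int pg_offset)
{
	unsigned int ret = 0;
	int i;

	for (i = 0; i < nbufs; i++) {
		if (!bh[i].uptodate || !bh[i].mapped)
			break;
		ret += bh[i].size;
		if (ret >= pg_offset)
			break;
	}
	return ret;
}

int main(void)
{
	struct buf bufs[4] = {
		{ 1, 1, 1024 }, { 1, 1, 1024 }, { 1, 0, 1024 }, { 1, 1, 1024 },
	};

	printf("probed %u bytes\n", probe_page(bufs, 4, 4096));
	return 0;
}
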
@@ -648,8 +647,7 @@ xfs_probe_cluster( | |||
648 | struct inode *inode, | 647 | struct inode *inode, |
649 | struct page *startpage, | 648 | struct page *startpage, |
650 | struct buffer_head *bh, | 649 | struct buffer_head *bh, |
651 | struct buffer_head *head, | 650 | struct buffer_head *head) |
652 | int mapped) | ||
653 | { | 651 | { |
654 | struct pagevec pvec; | 652 | struct pagevec pvec; |
655 | pgoff_t tindex, tlast, tloff; | 653 | pgoff_t tindex, tlast, tloff; |
@@ -658,7 +656,7 @@ xfs_probe_cluster( | |||
658 | 656 | ||
659 | /* First sum forwards in this page */ | 657 | /* First sum forwards in this page */ |
660 | do { | 658 | do { |
661 | if (!buffer_uptodate(bh) || (mapped != buffer_mapped(bh))) | 659 | if (!buffer_uptodate(bh) || !buffer_mapped(bh)) |
662 | return total; | 660 | return total; |
663 | total += bh->b_size; | 661 | total += bh->b_size; |
664 | } while ((bh = bh->b_this_page) != head); | 662 | } while ((bh = bh->b_this_page) != head); |
@@ -692,7 +690,7 @@ xfs_probe_cluster( | |||
692 | pg_offset = PAGE_CACHE_SIZE; | 690 | pg_offset = PAGE_CACHE_SIZE; |
693 | 691 | ||
694 | if (page->index == tindex && trylock_page(page)) { | 692 | if (page->index == tindex && trylock_page(page)) { |
695 | pg_len = xfs_probe_page(page, pg_offset, mapped); | 693 | pg_len = xfs_probe_page(page, pg_offset); |
696 | unlock_page(page); | 694 | unlock_page(page); |
697 | } | 695 | } |
698 | 696 | ||
@@ -761,7 +759,6 @@ xfs_convert_page( | |||
761 | struct xfs_bmbt_irec *imap, | 759 | struct xfs_bmbt_irec *imap, |
762 | xfs_ioend_t **ioendp, | 760 | xfs_ioend_t **ioendp, |
763 | struct writeback_control *wbc, | 761 | struct writeback_control *wbc, |
764 | int startio, | ||
765 | int all_bh) | 762 | int all_bh) |
766 | { | 763 | { |
767 | struct buffer_head *bh, *head; | 764 | struct buffer_head *bh, *head; |
@@ -832,19 +829,14 @@ xfs_convert_page( | |||
832 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); | 829 | ASSERT(imap->br_startblock != DELAYSTARTBLOCK); |
833 | 830 | ||
834 | xfs_map_at_offset(inode, bh, imap, offset); | 831 | xfs_map_at_offset(inode, bh, imap, offset); |
835 | if (startio) { | 832 | xfs_add_to_ioend(inode, bh, offset, type, |
836 | xfs_add_to_ioend(inode, bh, offset, | 833 | ioendp, done); |
837 | type, ioendp, done); | 834 | |
838 | } else { | ||
839 | set_buffer_dirty(bh); | ||
840 | unlock_buffer(bh); | ||
841 | mark_buffer_dirty(bh); | ||
842 | } | ||
843 | page_dirty--; | 835 | page_dirty--; |
844 | count++; | 836 | count++; |
845 | } else { | 837 | } else { |
846 | type = IO_NEW; | 838 | type = IO_NEW; |
847 | if (buffer_mapped(bh) && all_bh && startio) { | 839 | if (buffer_mapped(bh) && all_bh) { |
848 | lock_buffer(bh); | 840 | lock_buffer(bh); |
849 | xfs_add_to_ioend(inode, bh, offset, | 841 | xfs_add_to_ioend(inode, bh, offset, |
850 | type, ioendp, done); | 842 | type, ioendp, done); |
@@ -859,14 +851,12 @@ xfs_convert_page( | |||
859 | if (uptodate && bh == head) | 851 | if (uptodate && bh == head) |
860 | SetPageUptodate(page); | 852 | SetPageUptodate(page); |
861 | 853 | ||
862 | if (startio) { | 854 | if (count) { |
863 | if (count) { | 855 | wbc->nr_to_write--; |
864 | wbc->nr_to_write--; | 856 | if (wbc->nr_to_write <= 0) |
865 | if (wbc->nr_to_write <= 0) | 857 | done = 1; |
866 | done = 1; | ||
867 | } | ||
868 | xfs_start_page_writeback(page, !page_dirty, count); | ||
869 | } | 858 | } |
859 | xfs_start_page_writeback(page, !page_dirty, count); | ||
870 | 860 | ||
871 | return done; | 861 | return done; |
872 | fail_unlock_page: | 862 | fail_unlock_page: |
@@ -886,7 +876,6 @@ xfs_cluster_write( | |||
886 | struct xfs_bmbt_irec *imap, | 876 | struct xfs_bmbt_irec *imap, |
887 | xfs_ioend_t **ioendp, | 877 | xfs_ioend_t **ioendp, |
888 | struct writeback_control *wbc, | 878 | struct writeback_control *wbc, |
889 | int startio, | ||
890 | int all_bh, | 879 | int all_bh, |
891 | pgoff_t tlast) | 880 | pgoff_t tlast) |
892 | { | 881 | { |
@@ -902,7 +891,7 @@ xfs_cluster_write( | |||
902 | 891 | ||
903 | for (i = 0; i < pagevec_count(&pvec); i++) { | 892 | for (i = 0; i < pagevec_count(&pvec); i++) { |
904 | done = xfs_convert_page(inode, pvec.pages[i], tindex++, | 893 | done = xfs_convert_page(inode, pvec.pages[i], tindex++, |
905 | imap, ioendp, wbc, startio, all_bh); | 894 | imap, ioendp, wbc, all_bh); |
906 | if (done) | 895 | if (done) |
907 | break; | 896 | break; |
908 | } | 897 | } |
@@ -981,7 +970,7 @@ xfs_aops_discard_page( | |||
981 | */ | 970 | */ |
982 | error = xfs_bmapi(NULL, ip, offset_fsb, 1, | 971 | error = xfs_bmapi(NULL, ip, offset_fsb, 1, |
983 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, | 972 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, |
984 | &nimaps, NULL, NULL); | 973 | &nimaps, NULL); |
985 | 974 | ||
986 | if (error) { | 975 | if (error) { |
987 | /* something screwed, just bail */ | 976 | /* something screwed, just bail */ |
@@ -1009,7 +998,7 @@ xfs_aops_discard_page( | |||
1009 | */ | 998 | */ |
1010 | xfs_bmap_init(&flist, &firstblock); | 999 | xfs_bmap_init(&flist, &firstblock); |
1011 | error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock, | 1000 | error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock, |
1012 | &flist, NULL, &done); | 1001 | &flist, &done); |
1013 | 1002 | ||
1014 | ASSERT(!flist.xbf_count && !flist.xbf_first); | 1003 | ASSERT(!flist.xbf_count && !flist.xbf_first); |
1015 | if (error) { | 1004 | if (error) { |
@@ -1032,50 +1021,66 @@ out_invalidate: | |||
1032 | } | 1021 | } |
1033 | 1022 | ||
1034 | /* | 1023 | /* |
1035 | * Calling this without startio set means we are being asked to make a dirty | 1024 | * Write out a dirty page. |
1036 | * page ready for freeing it's buffers. When called with startio set then | 1025 | * |
1037 | * we are coming from writepage. | 1026 | * For delalloc space on the page we need to allocate space and flush it. |
1027 | * For unwritten space on the page we need to start the conversion to | ||
1028 | * regular allocated space. | ||
1029 | * For any other dirty buffer heads on the page we should flush them. | ||
1038 | * | 1030 | * |
1039 | * When called with startio set it is important that we write the WHOLE | 1031 | * If we detect that a transaction would be required to flush the page, we |
1040 | * page if possible. | 1032 | * have to check the process flags first, if we are already in a transaction |
1041 | * The bh->b_state's cannot know if any of the blocks or which block for | 1033 | * or disk I/O during allocations is off, we need to fail the writepage and |
1042 | * that matter are dirty due to mmap writes, and therefore bh uptodate is | 1034 | * redirty the page. |
1043 | * only valid if the page itself isn't completely uptodate. Some layers | ||
1044 | * may clear the page dirty flag prior to calling write page, under the | ||
1045 | * assumption the entire page will be written out; by not writing out the | ||
1046 | * whole page the page can be reused before all valid dirty data is | ||
1047 | * written out. Note: in the case of a page that has been dirty'd by | ||
1048 | * mapwrite and but partially setup by block_prepare_write the | ||
1049 | * bh->b_states's will not agree and only ones setup by BPW/BCW will have | ||
1050 | * valid state, thus the whole page must be written out thing. | ||
1051 | */ | 1035 | */ |
1052 | |||
1053 | STATIC int | 1036 | STATIC int |
1054 | xfs_page_state_convert( | 1037 | xfs_vm_writepage( |
1055 | struct inode *inode, | 1038 | struct page *page, |
1056 | struct page *page, | 1039 | struct writeback_control *wbc) |
1057 | struct writeback_control *wbc, | ||
1058 | int startio, | ||
1059 | int unmapped) /* also implies page uptodate */ | ||
1060 | { | 1040 | { |
1041 | struct inode *inode = page->mapping->host; | ||
1042 | int delalloc, unwritten; | ||
1061 | struct buffer_head *bh, *head; | 1043 | struct buffer_head *bh, *head; |
1062 | struct xfs_bmbt_irec imap; | 1044 | struct xfs_bmbt_irec imap; |
1063 | xfs_ioend_t *ioend = NULL, *iohead = NULL; | 1045 | xfs_ioend_t *ioend = NULL, *iohead = NULL; |
1064 | loff_t offset; | 1046 | loff_t offset; |
1065 | unsigned long p_offset = 0; | ||
1066 | unsigned int type; | 1047 | unsigned int type; |
1067 | __uint64_t end_offset; | 1048 | __uint64_t end_offset; |
1068 | pgoff_t end_index, last_index; | 1049 | pgoff_t end_index, last_index; |
1069 | ssize_t size, len; | 1050 | ssize_t size, len; |
1070 | int flags, err, imap_valid = 0, uptodate = 1; | 1051 | int flags, err, imap_valid = 0, uptodate = 1; |
1071 | int page_dirty, count = 0; | 1052 | int count = 0; |
1072 | int trylock = 0; | 1053 | int all_bh = 0; |
1073 | int all_bh = unmapped; | ||
1074 | 1054 | ||
1075 | if (startio) { | 1055 | trace_xfs_writepage(inode, page, 0); |
1076 | if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking) | 1056 | |
1077 | trylock |= BMAPI_TRYLOCK; | 1057 | ASSERT(page_has_buffers(page)); |
1078 | } | 1058 | |
1059 | /* | ||
1060 | * Refuse to write the page out if we are called from reclaim context. | ||
1061 | * | ||
1062 | * This avoids stack overflows when called from deeply used stacks in | ||
1063 | * random callers for direct reclaim or memcg reclaim. We explicitly | ||
1064 | * allow reclaim from kswapd as the stack usage there is relatively low. | ||
1065 | * | ||
1066 | * This should really be done by the core VM, but until that happens | ||
1067 | * filesystems like XFS, btrfs and ext4 have to take care of this | ||
1068 | * by themselves. | ||
1069 | */ | ||
1070 | if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) | ||
1071 | goto out_fail; | ||
1072 | |||
1073 | /* | ||
1074 | * We need a transaction if there are delalloc or unwritten buffers | ||
1075 | * on the page. | ||
1076 | * | ||
1077 | * If we need a transaction and the process flags say we are already | ||
1078 | * in a transaction, or no IO is allowed then mark the page dirty | ||
1079 | * again and leave the page as is. | ||
1080 | */ | ||
1081 | xfs_count_page_state(page, &delalloc, &unwritten); | ||
1082 | if ((current->flags & PF_FSTRANS) && (delalloc || unwritten)) | ||
1083 | goto out_fail; | ||
1079 | 1084 | ||
1080 | /* Is this page beyond the end of the file? */ | 1085 | /* Is this page beyond the end of the file? */ |
1081 | offset = i_size_read(inode); | 1086 | offset = i_size_read(inode); |
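
The reclaim check added above relies on a flag-mask idiom: (current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC is true only for direct or memcg reclaim, not for writeback issued by kswapd. A tiny demonstration of the same test with made-up flag values (the real PF_* constants in the kernel differ):

#include <stdio.h>

#define PF_MEMALLOC	0x1	/* illustrative value only */
#define PF_KSWAPD	0x2	/* illustrative value only */

/* writepage is refused only for direct/memcg reclaim, never for kswapd */
static int refuse_writepage(unsigned int flags)
{
	return (flags & (PF_MEMALLOC | PF_KSWAPD)) == PF_MEMALLOC;
}

int main(void)
{
	printf("direct reclaim:   %d\n", refuse_writepage(PF_MEMALLOC));
	printf("kswapd reclaim:   %d\n", refuse_writepage(PF_MEMALLOC | PF_KSWAPD));
	printf("normal writeback: %d\n", refuse_writepage(0));
	return 0;
}
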
@@ -1084,50 +1089,33 @@ xfs_page_state_convert( | |||
1084 | if (page->index >= end_index) { | 1089 | if (page->index >= end_index) { |
1085 | if ((page->index >= end_index + 1) || | 1090 | if ((page->index >= end_index + 1) || |
1086 | !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { | 1091 | !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { |
1087 | if (startio) | 1092 | unlock_page(page); |
1088 | unlock_page(page); | ||
1089 | return 0; | 1093 | return 0; |
1090 | } | 1094 | } |
1091 | } | 1095 | } |
1092 | 1096 | ||
1093 | /* | ||
1094 | * page_dirty is initially a count of buffers on the page before | ||
1095 | * EOF and is decremented as we move each into a cleanable state. | ||
1096 | * | ||
1097 | * Derivation: | ||
1098 | * | ||
1099 | * End offset is the highest offset that this page should represent. | ||
1100 | * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) | ||
1101 | * will evaluate non-zero and be less than PAGE_CACHE_SIZE and | ||
1102 | * hence give us the correct page_dirty count. On any other page, | ||
1103 | * it will be zero and in that case we need page_dirty to be the | ||
1104 | * count of buffers on the page. | ||
1105 | */ | ||
1106 | end_offset = min_t(unsigned long long, | 1097 | end_offset = min_t(unsigned long long, |
1107 | (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset); | 1098 | (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, |
1099 | offset); | ||
1108 | len = 1 << inode->i_blkbits; | 1100 | len = 1 << inode->i_blkbits; |
1109 | p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), | ||
1110 | PAGE_CACHE_SIZE); | ||
1111 | p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; | ||
1112 | page_dirty = p_offset / len; | ||
1113 | 1101 | ||
1114 | bh = head = page_buffers(page); | 1102 | bh = head = page_buffers(page); |
1115 | offset = page_offset(page); | 1103 | offset = page_offset(page); |
1116 | flags = BMAPI_READ; | 1104 | flags = BMAPI_READ; |
1117 | type = IO_NEW; | 1105 | type = IO_NEW; |
1118 | 1106 | ||
1119 | /* TODO: cleanup count and page_dirty */ | ||
1120 | |||
1121 | do { | 1107 | do { |
1122 | if (offset >= end_offset) | 1108 | if (offset >= end_offset) |
1123 | break; | 1109 | break; |
1124 | if (!buffer_uptodate(bh)) | 1110 | if (!buffer_uptodate(bh)) |
1125 | uptodate = 0; | 1111 | uptodate = 0; |
1126 | if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) { | 1112 | |
1127 | /* | 1113 | /* |
1128 | * the iomap is actually still valid, but the ioend | 1114 | * A hole may still be marked uptodate because discard_buffer |
1129 | * isn't. shouldn't happen too often. | 1115 | * leaves the flag set. |
1130 | */ | 1116 | */ |
1117 | if (!buffer_mapped(bh) && buffer_uptodate(bh)) { | ||
1118 | ASSERT(!buffer_dirty(bh)); | ||
1131 | imap_valid = 0; | 1119 | imap_valid = 0; |
1132 | continue; | 1120 | continue; |
1133 | } | 1121 | } |
@@ -1135,19 +1123,7 @@ xfs_page_state_convert( | |||
1135 | if (imap_valid) | 1123 | if (imap_valid) |
1136 | imap_valid = xfs_imap_valid(inode, &imap, offset); | 1124 | imap_valid = xfs_imap_valid(inode, &imap, offset); |
1137 | 1125 | ||
1138 | /* | 1126 | if (buffer_unwritten(bh) || buffer_delay(bh)) { |
1139 | * First case, map an unwritten extent and prepare for | ||
1140 | * extent state conversion transaction on completion. | ||
1141 | * | ||
1142 | * Second case, allocate space for a delalloc buffer. | ||
1143 | * We can return EAGAIN here in the release page case. | ||
1144 | * | ||
1145 | * Third case, an unmapped buffer was found, and we are | ||
1146 | * in a path where we need to write the whole page out. | ||
1147 | */ | ||
1148 | if (buffer_unwritten(bh) || buffer_delay(bh) || | ||
1149 | ((buffer_uptodate(bh) || PageUptodate(page)) && | ||
1150 | !buffer_mapped(bh) && (unmapped || startio))) { | ||
1151 | int new_ioend = 0; | 1127 | int new_ioend = 0; |
1152 | 1128 | ||
1153 | /* | 1129 | /* |
@@ -1161,15 +1137,16 @@ xfs_page_state_convert( | |||
1161 | flags = BMAPI_WRITE | BMAPI_IGNSTATE; | 1137 | flags = BMAPI_WRITE | BMAPI_IGNSTATE; |
1162 | } else if (buffer_delay(bh)) { | 1138 | } else if (buffer_delay(bh)) { |
1163 | type = IO_DELAY; | 1139 | type = IO_DELAY; |
1164 | flags = BMAPI_ALLOCATE | trylock; | 1140 | flags = BMAPI_ALLOCATE; |
1165 | } else { | 1141 | |
1166 | type = IO_NEW; | 1142 | if (wbc->sync_mode == WB_SYNC_NONE && |
1167 | flags = BMAPI_WRITE | BMAPI_MMAP; | 1143 | wbc->nonblocking) |
1144 | flags |= BMAPI_TRYLOCK; | ||
1168 | } | 1145 | } |
1169 | 1146 | ||
1170 | if (!imap_valid) { | 1147 | if (!imap_valid) { |
1171 | /* | 1148 | /* |
1172 | * if we didn't have a valid mapping then we | 1149 | * If we didn't have a valid mapping then we |
1173 | * need to ensure that we put the new mapping | 1150 | * need to ensure that we put the new mapping |
1174 | * in a new ioend structure. This needs to be | 1151 | * in a new ioend structure. This needs to be |
1175 | * done to ensure that the ioends correctly | 1152 | * done to ensure that the ioends correctly |
@@ -1177,14 +1154,7 @@ xfs_page_state_convert( | |||
1177 | * for unwritten extent conversion. | 1154 | * for unwritten extent conversion. |
1178 | */ | 1155 | */ |
1179 | new_ioend = 1; | 1156 | new_ioend = 1; |
1180 | if (type == IO_NEW) { | 1157 | err = xfs_map_blocks(inode, offset, len, |
1181 | size = xfs_probe_cluster(inode, | ||
1182 | page, bh, head, 0); | ||
1183 | } else { | ||
1184 | size = len; | ||
1185 | } | ||
1186 | |||
1187 | err = xfs_map_blocks(inode, offset, size, | ||
1188 | &imap, flags); | 1158 | &imap, flags); |
1189 | if (err) | 1159 | if (err) |
1190 | goto error; | 1160 | goto error; |
@@ -1193,19 +1163,11 @@ xfs_page_state_convert( | |||
1193 | } | 1163 | } |
1194 | if (imap_valid) { | 1164 | if (imap_valid) { |
1195 | xfs_map_at_offset(inode, bh, &imap, offset); | 1165 | xfs_map_at_offset(inode, bh, &imap, offset); |
1196 | if (startio) { | 1166 | xfs_add_to_ioend(inode, bh, offset, type, |
1197 | xfs_add_to_ioend(inode, bh, offset, | 1167 | &ioend, new_ioend); |
1198 | type, &ioend, | ||
1199 | new_ioend); | ||
1200 | } else { | ||
1201 | set_buffer_dirty(bh); | ||
1202 | unlock_buffer(bh); | ||
1203 | mark_buffer_dirty(bh); | ||
1204 | } | ||
1205 | page_dirty--; | ||
1206 | count++; | 1168 | count++; |
1207 | } | 1169 | } |
1208 | } else if (buffer_uptodate(bh) && startio) { | 1170 | } else if (buffer_uptodate(bh)) { |
1209 | /* | 1171 | /* |
1210 | * we got here because the buffer is already mapped. | 1172 | * we got here because the buffer is already mapped. |
1211 | * That means it must already have extents allocated | 1173 | * That means it must already have extents allocated |
@@ -1213,8 +1175,7 @@ xfs_page_state_convert( | |||
1213 | */ | 1175 | */ |
1214 | if (!imap_valid || flags != BMAPI_READ) { | 1176 | if (!imap_valid || flags != BMAPI_READ) { |
1215 | flags = BMAPI_READ; | 1177 | flags = BMAPI_READ; |
1216 | size = xfs_probe_cluster(inode, page, bh, | 1178 | size = xfs_probe_cluster(inode, page, bh, head); |
1217 | head, 1); | ||
1218 | err = xfs_map_blocks(inode, offset, size, | 1179 | err = xfs_map_blocks(inode, offset, size, |
1219 | &imap, flags); | 1180 | &imap, flags); |
1220 | if (err) | 1181 | if (err) |
@@ -1233,18 +1194,16 @@ xfs_page_state_convert( | |||
1233 | */ | 1194 | */ |
1234 | type = IO_NEW; | 1195 | type = IO_NEW; |
1235 | if (trylock_buffer(bh)) { | 1196 | if (trylock_buffer(bh)) { |
1236 | ASSERT(buffer_mapped(bh)); | ||
1237 | if (imap_valid) | 1197 | if (imap_valid) |
1238 | all_bh = 1; | 1198 | all_bh = 1; |
1239 | xfs_add_to_ioend(inode, bh, offset, type, | 1199 | xfs_add_to_ioend(inode, bh, offset, type, |
1240 | &ioend, !imap_valid); | 1200 | &ioend, !imap_valid); |
1241 | page_dirty--; | ||
1242 | count++; | 1201 | count++; |
1243 | } else { | 1202 | } else { |
1244 | imap_valid = 0; | 1203 | imap_valid = 0; |
1245 | } | 1204 | } |
1246 | } else if ((buffer_uptodate(bh) || PageUptodate(page)) && | 1205 | } else if (PageUptodate(page)) { |
1247 | (unmapped || startio)) { | 1206 | ASSERT(buffer_mapped(bh)); |
1248 | imap_valid = 0; | 1207 | imap_valid = 0; |
1249 | } | 1208 | } |
1250 | 1209 | ||
@@ -1256,8 +1215,7 @@ xfs_page_state_convert( | |||
1256 | if (uptodate && bh == head) | 1215 | if (uptodate && bh == head) |
1257 | SetPageUptodate(page); | 1216 | SetPageUptodate(page); |
1258 | 1217 | ||
1259 | if (startio) | 1218 | xfs_start_page_writeback(page, 1, count); |
1260 | xfs_start_page_writeback(page, 1, count); | ||
1261 | 1219 | ||
1262 | if (ioend && imap_valid) { | 1220 | if (ioend && imap_valid) { |
1263 | xfs_off_t end_index; | 1221 | xfs_off_t end_index; |
@@ -1275,131 +1233,27 @@ xfs_page_state_convert( | |||
1275 | end_index = last_index; | 1233 | end_index = last_index; |
1276 | 1234 | ||
1277 | xfs_cluster_write(inode, page->index + 1, &imap, &ioend, | 1235 | xfs_cluster_write(inode, page->index + 1, &imap, &ioend, |
1278 | wbc, startio, all_bh, end_index); | 1236 | wbc, all_bh, end_index); |
1279 | } | 1237 | } |
1280 | 1238 | ||
1281 | if (iohead) | 1239 | if (iohead) |
1282 | xfs_submit_ioend(wbc, iohead); | 1240 | xfs_submit_ioend(wbc, iohead); |
1283 | 1241 | ||
1284 | return page_dirty; | 1242 | return 0; |
1285 | 1243 | ||
1286 | error: | 1244 | error: |
1287 | if (iohead) | 1245 | if (iohead) |
1288 | xfs_cancel_ioend(iohead); | 1246 | xfs_cancel_ioend(iohead); |
1289 | 1247 | ||
1290 | /* | 1248 | xfs_aops_discard_page(page); |
1291 | * If it's delalloc and we have nowhere to put it, | 1249 | ClearPageUptodate(page); |
1292 | * throw it away, unless the lower layers told | 1250 | unlock_page(page); |
1293 | * us to try again. | ||
1294 | */ | ||
1295 | if (err != -EAGAIN) { | ||
1296 | if (!unmapped) | ||
1297 | xfs_aops_discard_page(page); | ||
1298 | ClearPageUptodate(page); | ||
1299 | } | ||
1300 | return err; | 1251 | return err; |
1301 | } | ||
1302 | |||
1303 | /* | ||
1304 | * writepage: Called from one of two places: | ||
1305 | * | ||
1306 | * 1. we are flushing a delalloc buffer head. | ||
1307 | * | ||
1308 | * 2. we are writing out a dirty page. Typically the page dirty | ||
1309 | * state is cleared before we get here. In this case is it | ||
1310 | * conceivable we have no buffer heads. | ||
1311 | * | ||
1312 | * For delalloc space on the page we need to allocate space and | ||
1313 | * flush it. For unmapped buffer heads on the page we should | ||
1314 | * allocate space if the page is uptodate. For any other dirty | ||
1315 | * buffer heads on the page we should flush them. | ||
1316 | * | ||
1317 | * If we detect that a transaction would be required to flush | ||
1318 | * the page, we have to check the process flags first, if we | ||
1319 | * are already in a transaction or disk I/O during allocations | ||
1320 | * is off, we need to fail the writepage and redirty the page. | ||
1321 | */ | ||
1322 | |||
1323 | STATIC int | ||
1324 | xfs_vm_writepage( | ||
1325 | struct page *page, | ||
1326 | struct writeback_control *wbc) | ||
1327 | { | ||
1328 | int error; | ||
1329 | int need_trans; | ||
1330 | int delalloc, unmapped, unwritten; | ||
1331 | struct inode *inode = page->mapping->host; | ||
1332 | |||
1333 | trace_xfs_writepage(inode, page, 0); | ||
1334 | |||
1335 | /* | ||
1336 | * Refuse to write the page out if we are called from reclaim context. | ||
1337 | * | ||
1338 | * This is primarily to avoid stack overflows when called from deep | ||
1339 | * used stacks in random callers for direct reclaim, but disabling | ||
1340 | * reclaim for kswap is a nice side-effect as kswapd causes rather | ||
1341 | * suboptimal I/O patters, too. | ||
1342 | * | ||
1343 | * This should really be done by the core VM, but until that happens | ||
1344 | * filesystems like XFS, btrfs and ext4 have to take care of this | ||
1345 | * by themselves. | ||
1346 | */ | ||
1347 | if (current->flags & PF_MEMALLOC) | ||
1348 | goto out_fail; | ||
1349 | |||
1350 | /* | ||
1351 | * We need a transaction if: | ||
1352 | * 1. There are delalloc buffers on the page | ||
1353 | * 2. The page is uptodate and we have unmapped buffers | ||
1354 | * 3. The page is uptodate and we have no buffers | ||
1355 | * 4. There are unwritten buffers on the page | ||
1356 | */ | ||
1357 | |||
1358 | if (!page_has_buffers(page)) { | ||
1359 | unmapped = 1; | ||
1360 | need_trans = 1; | ||
1361 | } else { | ||
1362 | xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); | ||
1363 | if (!PageUptodate(page)) | ||
1364 | unmapped = 0; | ||
1365 | need_trans = delalloc + unmapped + unwritten; | ||
1366 | } | ||
1367 | |||
1368 | /* | ||
1369 | * If we need a transaction and the process flags say | ||
1370 | * we are already in a transaction, or no IO is allowed | ||
1371 | * then mark the page dirty again and leave the page | ||
1372 | * as is. | ||
1373 | */ | ||
1374 | if (current_test_flags(PF_FSTRANS) && need_trans) | ||
1375 | goto out_fail; | ||
1376 | |||
1377 | /* | ||
1378 | * Delay hooking up buffer heads until we have | ||
1379 | * made our go/no-go decision. | ||
1380 | */ | ||
1381 | if (!page_has_buffers(page)) | ||
1382 | create_empty_buffers(page, 1 << inode->i_blkbits, 0); | ||
1383 | |||
1384 | /* | ||
1385 | * Convert delayed allocate, unwritten or unmapped space | ||
1386 | * to real space and flush out to disk. | ||
1387 | */ | ||
1388 | error = xfs_page_state_convert(inode, page, wbc, 1, unmapped); | ||
1389 | if (error == -EAGAIN) | ||
1390 | goto out_fail; | ||
1391 | if (unlikely(error < 0)) | ||
1392 | goto out_unlock; | ||
1393 | |||
1394 | return 0; | ||
1395 | 1252 | ||
1396 | out_fail: | 1253 | out_fail: |
1397 | redirty_page_for_writepage(wbc, page); | 1254 | redirty_page_for_writepage(wbc, page); |
1398 | unlock_page(page); | 1255 | unlock_page(page); |
1399 | return 0; | 1256 | return 0; |
1400 | out_unlock: | ||
1401 | unlock_page(page); | ||
1402 | return error; | ||
1403 | } | 1257 | } |
1404 | 1258 | ||
1405 | STATIC int | 1259 | STATIC int |
@@ -1413,65 +1267,27 @@ xfs_vm_writepages( | |||
1413 | 1267 | ||
1414 | /* | 1268 | /* |
1415 | * Called to move a page into cleanable state - and from there | 1269 | * Called to move a page into cleanable state - and from there |
1416 | * to be released. Possibly the page is already clean. We always | 1270 | * to be released. The page should already be clean. We always |
1417 | * have buffer heads in this call. | 1271 | * have buffer heads in this call. |
1418 | * | 1272 | * |
1419 | * Returns 0 if the page is ok to release, 1 otherwise. | 1273 | * Returns 1 if the page is ok to release, 0 otherwise. |
1420 | * | ||
1421 | * Possible scenarios are: | ||
1422 | * | ||
1423 | * 1. We are being called to release a page which has been written | ||
1424 | * to via regular I/O. buffer heads will be dirty and possibly | ||
1425 | * delalloc. If no delalloc buffer heads in this case then we | ||
1426 | * can just return zero. | ||
1427 | * | ||
1428 | * 2. We are called to release a page which has been written via | ||
1429 | * mmap, all we need to do is ensure there is no delalloc | ||
1430 | * state in the buffer heads, if not we can let the caller | ||
1431 | * free them and we should come back later via writepage. | ||
1432 | */ | 1274 | */ |
1433 | STATIC int | 1275 | STATIC int |
1434 | xfs_vm_releasepage( | 1276 | xfs_vm_releasepage( |
1435 | struct page *page, | 1277 | struct page *page, |
1436 | gfp_t gfp_mask) | 1278 | gfp_t gfp_mask) |
1437 | { | 1279 | { |
1438 | struct inode *inode = page->mapping->host; | 1280 | int delalloc, unwritten; |
1439 | int dirty, delalloc, unmapped, unwritten; | ||
1440 | struct writeback_control wbc = { | ||
1441 | .sync_mode = WB_SYNC_ALL, | ||
1442 | .nr_to_write = 1, | ||
1443 | }; | ||
1444 | 1281 | ||
1445 | trace_xfs_releasepage(inode, page, 0); | 1282 | trace_xfs_releasepage(page->mapping->host, page, 0); |
1446 | |||
1447 | if (!page_has_buffers(page)) | ||
1448 | return 0; | ||
1449 | 1283 | ||
1450 | xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); | 1284 | xfs_count_page_state(page, &delalloc, &unwritten); |
1451 | if (!delalloc && !unwritten) | ||
1452 | goto free_buffers; | ||
1453 | 1285 | ||
1454 | if (!(gfp_mask & __GFP_FS)) | 1286 | if (WARN_ON(delalloc)) |
1455 | return 0; | 1287 | return 0; |
1456 | 1288 | if (WARN_ON(unwritten)) | |
1457 | /* If we are already inside a transaction or the thread cannot | ||
1458 | * do I/O, we cannot release this page. | ||
1459 | */ | ||
1460 | if (current_test_flags(PF_FSTRANS)) | ||
1461 | return 0; | 1289 | return 0; |
1462 | 1290 | ||
1463 | /* | ||
1464 | * Convert delalloc space to real space, do not flush the | ||
1465 | * data out to disk, that will be done by the caller. | ||
1466 | * Never need to allocate space here - we will always | ||
1467 | * come back to writepage in that case. | ||
1468 | */ | ||
1469 | dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0); | ||
1470 | if (dirty == 0 && !unwritten) | ||
1471 | goto free_buffers; | ||
1472 | return 0; | ||
1473 | |||
1474 | free_buffers: | ||
1475 | return try_to_free_buffers(page); | 1291 | return try_to_free_buffers(page); |
1476 | } | 1292 | } |
1477 | 1293 | ||
@@ -1481,9 +1297,9 @@ __xfs_get_blocks( | |||
1481 | sector_t iblock, | 1297 | sector_t iblock, |
1482 | struct buffer_head *bh_result, | 1298 | struct buffer_head *bh_result, |
1483 | int create, | 1299 | int create, |
1484 | int direct, | 1300 | int direct) |
1485 | bmapi_flags_t flags) | ||
1486 | { | 1301 | { |
1302 | int flags = create ? BMAPI_WRITE : BMAPI_READ; | ||
1487 | struct xfs_bmbt_irec imap; | 1303 | struct xfs_bmbt_irec imap; |
1488 | xfs_off_t offset; | 1304 | xfs_off_t offset; |
1489 | ssize_t size; | 1305 | ssize_t size; |
@@ -1498,8 +1314,11 @@ __xfs_get_blocks( | |||
1498 | if (!create && direct && offset >= i_size_read(inode)) | 1314 | if (!create && direct && offset >= i_size_read(inode)) |
1499 | return 0; | 1315 | return 0; |
1500 | 1316 | ||
1501 | error = xfs_iomap(XFS_I(inode), offset, size, | 1317 | if (direct && create) |
1502 | create ? flags : BMAPI_READ, &imap, &nimap, &new); | 1318 | flags |= BMAPI_DIRECT; |
1319 | |||
1320 | error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap, | ||
1321 | &new); | ||
1503 | if (error) | 1322 | if (error) |
1504 | return -error; | 1323 | return -error; |
1505 | if (nimap == 0) | 1324 | if (nimap == 0) |
@@ -1579,8 +1398,7 @@ xfs_get_blocks( | |||
1579 | struct buffer_head *bh_result, | 1398 | struct buffer_head *bh_result, |
1580 | int create) | 1399 | int create) |
1581 | { | 1400 | { |
1582 | return __xfs_get_blocks(inode, iblock, | 1401 | return __xfs_get_blocks(inode, iblock, bh_result, create, 0); |
1583 | bh_result, create, 0, BMAPI_WRITE); | ||
1584 | } | 1402 | } |
1585 | 1403 | ||
1586 | STATIC int | 1404 | STATIC int |
@@ -1590,61 +1408,59 @@ xfs_get_blocks_direct( | |||
1590 | struct buffer_head *bh_result, | 1408 | struct buffer_head *bh_result, |
1591 | int create) | 1409 | int create) |
1592 | { | 1410 | { |
1593 | return __xfs_get_blocks(inode, iblock, | 1411 | return __xfs_get_blocks(inode, iblock, bh_result, create, 1); |
1594 | bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT); | ||
1595 | } | 1412 | } |
1596 | 1413 | ||
1414 | /* | ||
1415 | * Complete a direct I/O write request. | ||
1416 | * | ||
1417 | * If the private argument is non-NULL __xfs_get_blocks signals us that we | ||
1418 | * need to issue a transaction to convert the range from unwritten to written | ||
1419 | * extents. In case this is regular synchronous I/O we just call xfs_end_io | ||
1420 | * to do this and we are done. But in case this was a successful AIO | ||
1421 | * request this handler is called from interrupt context, from which we | ||
1422 | * can't start transactions. In that case offload the I/O completion to | ||
1423 | * the workqueues we also use for buffered I/O completion. | ||
1424 | */ | ||
1597 | STATIC void | 1425 | STATIC void |
1598 | xfs_end_io_direct( | 1426 | xfs_end_io_direct_write( |
1599 | struct kiocb *iocb, | 1427 | struct kiocb *iocb, |
1600 | loff_t offset, | 1428 | loff_t offset, |
1601 | ssize_t size, | 1429 | ssize_t size, |
1602 | void *private) | 1430 | void *private, |
1431 | int ret, | ||
1432 | bool is_async) | ||
1603 | { | 1433 | { |
1604 | xfs_ioend_t *ioend = iocb->private; | 1434 | struct xfs_ioend *ioend = iocb->private; |
1605 | 1435 | ||
1606 | /* | 1436 | /* |
1607 | * Non-NULL private data means we need to issue a transaction to | 1437 | * blockdev_direct_IO can return an error even after the I/O |
1608 | * convert a range from unwritten to written extents. This needs | 1438 | * completion handler was called. Thus we need to protect |
1609 | * to happen from process context but aio+dio I/O completion | 1439 | * against double-freeing. |
1610 | * happens from irq context so we need to defer it to a workqueue. | ||
1611 | * This is not necessary for synchronous direct I/O, but we do | ||
1612 | * it anyway to keep the code uniform and simpler. | ||
1613 | * | ||
1614 | * Well, if only it were that simple. Because synchronous direct I/O | ||
1615 | * requires extent conversion to occur *before* we return to userspace, | ||
1616 | * we have to wait for extent conversion to complete. Look at the | ||
1617 | * iocb that has been passed to us to determine if this is AIO or | ||
1618 | * not. If it is synchronous, tell xfs_finish_ioend() to kick the | ||
1619 | * workqueue and wait for it to complete. | ||
1620 | * | ||
1621 | * The core direct I/O code might be changed to always call the | ||
1622 | * completion handler in the future, in which case all this can | ||
1623 | * go away. | ||
1624 | */ | 1440 | */ |
1441 | iocb->private = NULL; | ||
1442 | |||
1625 | ioend->io_offset = offset; | 1443 | ioend->io_offset = offset; |
1626 | ioend->io_size = size; | 1444 | ioend->io_size = size; |
1627 | if (ioend->io_type == IO_READ) { | 1445 | if (private && size > 0) |
1628 | xfs_finish_ioend(ioend, 0); | 1446 | ioend->io_type = IO_UNWRITTEN; |
1629 | } else if (private && size > 0) { | 1447 | |
1630 | xfs_finish_ioend(ioend, is_sync_kiocb(iocb)); | 1448 | if (is_async) { |
1631 | } else { | ||
1632 | /* | 1449 | /* |
1633 | * A direct I/O write ioend starts it's life in unwritten | 1450 | * If we are converting an unwritten extent we need to delay |
1634 | * state in case they map an unwritten extent. This write | 1451 | * the AIO completion until after the unwrittent extent |
1635 | * didn't map an unwritten extent so switch it's completion | 1452 | * conversion has completed, otherwise do it ASAP. |
1636 | * handler. | ||
1637 | */ | 1453 | */ |
1638 | ioend->io_type = IO_NEW; | 1454 | if (ioend->io_type == IO_UNWRITTEN) { |
1639 | xfs_finish_ioend(ioend, 0); | 1455 | ioend->io_iocb = iocb; |
1456 | ioend->io_result = ret; | ||
1457 | } else { | ||
1458 | aio_complete(iocb, ret, 0); | ||
1459 | } | ||
1460 | xfs_finish_ioend(ioend); | ||
1461 | } else { | ||
1462 | xfs_finish_ioend_sync(ioend); | ||
1640 | } | 1463 | } |
1641 | |||
1642 | /* | ||
1643 | * blockdev_direct_IO can return an error even after the I/O | ||
1644 | * completion handler was called. Thus we need to protect | ||
1645 | * against double-freeing. | ||
1646 | */ | ||
1647 | iocb->private = NULL; | ||
1648 | } | 1464 | } |
1649 | 1465 | ||
1650 | STATIC ssize_t | 1466 | STATIC ssize_t |
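
The new xfs_end_io_direct_write() above either completes an AIO request on the spot or, when an unwritten extent still has to be converted, parks the iocb and result on the ioend so the workqueue can call aio_complete() after the conversion; synchronous I/O instead runs the completion in the caller's context. A simplified, compilable model of that decision, with stub functions standing in for the kernel helpers:

#include <stdbool.h>
#include <stdio.h>

enum io_type { IO_NEW, IO_UNWRITTEN };

struct ioend {
	enum io_type type;
	void *iocb;		/* AIO request whose completion is deferred */
	long result;
};

/* stubs standing in for aio_complete() and the two xfs_finish_ioend* paths */
static void aio_complete_stub(void *iocb, long res)
{
	(void)iocb;
	printf("aio completed immediately, res=%ld\n", res);
}

static void finish_async(struct ioend *io)
{
	printf("queued for conversion%s\n", io->iocb ? " (AIO reply deferred)" : "");
}

static void finish_sync(struct ioend *io)
{
	(void)io;
	printf("converted in caller context\n");
}

static void end_direct_write(struct ioend *io, void *iocb, long ret,
			     bool is_async, bool unwritten)
{
	if (unwritten)
		io->type = IO_UNWRITTEN;

	if (is_async) {
		if (io->type == IO_UNWRITTEN) {
			/* the AIO answer must wait for the extent conversion */
			io->iocb = iocb;
			io->result = ret;
		} else {
			aio_complete_stub(iocb, ret);
		}
		finish_async(io);
	} else {
		/* synchronous I/O: finish the conversion before returning */
		finish_sync(io);
	}
}

int main(void)
{
	struct ioend io = { .type = IO_NEW, .iocb = NULL, .result = 0 };
	int dummy_iocb = 0;

	end_direct_write(&io, &dummy_iocb, 4096, true, true);
	return 0;
}
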
@@ -1655,23 +1471,26 @@ xfs_vm_direct_IO( | |||
1655 | loff_t offset, | 1471 | loff_t offset, |
1656 | unsigned long nr_segs) | 1472 | unsigned long nr_segs) |
1657 | { | 1473 | { |
1658 | struct file *file = iocb->ki_filp; | 1474 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
1659 | struct inode *inode = file->f_mapping->host; | 1475 | struct block_device *bdev = xfs_find_bdev_for_inode(inode); |
1660 | struct block_device *bdev; | 1476 | ssize_t ret; |
1661 | ssize_t ret; | 1477 | |
1662 | 1478 | if (rw & WRITE) { | |
1663 | bdev = xfs_find_bdev_for_inode(inode); | 1479 | iocb->private = xfs_alloc_ioend(inode, IO_NEW); |
1664 | 1480 | ||
1665 | iocb->private = xfs_alloc_ioend(inode, rw == WRITE ? | 1481 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, |
1666 | IO_UNWRITTEN : IO_READ); | 1482 | offset, nr_segs, |
1667 | 1483 | xfs_get_blocks_direct, | |
1668 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, | 1484 | xfs_end_io_direct_write); |
1669 | offset, nr_segs, | 1485 | if (ret != -EIOCBQUEUED && iocb->private) |
1670 | xfs_get_blocks_direct, | 1486 | xfs_destroy_ioend(iocb->private); |
1671 | xfs_end_io_direct); | 1487 | } else { |
1488 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, | ||
1489 | offset, nr_segs, | ||
1490 | xfs_get_blocks_direct, | ||
1491 | NULL); | ||
1492 | } | ||
1672 | 1493 | ||
1673 | if (unlikely(ret != -EIOCBQUEUED && iocb->private)) | ||
1674 | xfs_destroy_ioend(iocb->private); | ||
1675 | return ret; | 1494 | return ret; |
1676 | } | 1495 | } |
1677 | 1496 | ||
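
In the reworked xfs_vm_direct_IO() above, only the write path allocates an ioend completion context, and the submitter frees it whenever blockdev_direct_IO_no_locking() returns anything other than -EIOCBQUEUED, i.e. whenever the request did not stay queued for asynchronous completion. A toy illustration of that ownership rule (the EIOCBQUEUED value here is a placeholder, not the kernel's):

#include <stdio.h>

#define EIOCBQUEUED 529		/* placeholder value for this illustration */

struct ioend { int unused; };

static void destroy_ioend(struct ioend *io)
{
	(void)io;
	printf("ioend freed by the submitter\n");
}

/*
 * After submitting direct I/O: keep the private completion context only if
 * the request was queued for async completion; otherwise the submitter
 * cleans it up, whether the synchronous return was a success or an error.
 */
static void submit_done(long ret, struct ioend *io)
{
	if (ret != -EIOCBQUEUED && io)
		destroy_ioend(io);
	else
		printf("ioend kept; completion handler will free it\n");
}

int main(void)
{
	struct ioend io = { 0 };

	submit_done(4096, &io);		/* completed synchronously */
	submit_done(-EIOCBQUEUED, &io);	/* queued: freed later     */
	return 0;
}
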
@@ -1686,8 +1505,8 @@ xfs_vm_write_begin( | |||
1686 | void **fsdata) | 1505 | void **fsdata) |
1687 | { | 1506 | { |
1688 | *pagep = NULL; | 1507 | *pagep = NULL; |
1689 | return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 1508 | return block_write_begin(file, mapping, pos, len, flags | AOP_FLAG_NOFS, |
1690 | xfs_get_blocks); | 1509 | pagep, fsdata, xfs_get_blocks); |
1691 | } | 1510 | } |
1692 | 1511 | ||
1693 | STATIC sector_t | 1512 | STATIC sector_t |
@@ -1698,7 +1517,7 @@ xfs_vm_bmap( | |||
1698 | struct inode *inode = (struct inode *)mapping->host; | 1517 | struct inode *inode = (struct inode *)mapping->host; |
1699 | struct xfs_inode *ip = XFS_I(inode); | 1518 | struct xfs_inode *ip = XFS_I(inode); |
1700 | 1519 | ||
1701 | xfs_itrace_entry(XFS_I(inode)); | 1520 | trace_xfs_vm_bmap(XFS_I(inode)); |
1702 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 1521 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
1703 | xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); | 1522 | xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); |
1704 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 1523 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |