Diffstat (limited to 'fs')
84 files changed, 4664 insertions, 4311 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 312393f32948..db5dc1598716 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -233,9 +233,13 @@ if NETWORK_FILESYSTEMS
 source "fs/nfs/Kconfig"
 source "fs/nfsd/Kconfig"
 
+config GRACE_PERIOD
+	tristate
+
 config LOCKD
 	tristate
 	depends on FILE_LOCKING
+	select GRACE_PERIOD
 
 config LOCKD_V4
 	bool
@@ -249,7 +253,7 @@ config NFS_ACL_SUPPORT
 
 config NFS_COMMON
 	bool
-	depends on NFSD || NFS_FS
+	depends on NFSD || NFS_FS || LOCKD
 	default y
 
 source "net/sunrpc/Kconfig"
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1d1ba083ca6e..d0262ceb85e1 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3994,7 +3994,8 @@ again:
 		if (ret < 0) {
 			err = ret;
 			goto out_unlock;
-		} if (ret) {
+		}
+		if (ret) {
 			ins_nr = 0;
 			btrfs_release_path(path);
 			continue;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ec3b7a5381fa..dd10a031c052 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -72,7 +72,22 @@ out:
 	return page;
 }
 
-static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
+struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+	bool readahead = false;
+	struct page *page;
+
+	page = find_get_page(META_MAPPING(sbi), index);
+	if (!page || (page && !PageUptodate(page)))
+		readahead = true;
+	f2fs_put_page(page, 0);
+
+	if (readahead)
+		ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR);
+	return get_meta_page(sbi, index);
+}
+
+static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
 {
 	switch (type) {
 	case META_NAT:
@@ -82,6 +97,8 @@ static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
 	case META_SSA:
 	case META_CP:
 		return 0;
+	case META_POR:
+		return MAX_BLKADDR(sbi);
 	default:
 		BUG();
 	}
@@ -90,12 +107,12 @@ static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
 /*
  * Readahead CP/NAT/SIT/SSA pages
  */
-int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
+int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type)
 {
 	block_t prev_blk_addr = 0;
 	struct page *page;
-	int blkno = start;
-	int max_blks = get_max_meta_blks(sbi, type);
+	block_t blkno = start;
+	block_t max_blks = get_max_meta_blks(sbi, type);
 
 	struct f2fs_io_info fio = {
 		.type = META,
@@ -125,7 +142,11 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
 			break;
 		case META_SSA:
 		case META_CP:
-			/* get ssa/cp block addr */
+		case META_POR:
+			if (unlikely(blkno >= max_blks))
+				goto out;
+			if (unlikely(blkno < SEG0_BLKADDR(sbi)))
+				goto out;
 			blk_addr = blkno;
 			break;
 		default:
@@ -151,8 +172,7 @@ out:
 static int f2fs_write_meta_page(struct page *page,
 				struct writeback_control *wbc)
 {
-	struct inode *inode = page->mapping->host;
-	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
 
 	trace_f2fs_writepage(page, META);
 
@@ -177,7 +197,7 @@ redirty_out:
 static int f2fs_write_meta_pages(struct address_space *mapping,
 				struct writeback_control *wbc)
 {
-	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
+	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
 	long diff, written;
 
 	trace_f2fs_writepages(mapping->host, wbc, META);
@@ -259,15 +279,12 @@ continue_unlock:
 
 static int f2fs_set_meta_page_dirty(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
-	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
-
 	trace_f2fs_set_page_dirty(page, META);
 
 	SetPageUptodate(page);
 	if (!PageDirty(page)) {
 		__set_page_dirty_nobuffers(page);
-		inc_page_count(sbi, F2FS_DIRTY_META);
+		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
 		return 1;
 	}
 	return 0;
@@ -378,7 +395,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
 void release_orphan_inode(struct f2fs_sb_info *sbi)
 {
 	spin_lock(&sbi->ino_lock[ORPHAN_INO]);
-	f2fs_bug_on(sbi->n_orphans == 0);
+	f2fs_bug_on(sbi, sbi->n_orphans == 0);
 	sbi->n_orphans--;
 	spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
 }
@@ -398,7 +415,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
 static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
 {
 	struct inode *inode = f2fs_iget(sbi->sb, ino);
-	f2fs_bug_on(IS_ERR(inode));
+	f2fs_bug_on(sbi, IS_ERR(inode));
 	clear_nlink(inode);
 
 	/* truncate all the data during iput */
@@ -459,7 +476,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
 	list_for_each_entry(orphan, head, list) {
 		if (!page) {
 			page = find_get_page(META_MAPPING(sbi), start_blk++);
-			f2fs_bug_on(!page);
+			f2fs_bug_on(sbi, !page);
 			orphan_blk =
 				(struct f2fs_orphan_block *)page_address(page);
 			memset(orphan_blk, 0, sizeof(*orphan_blk));
@@ -619,7 +636,7 @@ fail_no_cp:
 
 static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
 {
-	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 
 	if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
 		return -EEXIST;
@@ -631,32 +648,38 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
 	return 0;
 }
 
-void set_dirty_dir_page(struct inode *inode, struct page *page)
+void update_dirty_page(struct inode *inode, struct page *page)
 {
-	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct dir_inode_entry *new;
 	int ret = 0;
 
-	if (!S_ISDIR(inode->i_mode))
+	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
 		return;
 
+	if (!S_ISDIR(inode->i_mode)) {
+		inode_inc_dirty_pages(inode);
+		goto out;
+	}
+
 	new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
 	new->inode = inode;
 	INIT_LIST_HEAD(&new->list);
 
 	spin_lock(&sbi->dir_inode_lock);
 	ret = __add_dirty_inode(inode, new);
-	inode_inc_dirty_dents(inode);
-	SetPagePrivate(page);
+	inode_inc_dirty_pages(inode);
 	spin_unlock(&sbi->dir_inode_lock);
 
 	if (ret)
 		kmem_cache_free(inode_entry_slab, new);
+out:
+	SetPagePrivate(page);
 }
 
 void add_dirty_dir_inode(struct inode *inode)
 {
-	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct dir_inode_entry *new =
 			f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
 	int ret = 0;
@@ -674,14 +697,14 @@ void add_dirty_dir_inode(struct inode *inode)
 
 void remove_dirty_dir_inode(struct inode *inode)
 {
-	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct dir_inode_entry *entry;
 
 	if (!S_ISDIR(inode->i_mode))
 		return;
 
 	spin_lock(&sbi->dir_inode_lock);
-	if (get_dirty_dents(inode) ||
+	if (get_dirty_pages(inode) ||
 			!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
 		spin_unlock(&sbi->dir_inode_lock);
 		return;
@@ -802,11 +825,12 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
 	finish_wait(&sbi->cp_wait, &wait);
 }
 
-static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
+static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
-	nid_t last_nid = 0;
+	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	nid_t last_nid = nm_i->next_scan_nid;
 	block_t start_blk;
 	struct page *cp_page;
 	unsigned int data_sum_blocks, orphan_blocks;
@@ -869,7 +893,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
 			orphan_blocks);
 
-	if (is_umount) {
+	if (cpc->reason == CP_UMOUNT) {
 		set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
 		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
 				cp_payload_blks + data_sum_blocks +
@@ -886,6 +910,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	else
 		clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
 
+	if (sbi->need_fsck)
+		set_ckpt_flags(ckpt, CP_FSCK_FLAG);
+
 	/* update SIT/NAT bitmap */
 	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
 	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
@@ -920,7 +947,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 
 	write_data_summaries(sbi, start_blk);
 	start_blk += data_sum_blocks;
-	if (is_umount) {
+	if (cpc->reason == CP_UMOUNT) {
 		write_node_summaries(sbi, start_blk);
 		start_blk += NR_CURSEG_NODE_TYPE;
 	}
@@ -960,23 +987,23 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 /*
  * We guarantee that this checkpoint procedure will not fail.
  */
-void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
+void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 	unsigned long long ckpt_ver;
 
-	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops");
+	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
 
 	mutex_lock(&sbi->cp_mutex);
 
-	if (!sbi->s_dirty)
+	if (!sbi->s_dirty && cpc->reason != CP_DISCARD)
 		goto out;
 	if (unlikely(f2fs_cp_error(sbi)))
 		goto out;
 	if (block_operations(sbi))
 		goto out;
 
-	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
+	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
 
 	f2fs_submit_merged_bio(sbi, DATA, WRITE);
 	f2fs_submit_merged_bio(sbi, NODE, WRITE);
@@ -992,16 +1019,16 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 
 	/* write cached NAT/SIT entries to NAT/SIT area */
 	flush_nat_entries(sbi);
-	flush_sit_entries(sbi);
+	flush_sit_entries(sbi, cpc);
 
 	/* unlock all the fs_lock[] in do_checkpoint() */
-	do_checkpoint(sbi, is_umount);
+	do_checkpoint(sbi, cpc);
 
 	unblock_operations(sbi);
 	stat_inc_cp_count(sbi->stat_info);
out:
 	mutex_unlock(&sbi->cp_mutex);
-	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
+	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
 }
 
 void init_ino_entry_info(struct f2fs_sb_info *sbi)
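
The checkpoint entry points above now take a struct cp_control instead of the old is_umount flag, so a single argument carries the trigger (CP_UMOUNT, CP_SYNC, CP_DISCARD) plus the trim range apparently used for FITRIM-style discard. A minimal sketch of a caller under the new signature (illustrative only; the real call sites in super.c and segment.c are updated elsewhere in this commit, and the gc_mutex serialization is an assumption about caller context):

	struct cp_control cpc = {
		.reason = CP_SYNC,	/* CP_UMOUNT at unmount, CP_DISCARD for trim */
	};

	mutex_lock(&sbi->gc_mutex);	/* assumed caller-side serialization */
	write_checkpoint(sbi, &cpc);
	mutex_unlock(&sbi->gc_mutex);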
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 76de83e25a89..8e58c4cc2cb9 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -85,7 +85,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
 	bio = bio_alloc(GFP_NOIO, npages);
 
 	bio->bi_bdev = sbi->sb->s_bdev;
-	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
+	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
 	bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
 	bio->bi_private = sbi;
 
@@ -193,7 +193,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
 		__submit_merged_bio(io);
alloc_new:
 	if (io->bio == NULL) {
-		int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
+		int bio_blocks = MAX_BIO_BLOCKS(sbi);
 
 		io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read);
 		io->fio = *fio;
@@ -236,7 +236,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
 
 int reserve_new_block(struct dnode_of_data *dn)
 {
-	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
 
 	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
 		return -EPERM;
@@ -258,7 +258,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
 	int err;
 
 	/* if inode_page exists, index should be zero */
-	f2fs_bug_on(!need_put && index);
+	f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index);
 
 	err = get_dnode_of_data(dn, index, ALLOC_NODE);
 	if (err)
@@ -321,7 +321,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
 	block_t start_blkaddr, end_blkaddr;
 	int need_update = true;
 
-	f2fs_bug_on(blk_addr == NEW_ADDR);
+	f2fs_bug_on(F2FS_I_SB(dn->inode), blk_addr == NEW_ADDR);
 	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
 							dn->ofs_in_node;
 
@@ -396,7 +396,6 @@ end_update:
 
 struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
 {
-	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	struct address_space *mapping = inode->i_mapping;
 	struct dnode_of_data dn;
 	struct page *page;
@@ -429,7 +428,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
 		return page;
 	}
 
-	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
+	err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, dn.data_blkaddr,
 					sync ? READ_SYNC : READA);
 	if (err)
 		return ERR_PTR(err);
@@ -451,7 +450,6 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
  */
 struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
 {
-	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	struct address_space *mapping = inode->i_mapping;
 	struct dnode_of_data dn;
 	struct page *page;
@@ -490,7 +488,8 @@ repeat:
 		return page;
 	}
 
-	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC);
+	err = f2fs_submit_page_bio(F2FS_I_SB(inode), page,
+					dn.data_blkaddr, READ_SYNC);
 	if (err)
 		return ERR_PTR(err);
 
@@ -517,7 +516,6 @@ repeat:
 struct page *get_new_data_page(struct inode *inode,
 		struct page *ipage, pgoff_t index, bool new_i_size)
 {
-	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
 	struct dnode_of_data dn;
@@ -541,8 +539,8 @@ repeat:
 		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
 		SetPageUptodate(page);
 	} else {
-		err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
-				READ_SYNC);
+		err = f2fs_submit_page_bio(F2FS_I_SB(inode), page,
+					dn.data_blkaddr, READ_SYNC);
 		if (err)
 			goto put_err;
 
@@ -573,10 +571,12 @@ put_err:
 
 static int __allocate_data_block(struct dnode_of_data *dn)
 {
-	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+	struct f2fs_inode_info *fi = F2FS_I(dn->inode);
 	struct f2fs_summary sum;
 	block_t new_blkaddr;
 	struct node_info ni;
+	pgoff_t fofs;
 	int type;
 
 	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
@@ -599,6 +599,12 @@ static int __allocate_data_block(struct dnode_of_data *dn)
 	update_extent_cache(new_blkaddr, dn);
 	clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
 
+	/* update i_size */
+	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
+							dn->ofs_in_node;
+	if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT))
+		i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT));
+
 	dn->data_blkaddr = new_blkaddr;
 	return 0;
 }
@@ -614,7 +620,6 @@ static int __allocate_data_block(struct dnode_of_data *dn)
 static int __get_data_block(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create, bool fiemap)
 {
-	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	unsigned int blkbits = inode->i_sb->s_blocksize_bits;
 	unsigned maxblocks = bh_result->b_size >> blkbits;
 	struct dnode_of_data dn;
@@ -630,8 +635,8 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
 		goto out;
 
 	if (create) {
-		f2fs_balance_fs(sbi);
-		f2fs_lock_op(sbi);
+		f2fs_balance_fs(F2FS_I_SB(inode));
+		f2fs_lock_op(F2FS_I_SB(inode));
 	}
 
 	/* When reading holes, we need its node page */
@@ -707,7 +712,7 @@ put_out:
 	f2fs_put_dnode(&dn);
unlock_out:
 	if (create)
-		f2fs_unlock_op(sbi);
+		f2fs_unlock_op(F2FS_I_SB(inode));
out:
 	trace_f2fs_get_data_block(inode, iblock, bh_result, err);
 	return err;
@@ -804,7 +809,7 @@ static int f2fs_write_data_page(struct page *page,
 			struct writeback_control *wbc)
 {
 	struct inode *inode = page->mapping->host;
-	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	loff_t i_size = i_size_read(inode);
 	const pgoff_t end_index = ((unsigned long long) i_size)
 							>> PAGE_CACHE_SHIFT;
@@ -846,7 +851,7 @@ write:
 	if (unlikely(f2fs_cp_error(sbi))) {
 		SetPageError(page);
 		unlock_page(page);
-		return 0;
+		goto out;
 	}
 
 	if (!wbc->for_reclaim)
@@ -866,7 +871,7 @@ done:
 
 	clear_cold_data(page);
out:
-	inode_dec_dirty_dents(inode);
+	inode_dec_dirty_pages(inode);
 	unlock_page(page);
 	if (need_balance_fs)
 		f2fs_balance_fs(sbi);
@@ -892,7 +897,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 			struct writeback_control *wbc)
 {
 	struct inode *inode = mapping->host;
-	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	bool locked = false;
 	int ret;
 	long diff;
@@ -904,7 +909,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 		return 0;
 
 	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
-			get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) &&
+			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
 			available_free_memory(sbi, DIRTY_DENTS))
 		goto skip_write;
 
@@ -926,7 +931,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 	return ret;
 
skip_write:
-	wbc->pages_skipped += get_dirty_dents(inode);
+	wbc->pages_skipped += get_dirty_pages(inode);
 	return 0;
 }
 
@@ -945,7 +950,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 		struct page **pagep, void **fsdata)
 {
 	struct inode *inode = mapping->host;
-	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct page *page;
 	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
 	struct dnode_of_data dn;
@@ -1047,7 +1052,10 @@ static int f2fs_write_end(struct file *file,
 
 	trace_f2fs_write_end(inode, pos, len, copied);
 
-	set_page_dirty(page);
+	if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
+		register_inmem_page(inode, page);
+	else
+		set_page_dirty(page);
 
 	if (pos + copied > i_size_read(inode)) {
 		i_size_write(inode, pos + copied);
@@ -1092,9 +1100,6 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
 	if (check_direct_IO(inode, rw, iter, offset))
 		return 0;
 
-	/* clear fsync mark to recover these blocks */
-	fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
-
 	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
 
 	err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
@@ -1110,8 +1115,12 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
 				unsigned int length)
 {
 	struct inode *inode = page->mapping->host;
+
+	if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE)
+		return;
+
 	if (PageDirty(page))
-		inode_dec_dirty_dents(inode);
+		inode_dec_dirty_pages(inode);
 	ClearPagePrivate(page);
 }
 
@@ -1133,7 +1142,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
 
 	if (!PageDirty(page)) {
 		__set_page_dirty_nobuffers(page);
-		set_dirty_dir_page(inode, page);
+		update_dirty_page(inode, page);
 		return 1;
 	}
 	return 0;
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index fecebdbfd781..0a91ab813a9e 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -93,7 +93,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
 	total_vblocks = 0;
 	blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
 	hblks_per_sec = blks_per_sec / 2;
-	for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
+	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
 		vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
 		dist = abs(vblocks - hblks_per_sec);
 		bimodal += dist * dist;
@@ -103,7 +103,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
 			ndirty++;
 		}
 	}
-	dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100;
+	dist = MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100;
 	si->bimodal = bimodal / dist;
 	if (si->dirty_count)
 		si->avg_vblocks = total_vblocks / ndirty;
@@ -131,17 +131,17 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 
 	/* build sit */
 	si->base_mem += sizeof(struct sit_info);
-	si->base_mem += TOTAL_SEGS(sbi) * sizeof(struct seg_entry);
-	si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi));
-	si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi);
+	si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry);
+	si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi));
+	si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
 	if (sbi->segs_per_sec > 1)
-		si->base_mem += TOTAL_SECS(sbi) * sizeof(struct sec_entry);
+		si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry);
 	si->base_mem += __bitmap_size(sbi, SIT_BITMAP);
 
 	/* build free segmap */
 	si->base_mem += sizeof(struct free_segmap_info);
-	si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi));
-	si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi));
+	si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi));
+	si->base_mem += f2fs_bitmap_size(MAIN_SECS(sbi));
 
 	/* build curseg */
 	si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE;
@@ -149,8 +149,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 
 	/* build dirty segmap */
 	si->base_mem += sizeof(struct dirty_seglist_info);
-	si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi));
-	si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi));
+	si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(MAIN_SEGS(sbi));
+	si->base_mem += f2fs_bitmap_size(MAIN_SECS(sbi));
 
 	/* build nm */
 	si->base_mem += sizeof(struct f2fs_nm_info);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 155fb056b7f1..b54f87149c09 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -126,7 +126,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
 		 * For the most part, it should be a bug when name_len is zero.
 		 * We stop here for figuring out where the bugs has occurred.
 		 */
-		f2fs_bug_on(!de->name_len);
+		f2fs_bug_on(F2FS_P_SB(dentry_page), !de->name_len);
 
 		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
 	}
@@ -151,7 +151,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 	bool room = false;
 	int max_slots = 0;
 
-	f2fs_bug_on(level > MAX_DIR_HASH_DEPTH);
+	f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
 
 	nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
 	nblock = bucket_blocks(level);
@@ -284,10 +284,9 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
 
 int update_dent_inode(struct inode *inode, const struct qstr *name)
 {
-	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	struct page *page;
 
-	page = get_node_page(sbi, inode->i_ino);
+	page = get_node_page(F2FS_I_SB(inode), inode->i_ino);
 	if (IS_ERR(page))
 		return PTR_ERR(page);
 
@@ -337,7 +336,6 @@ static int make_empty_dir(struct inode *inode,
 static struct page *init_inode_metadata(struct inode *inode,
 		struct inode *dir, const struct qstr *name)
 {
-	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
 	struct page *page;
 	int err;
 
@@ -360,7 +358,7 @@ static struct page *init_inode_metadata(struct inode *inode,
 		if (err)
 			goto put_error;
 	} else {
-		page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
+		page = get_node_page(F2FS_I_SB(dir), inode->i_ino);
 		if (IS_ERR(page))
 			return page;
 
@@ -381,7 +379,7 @@ static struct page *init_inode_metadata(struct inode *inode,
 		 * we should remove this inode from orphan list.
 		 */
 		if (inode->i_nlink == 0)
-			remove_orphan_inode(sbi, inode->i_ino);
+			remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino);
 		inc_nlink(inode);
 	}
 	return page;
@@ -571,8 +569,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 {
 	struct f2fs_dentry_block *dentry_blk;
 	unsigned int bit_pos;
-	struct address_space *mapping = page->mapping;
-	struct inode *dir = mapping->host;
+	struct inode *dir = page->mapping->host;
 	int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
 	int i;
 
@@ -594,7 +591,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
 
 	if (inode) {
-		struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+		struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
 
 		down_write(&F2FS_I(inode)->i_sem);
 
@@ -621,7 +618,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 		truncate_hole(dir, page->index, page->index + 1);
 		clear_page_dirty_for_io(page);
 		ClearPageUptodate(page);
-		inode_dec_dirty_dents(dir);
+		inode_dec_dirty_pages(dir);
 	}
 	f2fs_put_page(page, 1);
 }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e921242186f6..8171e80b2ee9 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -21,10 +21,16 @@
 #include <linux/sched.h>
 
 #ifdef CONFIG_F2FS_CHECK_FS
-#define f2fs_bug_on(condition) BUG_ON(condition)
+#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
 #define f2fs_down_write(x, y) down_write_nest_lock(x, y)
 #else
-#define f2fs_bug_on(condition) WARN_ON(condition)
+#define f2fs_bug_on(sbi, condition) \
+	do { \
+		if (unlikely(condition)) { \
+			WARN_ON(1); \
+			sbi->need_fsck = true; \
+		} \
+	} while (0)
 #define f2fs_down_write(x, y) down_write(x)
 #endif
 
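With CONFIG_F2FS_CHECK_FS disabled, a tripped f2fs_bug_on() no longer only warns: it also latches sbi->need_fsck, which do_checkpoint() in the checkpoint.c diff above persists as CP_FSCK_FLAG. Expanded by hand at one call site from that file (a sketch of the macro expansion, not new code):

	/* non-debug expansion of f2fs_bug_on(sbi, sbi->n_orphans == 0) */
	if (unlikely(sbi->n_orphans == 0)) {
		WARN_ON(1);			/* stack trace, but no BUG() */
		sbi->need_fsck = true;		/* written out at the next checkpoint */
	}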
@@ -90,6 +96,20 @@ enum {
 	SIT_BITMAP
 };
 
+enum {
+	CP_UMOUNT,
+	CP_SYNC,
+	CP_DISCARD,
+};
+
+struct cp_control {
+	int reason;
+	__u64 trim_start;
+	__u64 trim_end;
+	__u64 trim_minlen;
+	__u64 trimmed;
+};
+
 /*
  * For CP/NAT/SIT/SSA readahead
  */
@@ -97,7 +117,8 @@ enum {
 	META_CP,
 	META_NAT,
 	META_SIT,
-	META_SSA
+	META_SSA,
+	META_POR,
 };
 
 /* for the list of ino */
@@ -130,7 +151,9 @@ struct discard_entry {
 struct fsync_inode_entry {
 	struct list_head list;	/* list head */
 	struct inode *inode;	/* vfs inode pointer */
-	block_t blkaddr;	/* block address locating the last inode */
+	block_t blkaddr;	/* block address locating the last fsync */
+	block_t last_dentry;	/* block address locating the last dentry */
+	block_t last_inode;	/* block address locating the last inode */
 };
 
 #define nats_in_cursum(sum)		(le16_to_cpu(sum->n_nats))
@@ -141,6 +164,9 @@ struct fsync_inode_entry {
 #define sit_in_journal(sum, i)		(sum->sit_j.entries[i].se)
 #define segno_in_journal(sum, i)	(sum->sit_j.entries[i].segno)
 
+#define MAX_NAT_JENTRIES(sum)	(NAT_JOURNAL_ENTRIES - nats_in_cursum(sum))
+#define MAX_SIT_JENTRIES(sum)	(SIT_JOURNAL_ENTRIES - sits_in_cursum(sum))
+
 static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i)
 {
 	int before = nats_in_cursum(rs);
@@ -155,11 +181,24 @@ static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i)
 	return before;
 }
 
+static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
+								int type)
+{
+	if (type == NAT_JOURNAL)
+		return size <= MAX_NAT_JENTRIES(sum);
+	return size <= MAX_SIT_JENTRIES(sum);
+}
+
 /*
  * ioctl commands
  */
 #define F2FS_IOC_GETFLAGS	FS_IOC_GETFLAGS
 #define F2FS_IOC_SETFLAGS	FS_IOC_SETFLAGS
+
+#define F2FS_IOCTL_MAGIC	0xf5
+#define F2FS_IOC_START_ATOMIC_WRITE	_IO(F2FS_IOCTL_MAGIC, 1)
+#define F2FS_IOC_COMMIT_ATOMIC_WRITE	_IO(F2FS_IOCTL_MAGIC, 2)
+#define F2FS_IOC_START_VOLATILE_WRITE	_IO(F2FS_IOCTL_MAGIC, 3)
 
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
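A hedged userspace sketch of how the new atomic-write ioctls appear intended to be driven; the start/commit pairing and the buffering behavior are inferred from the ioctl names and the f2fs_write_end() hunk in data.c above, and error handling is elided:

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	#define F2FS_IOCTL_MAGIC		0xf5
	#define F2FS_IOC_START_ATOMIC_WRITE	_IO(F2FS_IOCTL_MAGIC, 1)
	#define F2FS_IOC_COMMIT_ATOMIC_WRITE	_IO(F2FS_IOCTL_MAGIC, 2)

	int atomic_update(int fd, const void *buf, size_t len)
	{
		ioctl(fd, F2FS_IOC_START_ATOMIC_WRITE);	/* writes now buffer in inmem pages */
		write(fd, buf, len);			/* staged, not yet persisted */
		return ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE);	/* flush as one unit */
	}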
@@ -222,13 +261,16 @@ struct f2fs_inode_info {
 	/* Use below internally in f2fs*/
 	unsigned long flags;		/* use to pass per-file flags */
 	struct rw_semaphore i_sem;	/* protect fi info */
-	atomic_t dirty_dents;		/* # of dirty dentry pages */
+	atomic_t dirty_pages;		/* # of dirty pages */
 	f2fs_hash_t chash;		/* hash value of given file name */
 	unsigned int clevel;		/* maximum level of given file name */
 	nid_t i_xattr_nid;		/* node id that contains xattrs */
 	unsigned long long xattr_ver;	/* cp version of xattr modification */
 	struct extent_info ext;		/* in-memory extent cache entry */
 	struct dir_inode_entry *dirty_dir;	/* the pointer of dirty dir */
+
+	struct list_head inmem_pages;	/* inmemory pages managed by f2fs */
+	struct mutex inmem_lock;	/* lock for inmemory pages */
 };
 
 static inline void get_extent_info(struct extent_info *ext,
@@ -260,11 +302,10 @@ struct f2fs_nm_info {
 
 	/* NAT cache management */
 	struct radix_tree_root nat_root;/* root of the nat entry cache */
+	struct radix_tree_root nat_set_root;/* root of the nat set cache */
 	rwlock_t nat_tree_lock;		/* protect nat_tree_lock */
-	unsigned int nat_cnt;		/* the # of cached nat entries */
 	struct list_head nat_entries;	/* cached nat entry list (clean) */
-	struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
-	struct list_head nat_entry_set;	/* nat entry set list */
+	unsigned int nat_cnt;		/* the # of cached nat entries */
 	unsigned int dirty_nat_cnt;	/* total num of nat entries in set */
 
 	/* free node ids management */
@@ -332,18 +373,16 @@ enum {
 };
 
 struct flush_cmd {
-	struct flush_cmd *next;
 	struct completion wait;
+	struct llist_node llnode;
 	int ret;
 };
 
 struct flush_cmd_control {
 	struct task_struct *f2fs_issue_flush;	/* flush thread */
 	wait_queue_head_t flush_wait_queue;	/* waiting queue for wake-up */
-	struct flush_cmd *issue_list;		/* list for command issue */
-	struct flush_cmd *dispatch_list;	/* list for command dispatch */
-	spinlock_t issue_lock;			/* for issue list lock */
-	struct flush_cmd *issue_tail;		/* list tail of issue list */
+	struct llist_head issue_list;		/* list for command issue */
+	struct llist_node *dispatch_list;	/* list for command dispatch */
 };
 
 struct f2fs_sm_info {
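The flush queue switches from a spinlock-protected singly linked list (with a separate tail pointer) to a lock-free llist. A simplified sketch of the producer/consumer pattern this enables; the real versions live in segment.c (not part of this excerpt), and issue ordering plus error handling are glossed over:

	/* producer: any task requesting a cache flush */
	static int issue_flush(struct flush_cmd_control *fcc)
	{
		struct flush_cmd cmd;

		init_completion(&cmd.wait);
		llist_add(&cmd.llnode, &fcc->issue_list);	/* atomic push, no lock */
		wake_up(&fcc->flush_wait_queue);
		wait_for_completion(&cmd.wait);
		return cmd.ret;
	}

	/* consumer: the flush thread detaches the whole pending batch at once */
	static void dispatch_pending(struct flush_cmd_control *fcc)
	{
		struct flush_cmd *cmd, *next;

		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
		llist_for_each_entry_safe(cmd, next, fcc->dispatch_list, llnode) {
			cmd->ret = 0;			/* stand-in for the real FLUSH bio */
			complete(&cmd->wait);
		}
		fcc->dispatch_list = NULL;
	}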
@@ -369,8 +408,11 @@ struct f2fs_sm_info {
 	int nr_discards;			/* # of discards in the list */
 	int max_discards;			/* max. discards to be issued */
 
+	struct list_head sit_entry_set;	/* sit entry set list */
+
 	unsigned int ipu_policy;	/* in-place-update policy */
 	unsigned int min_ipu_util;	/* in-place-update threshold */
+	unsigned int min_fsync_blocks;	/* threshold for fsync */
 
 	/* for flush command control */
 	struct flush_cmd_control *cmd_control_info;
@@ -434,6 +476,7 @@ struct f2fs_sb_info {
 	struct buffer_head *raw_super_buf;	/* buffer head of raw sb */
 	struct f2fs_super_block *raw_super;	/* raw super block pointer */
 	int s_dirty;				/* dirty flag for checkpoint */
+	bool need_fsck;				/* need fsck.f2fs to fix */
 
 	/* for node-related operations */
 	struct f2fs_nm_info *nm_info;		/* node manager */
@@ -539,6 +582,21 @@ static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb)
 	return sb->s_fs_info;
 }
 
+static inline struct f2fs_sb_info *F2FS_I_SB(struct inode *inode)
+{
+	return F2FS_SB(inode->i_sb);
+}
+
+static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping)
+{
+	return F2FS_I_SB(mapping->host);
+}
+
+static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page)
+{
+	return F2FS_M_SB(page->mapping);
+}
+
 static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi)
 {
 	return (struct f2fs_super_block *)(sbi->raw_super);
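These three wrappers exist so call sites can stop spelling out the sb_info lookup chain by hand; the bulk of this commit is the mechanical conversion. For example, from the f2fs_write_meta_pages() hunk in checkpoint.c above:

	/* before */
	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
	/* after */
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);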
@@ -703,8 +761,8 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, | |||
703 | blkcnt_t count) | 761 | blkcnt_t count) |
704 | { | 762 | { |
705 | spin_lock(&sbi->stat_lock); | 763 | spin_lock(&sbi->stat_lock); |
706 | f2fs_bug_on(sbi->total_valid_block_count < (block_t) count); | 764 | f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); |
707 | f2fs_bug_on(inode->i_blocks < count); | 765 | f2fs_bug_on(sbi, inode->i_blocks < count); |
708 | inode->i_blocks -= count; | 766 | inode->i_blocks -= count; |
709 | sbi->total_valid_block_count -= (block_t)count; | 767 | sbi->total_valid_block_count -= (block_t)count; |
710 | spin_unlock(&sbi->stat_lock); | 768 | spin_unlock(&sbi->stat_lock); |
@@ -716,10 +774,11 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) | |||
716 | F2FS_SET_SB_DIRT(sbi); | 774 | F2FS_SET_SB_DIRT(sbi); |
717 | } | 775 | } |
718 | 776 | ||
719 | static inline void inode_inc_dirty_dents(struct inode *inode) | 777 | static inline void inode_inc_dirty_pages(struct inode *inode) |
720 | { | 778 | { |
721 | inc_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS); | 779 | atomic_inc(&F2FS_I(inode)->dirty_pages); |
722 | atomic_inc(&F2FS_I(inode)->dirty_dents); | 780 | if (S_ISDIR(inode->i_mode)) |
781 | inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS); | ||
723 | } | 782 | } |
724 | 783 | ||
725 | static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) | 784 | static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) |
@@ -727,13 +786,15 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) | |||
727 | atomic_dec(&sbi->nr_pages[count_type]); | 786 | atomic_dec(&sbi->nr_pages[count_type]); |
728 | } | 787 | } |
729 | 788 | ||
730 | static inline void inode_dec_dirty_dents(struct inode *inode) | 789 | static inline void inode_dec_dirty_pages(struct inode *inode) |
731 | { | 790 | { |
732 | if (!S_ISDIR(inode->i_mode)) | 791 | if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) |
733 | return; | 792 | return; |
734 | 793 | ||
735 | dec_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS); | 794 | atomic_dec(&F2FS_I(inode)->dirty_pages); |
736 | atomic_dec(&F2FS_I(inode)->dirty_dents); | 795 | |
796 | if (S_ISDIR(inode->i_mode)) | ||
797 | dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS); | ||
737 | } | 798 | } |
738 | 799 | ||
739 | static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) | 800 | static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) |
@@ -741,9 +802,9 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) | |||
741 | return atomic_read(&sbi->nr_pages[count_type]); | 802 | return atomic_read(&sbi->nr_pages[count_type]); |
742 | } | 803 | } |
743 | 804 | ||
744 | static inline int get_dirty_dents(struct inode *inode) | 805 | static inline int get_dirty_pages(struct inode *inode) |
745 | { | 806 | { |
746 | return atomic_read(&F2FS_I(inode)->dirty_dents); | 807 | return atomic_read(&F2FS_I(inode)->dirty_pages); |
747 | } | 808 | } |
748 | 809 | ||
749 | static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) | 810 | static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) |
@@ -848,9 +909,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, | |||
848 | { | 909 | { |
849 | spin_lock(&sbi->stat_lock); | 910 | spin_lock(&sbi->stat_lock); |
850 | 911 | ||
851 | f2fs_bug_on(!sbi->total_valid_block_count); | 912 | f2fs_bug_on(sbi, !sbi->total_valid_block_count); |
852 | f2fs_bug_on(!sbi->total_valid_node_count); | 913 | f2fs_bug_on(sbi, !sbi->total_valid_node_count); |
853 | f2fs_bug_on(!inode->i_blocks); | 914 | f2fs_bug_on(sbi, !inode->i_blocks); |
854 | 915 | ||
855 | inode->i_blocks--; | 916 | inode->i_blocks--; |
856 | sbi->total_valid_node_count--; | 917 | sbi->total_valid_node_count--; |
@@ -867,7 +928,7 @@ static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) | |||
867 | static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) | 928 | static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) |
868 | { | 929 | { |
869 | spin_lock(&sbi->stat_lock); | 930 | spin_lock(&sbi->stat_lock); |
870 | f2fs_bug_on(sbi->total_valid_inode_count == sbi->total_node_count); | 931 | f2fs_bug_on(sbi, sbi->total_valid_inode_count == sbi->total_node_count); |
871 | sbi->total_valid_inode_count++; | 932 | sbi->total_valid_inode_count++; |
872 | spin_unlock(&sbi->stat_lock); | 933 | spin_unlock(&sbi->stat_lock); |
873 | } | 934 | } |
@@ -875,7 +936,7 @@ static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) | |||
875 | static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi) | 936 | static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi) |
876 | { | 937 | { |
877 | spin_lock(&sbi->stat_lock); | 938 | spin_lock(&sbi->stat_lock); |
878 | f2fs_bug_on(!sbi->total_valid_inode_count); | 939 | f2fs_bug_on(sbi, !sbi->total_valid_inode_count); |
879 | sbi->total_valid_inode_count--; | 940 | sbi->total_valid_inode_count--; |
880 | spin_unlock(&sbi->stat_lock); | 941 | spin_unlock(&sbi->stat_lock); |
881 | } | 942 | } |
@@ -891,7 +952,7 @@ static inline void f2fs_put_page(struct page *page, int unlock) | |||
891 | return; | 952 | return; |
892 | 953 | ||
893 | if (unlock) { | 954 | if (unlock) { |
894 | f2fs_bug_on(!PageLocked(page)); | 955 | f2fs_bug_on(F2FS_P_SB(page), !PageLocked(page)); |
895 | unlock_page(page); | 956 | unlock_page(page); |
896 | } | 957 | } |
897 | page_cache_release(page); | 958 | page_cache_release(page); |
@@ -998,7 +1059,9 @@ enum { | |||
998 | FI_INLINE_DATA, /* used for inline data*/ | 1059 | FI_INLINE_DATA, /* used for inline data*/ |
999 | FI_APPEND_WRITE, /* inode has appended data */ | 1060 | FI_APPEND_WRITE, /* inode has appended data */ |
1000 | FI_UPDATE_WRITE, /* inode has in-place-update data */ | 1061 | FI_UPDATE_WRITE, /* inode has in-place-update data */ |
1001 | FI_NEED_IPU, /* used fo ipu for fdatasync */ | 1062 | FI_NEED_IPU, /* used for ipu per file */ |
1063 | FI_ATOMIC_FILE, /* indicate atomic file */ | ||
1064 | FI_VOLATILE_FILE, /* indicate volatile file */ | ||
1002 | }; | 1065 | }; |
1003 | 1066 | ||
1004 | static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) | 1067 | static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) |
@@ -1085,6 +1148,16 @@ static inline int f2fs_has_inline_data(struct inode *inode) | |||
1085 | return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); | 1148 | return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); |
1086 | } | 1149 | } |
1087 | 1150 | ||
1151 | static inline bool f2fs_is_atomic_file(struct inode *inode) | ||
1152 | { | ||
1153 | return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); | ||
1154 | } | ||
1155 | |||
1156 | static inline bool f2fs_is_volatile_file(struct inode *inode) | ||
1157 | { | ||
1158 | return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); | ||
1159 | } | ||
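
[annotation] FI_ATOMIC_FILE and FI_VOLATILE_FILE are plain per-inode flag bits queried through is_inode_flag_set(), as the two new predicates show. A self-contained sketch of the same set/test pattern; the bit positions and toy_fi type are invented for illustration:

#include <stdbool.h>
#include <stdio.h>

enum { FI_ATOMIC_FILE, FI_VOLATILE_FILE }; /* illustrative bit indices */

struct toy_fi { unsigned long flags; };

static void set_flag(struct toy_fi *fi, int f)  { fi->flags |= 1ul << f; }
static bool test_flag(struct toy_fi *fi, int f) { return fi->flags & (1ul << f); }

int main(void)
{
	struct toy_fi fi = {0};

	set_flag(&fi, FI_ATOMIC_FILE);
	printf("atomic=%d volatile=%d\n",
	       test_flag(&fi, FI_ATOMIC_FILE),
	       test_flag(&fi, FI_VOLATILE_FILE)); /* 1 0 */
	return 0;
}
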
1160 | |||
1088 | static inline void *inline_data_addr(struct page *page) | 1161 | static inline void *inline_data_addr(struct page *page) |
1089 | { | 1162 | { |
1090 | struct f2fs_inode *ri = F2FS_INODE(page); | 1163 | struct f2fs_inode *ri = F2FS_INODE(page); |
@@ -1141,6 +1214,7 @@ void update_inode(struct inode *, struct page *); | |||
1141 | void update_inode_page(struct inode *); | 1214 | void update_inode_page(struct inode *); |
1142 | int f2fs_write_inode(struct inode *, struct writeback_control *); | 1215 | int f2fs_write_inode(struct inode *, struct writeback_control *); |
1143 | void f2fs_evict_inode(struct inode *); | 1216 | void f2fs_evict_inode(struct inode *); |
1217 | void handle_failed_inode(struct inode *); | ||
1144 | 1218 | ||
1145 | /* | 1219 | /* |
1146 | * namei.c | 1220 | * namei.c |
@@ -1188,9 +1262,9 @@ struct dnode_of_data; | |||
1188 | struct node_info; | 1262 | struct node_info; |
1189 | 1263 | ||
1190 | bool available_free_memory(struct f2fs_sb_info *, int); | 1264 | bool available_free_memory(struct f2fs_sb_info *, int); |
1191 | int is_checkpointed_node(struct f2fs_sb_info *, nid_t); | 1265 | bool is_checkpointed_node(struct f2fs_sb_info *, nid_t); |
1192 | bool fsync_mark_done(struct f2fs_sb_info *, nid_t); | 1266 | bool has_fsynced_inode(struct f2fs_sb_info *, nid_t); |
1193 | void fsync_mark_clear(struct f2fs_sb_info *, nid_t); | 1267 | bool need_inode_block_update(struct f2fs_sb_info *, nid_t); |
1194 | void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); | 1268 | void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); |
1195 | int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); | 1269 | int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); |
1196 | int truncate_inode_blocks(struct inode *, pgoff_t); | 1270 | int truncate_inode_blocks(struct inode *, pgoff_t); |
@@ -1221,6 +1295,8 @@ void destroy_node_manager_caches(void); | |||
1221 | /* | 1295 | /* |
1222 | * segment.c | 1296 | * segment.c |
1223 | */ | 1297 | */ |
1298 | void register_inmem_page(struct inode *, struct page *); | ||
1299 | void commit_inmem_pages(struct inode *, bool); | ||
1224 | void f2fs_balance_fs(struct f2fs_sb_info *); | 1300 | void f2fs_balance_fs(struct f2fs_sb_info *); |
1225 | void f2fs_balance_fs_bg(struct f2fs_sb_info *); | 1301 | void f2fs_balance_fs_bg(struct f2fs_sb_info *); |
1226 | int f2fs_issue_flush(struct f2fs_sb_info *); | 1302 | int f2fs_issue_flush(struct f2fs_sb_info *); |
@@ -1229,9 +1305,11 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *); | |||
1229 | void invalidate_blocks(struct f2fs_sb_info *, block_t); | 1305 | void invalidate_blocks(struct f2fs_sb_info *, block_t); |
1230 | void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); | 1306 | void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); |
1231 | void clear_prefree_segments(struct f2fs_sb_info *); | 1307 | void clear_prefree_segments(struct f2fs_sb_info *); |
1308 | void release_discard_addrs(struct f2fs_sb_info *); | ||
1232 | void discard_next_dnode(struct f2fs_sb_info *, block_t); | 1309 | void discard_next_dnode(struct f2fs_sb_info *, block_t); |
1233 | int npages_for_summary_flush(struct f2fs_sb_info *); | 1310 | int npages_for_summary_flush(struct f2fs_sb_info *); |
1234 | void allocate_new_segments(struct f2fs_sb_info *); | 1311 | void allocate_new_segments(struct f2fs_sb_info *); |
1312 | int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); | ||
1235 | struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); | 1313 | struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); |
1236 | void write_meta_page(struct f2fs_sb_info *, struct page *); | 1314 | void write_meta_page(struct f2fs_sb_info *, struct page *); |
1237 | void write_node_page(struct f2fs_sb_info *, struct page *, | 1315 | void write_node_page(struct f2fs_sb_info *, struct page *, |
@@ -1248,7 +1326,7 @@ void write_data_summaries(struct f2fs_sb_info *, block_t); | |||
1248 | void write_node_summaries(struct f2fs_sb_info *, block_t); | 1326 | void write_node_summaries(struct f2fs_sb_info *, block_t); |
1249 | int lookup_journal_in_cursum(struct f2fs_summary_block *, | 1327 | int lookup_journal_in_cursum(struct f2fs_summary_block *, |
1250 | int, unsigned int, int); | 1328 | int, unsigned int, int); |
1251 | void flush_sit_entries(struct f2fs_sb_info *); | 1329 | void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *); |
1252 | int build_segment_manager(struct f2fs_sb_info *); | 1330 | int build_segment_manager(struct f2fs_sb_info *); |
1253 | void destroy_segment_manager(struct f2fs_sb_info *); | 1331 | void destroy_segment_manager(struct f2fs_sb_info *); |
1254 | int __init create_segment_manager_caches(void); | 1332 | int __init create_segment_manager_caches(void); |
@@ -1259,7 +1337,8 @@ void destroy_segment_manager_caches(void); | |||
1259 | */ | 1337 | */ |
1260 | struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); | 1338 | struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); |
1261 | struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); | 1339 | struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); |
1262 | int ra_meta_pages(struct f2fs_sb_info *, int, int, int); | 1340 | struct page *get_meta_page_ra(struct f2fs_sb_info *, pgoff_t); |
1341 | int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int); | ||
1263 | long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); | 1342 | long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); |
1264 | void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); | 1343 | void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); |
1265 | void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); | 1344 | void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); |
@@ -1271,11 +1350,11 @@ void add_orphan_inode(struct f2fs_sb_info *, nid_t); | |||
1271 | void remove_orphan_inode(struct f2fs_sb_info *, nid_t); | 1350 | void remove_orphan_inode(struct f2fs_sb_info *, nid_t); |
1272 | void recover_orphan_inodes(struct f2fs_sb_info *); | 1351 | void recover_orphan_inodes(struct f2fs_sb_info *); |
1273 | int get_valid_checkpoint(struct f2fs_sb_info *); | 1352 | int get_valid_checkpoint(struct f2fs_sb_info *); |
1274 | void set_dirty_dir_page(struct inode *, struct page *); | 1353 | void update_dirty_page(struct inode *, struct page *); |
1275 | void add_dirty_dir_inode(struct inode *); | 1354 | void add_dirty_dir_inode(struct inode *); |
1276 | void remove_dirty_dir_inode(struct inode *); | 1355 | void remove_dirty_dir_inode(struct inode *); |
1277 | void sync_dirty_dir_inodes(struct f2fs_sb_info *); | 1356 | void sync_dirty_dir_inodes(struct f2fs_sb_info *); |
1278 | void write_checkpoint(struct f2fs_sb_info *, bool); | 1357 | void write_checkpoint(struct f2fs_sb_info *, struct cp_control *); |
1279 | void init_ino_entry_info(struct f2fs_sb_info *); | 1358 | void init_ino_entry_info(struct f2fs_sb_info *); |
1280 | int __init create_checkpoint_caches(void); | 1359 | int __init create_checkpoint_caches(void); |
1281 | void destroy_checkpoint_caches(void); | 1360 | void destroy_checkpoint_caches(void); |
@@ -1359,12 +1438,12 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) | |||
1359 | #define stat_inc_inline_inode(inode) \ | 1438 | #define stat_inc_inline_inode(inode) \ |
1360 | do { \ | 1439 | do { \ |
1361 | if (f2fs_has_inline_data(inode)) \ | 1440 | if (f2fs_has_inline_data(inode)) \ |
1362 | ((F2FS_SB(inode->i_sb))->inline_inode++); \ | 1441 | ((F2FS_I_SB(inode))->inline_inode++); \ |
1363 | } while (0) | 1442 | } while (0) |
1364 | #define stat_dec_inline_inode(inode) \ | 1443 | #define stat_dec_inline_inode(inode) \ |
1365 | do { \ | 1444 | do { \ |
1366 | if (f2fs_has_inline_data(inode)) \ | 1445 | if (f2fs_has_inline_data(inode)) \ |
1367 | ((F2FS_SB(inode->i_sb))->inline_inode--); \ | 1446 | ((F2FS_I_SB(inode))->inline_inode--); \ |
1368 | } while (0) | 1447 | } while (0) |
1369 | 1448 | ||
1370 | #define stat_inc_seg_type(sbi, curseg) \ | 1449 | #define stat_inc_seg_type(sbi, curseg) \ |
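
[annotation] A change repeated throughout this commit is replacing the two-step F2FS_SB(inode->i_sb) chain with a dedicated F2FS_I_SB(inode) accessor (with F2FS_M_SB() and F2FS_P_SB() playing the same role for mappings and pages). A toy model of why such wrappers shorten call sites; all names below are invented stand-ins:

#include <stdio.h>

/* toy object graph mirroring inode->i_sb->s_fs_info */
struct toy_sbi   { int id; };
struct toy_sb    { struct toy_sbi *s_fs_info; };
struct toy_inode { struct toy_sb *i_sb; };

/* one accessor per starting object keeps every call site a single call deep */
static struct toy_sbi *SBI(struct toy_sb *sb)      { return sb->s_fs_info; }
static struct toy_sbi *I_SBI(struct toy_inode *in) { return SBI(in->i_sb); }

int main(void)
{
	struct toy_sbi sbi = { .id = 7 };
	struct toy_sb sb = { .s_fs_info = &sbi };
	struct toy_inode inode = { .i_sb = &sb };

	printf("%d\n", I_SBI(&inode)->id); /* was SBI(inode.i_sb)->id before */
	return 0;
}
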
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 060aee65aee8..8e68bb64f835 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c | |||
@@ -33,7 +33,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, | |||
33 | { | 33 | { |
34 | struct page *page = vmf->page; | 34 | struct page *page = vmf->page; |
35 | struct inode *inode = file_inode(vma->vm_file); | 35 | struct inode *inode = file_inode(vma->vm_file); |
36 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 36 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
37 | struct dnode_of_data dn; | 37 | struct dnode_of_data dn; |
38 | int err; | 38 | int err; |
39 | 39 | ||
@@ -117,7 +117,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) | |||
117 | 117 | ||
118 | static inline bool need_do_checkpoint(struct inode *inode) | 118 | static inline bool need_do_checkpoint(struct inode *inode) |
119 | { | 119 | { |
120 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 120 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
121 | bool need_cp = false; | 121 | bool need_cp = false; |
122 | 122 | ||
123 | if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) | 123 | if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) |
@@ -138,7 +138,8 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
138 | { | 138 | { |
139 | struct inode *inode = file->f_mapping->host; | 139 | struct inode *inode = file->f_mapping->host; |
140 | struct f2fs_inode_info *fi = F2FS_I(inode); | 140 | struct f2fs_inode_info *fi = F2FS_I(inode); |
141 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 141 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
142 | nid_t ino = inode->i_ino; ||
142 | int ret = 0; | 143 | int ret = 0; |
143 | bool need_cp = false; | 144 | bool need_cp = false; |
144 | struct writeback_control wbc = { | 145 | struct writeback_control wbc = { |
@@ -153,12 +154,11 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
153 | trace_f2fs_sync_file_enter(inode); | 154 | trace_f2fs_sync_file_enter(inode); |
154 | 155 | ||
155 | /* if fdatasync is triggered, let's do in-place-update */ | 156 | /* if fdatasync is triggered, let's do in-place-update */ |
156 | if (datasync) | 157 | if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) |
157 | set_inode_flag(fi, FI_NEED_IPU); | 158 | set_inode_flag(fi, FI_NEED_IPU); |
158 | |||
159 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 159 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
160 | if (datasync) | 160 | clear_inode_flag(fi, FI_NEED_IPU); |
161 | clear_inode_flag(fi, FI_NEED_IPU); | 161 | |
162 | if (ret) { | 162 | if (ret) { |
163 | trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); | 163 | trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); |
164 | return ret; | 164 | return ret; |
@@ -168,13 +168,22 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
168 | * if there is no written data, don't waste time to write recovery info. | 168 | * if there is no written data, don't waste time to write recovery info. |
169 | */ | 169 | */ |
170 | if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && | 170 | if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && |
171 | !exist_written_data(sbi, inode->i_ino, APPEND_INO)) { | 171 | !exist_written_data(sbi, ino, APPEND_INO)) { |
172 | struct page *i = find_get_page(NODE_MAPPING(sbi), ino); | ||
173 | |||
174 | /* But we need to avoid that there are some inode updates */ | ||
175 | if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) { | ||
176 | f2fs_put_page(i, 0); | ||
177 | goto go_write; | ||
178 | } | ||
179 | f2fs_put_page(i, 0); | ||
180 | |||
172 | if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || | 181 | if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || |
173 | exist_written_data(sbi, inode->i_ino, UPDATE_INO)) | 182 | exist_written_data(sbi, ino, UPDATE_INO)) |
174 | goto flush_out; | 183 | goto flush_out; |
175 | goto out; | 184 | goto out; |
176 | } | 185 | } |
177 | 186 | go_write: | |
178 | /* guarantee free sections for fsync */ | 187 | /* guarantee free sections for fsync */ |
179 | f2fs_balance_fs(sbi); | 188 | f2fs_balance_fs(sbi); |
180 | 189 | ||
@@ -207,26 +216,28 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
207 | up_write(&fi->i_sem); | 216 | up_write(&fi->i_sem); |
208 | } | 217 | } |
209 | } else { | 218 | } else { |
210 | /* if there is no written node page, write its inode page */ | 219 | sync_nodes: |
211 | while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { | 220 | sync_node_pages(sbi, ino, &wbc); |
212 | if (fsync_mark_done(sbi, inode->i_ino)) | 221 | |
213 | goto out; | 222 | if (need_inode_block_update(sbi, ino)) { |
214 | mark_inode_dirty_sync(inode); | 223 | mark_inode_dirty_sync(inode); |
215 | ret = f2fs_write_inode(inode, NULL); | 224 | ret = f2fs_write_inode(inode, NULL); |
216 | if (ret) | 225 | if (ret) |
217 | goto out; | 226 | goto out; |
227 | goto sync_nodes; | ||
218 | } | 228 | } |
219 | ret = wait_on_node_pages_writeback(sbi, inode->i_ino); | 229 | |
230 | ret = wait_on_node_pages_writeback(sbi, ino); | ||
220 | if (ret) | 231 | if (ret) |
221 | goto out; | 232 | goto out; |
222 | 233 | ||
223 | /* once recovery info is written, don't need to tack this */ | 234 | /* once recovery info is written, don't need to tack this */ |
224 | remove_dirty_inode(sbi, inode->i_ino, APPEND_INO); | 235 | remove_dirty_inode(sbi, ino, APPEND_INO); |
225 | clear_inode_flag(fi, FI_APPEND_WRITE); | 236 | clear_inode_flag(fi, FI_APPEND_WRITE); |
226 | flush_out: | 237 | flush_out: |
227 | remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO); | 238 | remove_dirty_inode(sbi, ino, UPDATE_INO); |
228 | clear_inode_flag(fi, FI_UPDATE_WRITE); | 239 | clear_inode_flag(fi, FI_UPDATE_WRITE); |
229 | ret = f2fs_issue_flush(F2FS_SB(inode->i_sb)); | 240 | ret = f2fs_issue_flush(F2FS_I_SB(inode)); |
230 | } | 241 | } |
231 | out: | 242 | out: |
232 | trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); | 243 | trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); |
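
[annotation] The reworked f2fs_sync_file() above does two things: it enables in-place updates whenever the file has at most min_fsync_blocks dirty pages (no longer only for fdatasync), and it loops on sync_node_pages() until need_inode_block_update() reports no pending inode change, rewriting the inode block each round. A toy model of that retry loop; every function here is a stand-in, not the kernel symbol:

#include <stdbool.h>
#include <stdio.h>

static int pending_updates = 3; /* pretend three racing inode updates */

static void sync_node_pages(void)         { puts("sync node pages"); }
static bool need_inode_block_update(void) { return pending_updates > 0; }
static int  write_inode(void)
{
	pending_updates--;
	puts("rewrite inode block");
	return 0;
}

int main(void)
{
sync_nodes:
	sync_node_pages();
	if (need_inode_block_update()) {
		if (write_inode())
			return 1;     /* bail out on write error */
		goto sync_nodes;      /* new dirty inode state: sync again */
	}
	puts("wait on node writeback, then issue flush");
	return 0;
}
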
@@ -353,6 +364,8 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) | |||
353 | maxbytes, i_size_read(inode)); | 364 | maxbytes, i_size_read(inode)); |
354 | case SEEK_DATA: | 365 | case SEEK_DATA: |
355 | case SEEK_HOLE: | 366 | case SEEK_HOLE: |
367 | if (offset < 0) | ||
368 | return -ENXIO; | ||
356 | return f2fs_seek_block(file, offset, whence); | 369 | return f2fs_seek_block(file, offset, whence); |
357 | } | 370 | } |
358 | 371 | ||
@@ -369,7 +382,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
369 | int truncate_data_blocks_range(struct dnode_of_data *dn, int count) | 382 | int truncate_data_blocks_range(struct dnode_of_data *dn, int count) |
370 | { | 383 | { |
371 | int nr_free = 0, ofs = dn->ofs_in_node; | 384 | int nr_free = 0, ofs = dn->ofs_in_node; |
372 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 385 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); |
373 | struct f2fs_node *raw_node; | 386 | struct f2fs_node *raw_node; |
374 | __le32 *addr; | 387 | __le32 *addr; |
375 | 388 | ||
@@ -432,7 +445,7 @@ out: | |||
432 | 445 | ||
433 | int truncate_blocks(struct inode *inode, u64 from, bool lock) | 446 | int truncate_blocks(struct inode *inode, u64 from, bool lock) |
434 | { | 447 | { |
435 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 448 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
436 | unsigned int blocksize = inode->i_sb->s_blocksize; | 449 | unsigned int blocksize = inode->i_sb->s_blocksize; |
437 | struct dnode_of_data dn; | 450 | struct dnode_of_data dn; |
438 | pgoff_t free_from; | 451 | pgoff_t free_from; |
@@ -463,7 +476,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) | |||
463 | count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); | 476 | count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); |
464 | 477 | ||
465 | count -= dn.ofs_in_node; | 478 | count -= dn.ofs_in_node; |
466 | f2fs_bug_on(count < 0); | 479 | f2fs_bug_on(sbi, count < 0); |
467 | 480 | ||
468 | if (dn.ofs_in_node || IS_INODE(dn.node_page)) { | 481 | if (dn.ofs_in_node || IS_INODE(dn.node_page)) { |
469 | truncate_data_blocks_range(&dn, count); | 482 | truncate_data_blocks_range(&dn, count); |
@@ -547,15 +560,22 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) | |||
547 | if (err) | 560 | if (err) |
548 | return err; | 561 | return err; |
549 | 562 | ||
550 | if ((attr->ia_valid & ATTR_SIZE) && | 563 | if (attr->ia_valid & ATTR_SIZE) { |
551 | attr->ia_size != i_size_read(inode)) { | ||
552 | err = f2fs_convert_inline_data(inode, attr->ia_size, NULL); | 564 | err = f2fs_convert_inline_data(inode, attr->ia_size, NULL); |
553 | if (err) | 565 | if (err) |
554 | return err; | 566 | return err; |
555 | 567 | ||
556 | truncate_setsize(inode, attr->ia_size); | 568 | if (attr->ia_size != i_size_read(inode)) { |
557 | f2fs_truncate(inode); | 569 | truncate_setsize(inode, attr->ia_size); |
558 | f2fs_balance_fs(F2FS_SB(inode->i_sb)); | 570 | f2fs_truncate(inode); |
571 | f2fs_balance_fs(F2FS_I_SB(inode)); | ||
572 | } else { | ||
573 | /* | ||
574 | * giving a chance to truncate blocks past EOF which | ||
575 | * are fallocated with FALLOC_FL_KEEP_SIZE. | ||
576 | */ | ||
577 | f2fs_truncate(inode); | ||
578 | } | ||
559 | } | 579 | } |
560 | 580 | ||
561 | __setattr_copy(inode, attr); | 581 | __setattr_copy(inode, attr); |
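
[annotation] The new ATTR_SIZE branch runs f2fs_truncate() even when the requested size equals i_size, so blocks fallocated past EOF with FALLOC_FL_KEEP_SIZE get released. A sketch of the two paths, using toy globals in place of the inode state:

#include <stdio.h>

static long i_size = 4096;    /* logical size */
static long allocated = 8192; /* blocks kept past EOF via FALLOC_FL_KEEP_SIZE */

static void toy_truncate(void)
{
	if (allocated > i_size) {
		allocated = i_size;
		puts("released blocks past EOF");
	}
}

static void setattr_size(long new_size)
{
	if (new_size != i_size) {
		i_size = new_size; /* truncate_setsize() equivalent */
		toy_truncate();
	} else {
		/* same size: still truncate, so KEEP_SIZE blocks go away */
		toy_truncate();
	}
}

int main(void)
{
	setattr_size(4096); /* size unchanged, blocks still released */
	return 0;
}
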
@@ -589,7 +609,7 @@ const struct inode_operations f2fs_file_inode_operations = { | |||
589 | static void fill_zero(struct inode *inode, pgoff_t index, | 609 | static void fill_zero(struct inode *inode, pgoff_t index, |
590 | loff_t start, loff_t len) | 610 | loff_t start, loff_t len) |
591 | { | 611 | { |
592 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 612 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
593 | struct page *page; | 613 | struct page *page; |
594 | 614 | ||
595 | if (!len) | 615 | if (!len) |
@@ -638,6 +658,13 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
638 | loff_t off_start, off_end; | 658 | loff_t off_start, off_end; |
639 | int ret = 0; | 659 | int ret = 0; |
640 | 660 | ||
661 | if (!S_ISREG(inode->i_mode)) | ||
662 | return -EOPNOTSUPP; | ||
663 | |||
664 | /* skip punching hole beyond i_size */ | ||
665 | if (offset >= inode->i_size) | ||
666 | return ret; | ||
667 | |||
641 | ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); | 668 | ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); |
642 | if (ret) | 669 | if (ret) |
643 | return ret; | 670 | return ret; |
@@ -661,7 +688,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
661 | if (pg_start < pg_end) { | 688 | if (pg_start < pg_end) { |
662 | struct address_space *mapping = inode->i_mapping; | 689 | struct address_space *mapping = inode->i_mapping; |
663 | loff_t blk_start, blk_end; | 690 | loff_t blk_start, blk_end; |
664 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 691 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
665 | 692 | ||
666 | f2fs_balance_fs(sbi); | 693 | f2fs_balance_fs(sbi); |
667 | 694 | ||
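
[annotation] punch_hole() now rejects non-regular files up front and returns early for offsets at or beyond i_size. A compact sketch of that guard ordering, with a stand-in signature rather than the kernel one:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static int punch_hole(bool is_reg, long long offset, long long i_size)
{
	if (!is_reg)
		return -EOPNOTSUPP; /* only regular files support hole punching */
	if (offset >= i_size)
		return 0;           /* nothing to punch beyond EOF */
	puts("zero partial pages, free whole blocks in between");
	return 0;
}

int main(void)
{
	printf("%d\n", punch_hole(false, 0, 4096));   /* -EOPNOTSUPP */
	printf("%d\n", punch_hole(true, 8192, 4096)); /* 0: past EOF, no-op */
	punch_hole(true, 0, 4096);
	return 0;
}
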
@@ -682,7 +709,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
682 | static int expand_inode_data(struct inode *inode, loff_t offset, | 709 | static int expand_inode_data(struct inode *inode, loff_t offset, |
683 | loff_t len, int mode) | 710 | loff_t len, int mode) |
684 | { | 711 | { |
685 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 712 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
686 | pgoff_t index, pg_start, pg_end; | 713 | pgoff_t index, pg_start, pg_end; |
687 | loff_t new_size = i_size_read(inode); | 714 | loff_t new_size = i_size_read(inode); |
688 | loff_t off_start, off_end; | 715 | loff_t off_start, off_end; |
@@ -778,61 +805,157 @@ static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) | |||
778 | return flags & F2FS_OTHER_FLMASK; | 805 | return flags & F2FS_OTHER_FLMASK; |
779 | } | 806 | } |
780 | 807 | ||
781 | long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 808 | static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) |
809 | { | ||
810 | struct inode *inode = file_inode(filp); | ||
811 | struct f2fs_inode_info *fi = F2FS_I(inode); | ||
812 | unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE; | ||
813 | return put_user(flags, (int __user *)arg); | ||
814 | } | ||
815 | |||
816 | static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) | ||
782 | { | 817 | { |
783 | struct inode *inode = file_inode(filp); | 818 | struct inode *inode = file_inode(filp); |
784 | struct f2fs_inode_info *fi = F2FS_I(inode); | 819 | struct f2fs_inode_info *fi = F2FS_I(inode); |
785 | unsigned int flags; | 820 | unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE; |
821 | unsigned int oldflags; | ||
786 | int ret; | 822 | int ret; |
787 | 823 | ||
788 | switch (cmd) { | 824 | ret = mnt_want_write_file(filp); |
789 | case F2FS_IOC_GETFLAGS: | 825 | if (ret) |
790 | flags = fi->i_flags & FS_FL_USER_VISIBLE; | 826 | return ret; |
791 | return put_user(flags, (int __user *) arg); | ||
792 | case F2FS_IOC_SETFLAGS: | ||
793 | { | ||
794 | unsigned int oldflags; | ||
795 | 827 | ||
796 | ret = mnt_want_write_file(filp); | 828 | if (!inode_owner_or_capable(inode)) { |
797 | if (ret) | 829 | ret = -EACCES; |
798 | return ret; | 830 | goto out; |
831 | } | ||
799 | 832 | ||
800 | if (!inode_owner_or_capable(inode)) { | 833 | if (get_user(flags, (int __user *)arg)) { |
801 | ret = -EACCES; | 834 | ret = -EFAULT; |
802 | goto out; | 835 | goto out; |
803 | } | 836 | } |
837 | |||
838 | flags = f2fs_mask_flags(inode->i_mode, flags); | ||
839 | |||
840 | mutex_lock(&inode->i_mutex); | ||
804 | 841 | ||
805 | if (get_user(flags, (int __user *) arg)) { | 842 | oldflags = fi->i_flags; |
806 | ret = -EFAULT; | 843 | |
844 | if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { | ||
845 | if (!capable(CAP_LINUX_IMMUTABLE)) { | ||
846 | mutex_unlock(&inode->i_mutex); | ||
847 | ret = -EPERM; | ||
807 | goto out; | 848 | goto out; |
808 | } | 849 | } |
850 | } | ||
809 | 851 | ||
810 | flags = f2fs_mask_flags(inode->i_mode, flags); | 852 | flags = flags & FS_FL_USER_MODIFIABLE; |
853 | flags |= oldflags & ~FS_FL_USER_MODIFIABLE; | ||
854 | fi->i_flags = flags; | ||
855 | mutex_unlock(&inode->i_mutex); | ||
811 | 856 | ||
812 | mutex_lock(&inode->i_mutex); | 857 | f2fs_set_inode_flags(inode); |
858 | inode->i_ctime = CURRENT_TIME; | ||
859 | mark_inode_dirty(inode); | ||
860 | out: | ||
861 | mnt_drop_write_file(filp); | ||
862 | return ret; | ||
863 | } | ||
813 | 864 | ||
814 | oldflags = fi->i_flags; | 865 | static int f2fs_ioc_start_atomic_write(struct file *filp) |
866 | { | ||
867 | struct inode *inode = file_inode(filp); | ||
868 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
815 | 869 | ||
816 | if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { | 870 | if (!inode_owner_or_capable(inode)) |
817 | if (!capable(CAP_LINUX_IMMUTABLE)) { | 871 | return -EACCES; |
818 | mutex_unlock(&inode->i_mutex); | ||
819 | ret = -EPERM; | ||
820 | goto out; | ||
821 | } | ||
822 | } | ||
823 | 872 | ||
824 | flags = flags & FS_FL_USER_MODIFIABLE; | 873 | f2fs_balance_fs(sbi); |
825 | flags |= oldflags & ~FS_FL_USER_MODIFIABLE; | ||
826 | fi->i_flags = flags; | ||
827 | mutex_unlock(&inode->i_mutex); | ||
828 | 874 | ||
829 | f2fs_set_inode_flags(inode); | 875 | set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); |
830 | inode->i_ctime = CURRENT_TIME; | 876 | |
831 | mark_inode_dirty(inode); | 877 | return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); |
832 | out: | 878 | } |
833 | mnt_drop_write_file(filp); | 879 | |
880 | static int f2fs_ioc_commit_atomic_write(struct file *filp) | ||
881 | { | ||
882 | struct inode *inode = file_inode(filp); | ||
883 | int ret; | ||
884 | |||
885 | if (!inode_owner_or_capable(inode)) | ||
886 | return -EACCES; | ||
887 | |||
888 | if (f2fs_is_volatile_file(inode)) | ||
889 | return 0; | ||
890 | |||
891 | ret = mnt_want_write_file(filp); | ||
892 | if (ret) | ||
834 | return ret; | 893 | return ret; |
835 | } | 894 | |
895 | if (f2fs_is_atomic_file(inode)) | ||
896 | commit_inmem_pages(inode, false); | ||
897 | |||
898 | ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); | ||
899 | mnt_drop_write_file(filp); | ||
900 | return ret; | ||
901 | } | ||
902 | |||
903 | static int f2fs_ioc_start_volatile_write(struct file *filp) | ||
904 | { | ||
905 | struct inode *inode = file_inode(filp); | ||
906 | |||
907 | if (!inode_owner_or_capable(inode)) | ||
908 | return -EACCES; | ||
909 | |||
910 | set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); | ||
911 | return 0; | ||
912 | } | ||
913 | |||
914 | static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) | ||
915 | { | ||
916 | struct inode *inode = file_inode(filp); | ||
917 | struct super_block *sb = inode->i_sb; | ||
918 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | ||
919 | struct fstrim_range range; | ||
920 | int ret; | ||
921 | |||
922 | if (!capable(CAP_SYS_ADMIN)) | ||
923 | return -EPERM; | ||
924 | |||
925 | if (!blk_queue_discard(q)) | ||
926 | return -EOPNOTSUPP; | ||
927 | |||
928 | if (copy_from_user(&range, (struct fstrim_range __user *)arg, | ||
929 | sizeof(range))) | ||
930 | return -EFAULT; | ||
931 | |||
932 | range.minlen = max((unsigned int)range.minlen, | ||
933 | q->limits.discard_granularity); | ||
934 | ret = f2fs_trim_fs(F2FS_SB(sb), &range); | ||
935 | if (ret < 0) | ||
936 | return ret; | ||
937 | |||
938 | if (copy_to_user((struct fstrim_range __user *)arg, &range, | ||
939 | sizeof(range))) | ||
940 | return -EFAULT; | ||
941 | return 0; | ||
942 | } | ||
943 | |||
944 | long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | ||
945 | { | ||
946 | switch (cmd) { | ||
947 | case F2FS_IOC_GETFLAGS: | ||
948 | return f2fs_ioc_getflags(filp, arg); | ||
949 | case F2FS_IOC_SETFLAGS: | ||
950 | return f2fs_ioc_setflags(filp, arg); | ||
951 | case F2FS_IOC_START_ATOMIC_WRITE: | ||
952 | return f2fs_ioc_start_atomic_write(filp); | ||
953 | case F2FS_IOC_COMMIT_ATOMIC_WRITE: | ||
954 | return f2fs_ioc_commit_atomic_write(filp); | ||
955 | case F2FS_IOC_START_VOLATILE_WRITE: | ||
956 | return f2fs_ioc_start_volatile_write(filp); | ||
957 | case FITRIM: | ||
958 | return f2fs_ioc_fitrim(filp, arg); | ||
836 | default: | 959 | default: |
837 | return -ENOTTY; | 960 | return -ENOTTY; |
838 | } | 961 | } |
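
[annotation] The monolithic f2fs_ioctl() switch is split into one helper per command, and a FITRIM handler is added that clamps the caller's minlen to the device's discard granularity before trimming, then copies the possibly-adjusted range back to userspace. A toy dispatcher showing the shape; the command numbers and granularity value are invented:

#include <errno.h>
#include <stdio.h>

static long ioc_getflags(void) { return 0; }
static long ioc_setflags(void) { return 0; }
static long ioc_fitrim(unsigned int *minlen, unsigned int granularity)
{
	/* clamp the caller's minimum extent to what the device can discard */
	if (*minlen < granularity)
		*minlen = granularity;
	return 0; /* adjusted *minlen would be copied back to userspace */
}

static long toy_ioctl(unsigned int cmd)
{
	enum { GETFLAGS = 1, SETFLAGS, FITRIM_CMD };
	unsigned int minlen = 0;

	switch (cmd) { /* the switch stays a thin dispatcher */
	case GETFLAGS:   return ioc_getflags();
	case SETFLAGS:   return ioc_setflags();
	case FITRIM_CMD: return ioc_fitrim(&minlen, 512);
	default:         return -ENOTTY;
	}
}

int main(void)
{
	printf("%ld\n", toy_ioctl(99)); /* unknown command: -ENOTTY */
	return 0;
}
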
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 943a31db7cc3..2a8f4acdb86b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c | |||
@@ -193,7 +193,7 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) | |||
193 | * selected by background GC before. | 193 | * selected by background GC before. |
194 | * Those segments guarantee they have small valid blocks. | 194 | * Those segments guarantee they have small valid blocks. |
195 | */ | 195 | */ |
196 | for_each_set_bit(secno, dirty_i->victim_secmap, TOTAL_SECS(sbi)) { | 196 | for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) { |
197 | if (sec_usage_check(sbi, secno)) | 197 | if (sec_usage_check(sbi, secno)) |
198 | continue; | 198 | continue; |
199 | clear_bit(secno, dirty_i->victim_secmap); | 199 | clear_bit(secno, dirty_i->victim_secmap); |
@@ -263,14 +263,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, | |||
263 | unsigned int secno, max_cost; | 263 | unsigned int secno, max_cost; |
264 | int nsearched = 0; | 264 | int nsearched = 0; |
265 | 265 | ||
266 | mutex_lock(&dirty_i->seglist_lock); | ||
267 | |||
266 | p.alloc_mode = alloc_mode; | 268 | p.alloc_mode = alloc_mode; |
267 | select_policy(sbi, gc_type, type, &p); | 269 | select_policy(sbi, gc_type, type, &p); |
268 | 270 | ||
269 | p.min_segno = NULL_SEGNO; | 271 | p.min_segno = NULL_SEGNO; |
270 | p.min_cost = max_cost = get_max_cost(sbi, &p); | 272 | p.min_cost = max_cost = get_max_cost(sbi, &p); |
271 | 273 | ||
272 | mutex_lock(&dirty_i->seglist_lock); | ||
273 | |||
274 | if (p.alloc_mode == LFS && gc_type == FG_GC) { | 274 | if (p.alloc_mode == LFS && gc_type == FG_GC) { |
275 | p.min_segno = check_bg_victims(sbi); | 275 | p.min_segno = check_bg_victims(sbi); |
276 | if (p.min_segno != NULL_SEGNO) | 276 | if (p.min_segno != NULL_SEGNO) |
@@ -281,9 +281,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, | |||
281 | unsigned long cost; | 281 | unsigned long cost; |
282 | unsigned int segno; | 282 | unsigned int segno; |
283 | 283 | ||
284 | segno = find_next_bit(p.dirty_segmap, | 284 | segno = find_next_bit(p.dirty_segmap, MAIN_SEGS(sbi), p.offset); |
285 | TOTAL_SEGS(sbi), p.offset); | 285 | if (segno >= MAIN_SEGS(sbi)) { |
286 | if (segno >= TOTAL_SEGS(sbi)) { | ||
287 | if (sbi->last_victim[p.gc_mode]) { | 286 | if (sbi->last_victim[p.gc_mode]) { |
288 | sbi->last_victim[p.gc_mode] = 0; | 287 | sbi->last_victim[p.gc_mode] = 0; |
289 | p.offset = 0; | 288 | p.offset = 0; |
@@ -423,6 +422,12 @@ next_step: | |||
423 | if (IS_ERR(node_page)) | 422 | if (IS_ERR(node_page)) |
424 | continue; | 423 | continue; |
425 | 424 | ||
425 | /* block may become invalid during get_node_page */ | ||
426 | if (check_valid_map(sbi, segno, off) == 0) { | ||
427 | f2fs_put_page(node_page, 1); | ||
428 | continue; | ||
429 | } | ||
430 | |||
426 | /* set page dirty and write it */ | 431 | /* set page dirty and write it */ |
427 | if (gc_type == FG_GC) { | 432 | if (gc_type == FG_GC) { |
428 | f2fs_wait_on_page_writeback(node_page, NODE); | 433 | f2fs_wait_on_page_writeback(node_page, NODE); |
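
[annotation] get_node_page() can sleep, so by the time GC holds the page, the block it chose may already have been freed; the added check_valid_map() call re-validates before writing. A sketch of that check-sleep-recheck pattern with stand-in functions:

#include <stdbool.h>
#include <stdio.h>

static bool valid = true;

static void get_node_page(void)
{
	valid = false; /* pretend the block was freed while we slept */
}
static bool check_valid_map(void) { return valid; }

int main(void)
{
	if (!check_valid_map())
		return 0;      /* fast path: already invalid */
	get_node_page();       /* may sleep; state can change underneath */
	if (!check_valid_map()) {
		puts("block went invalid during get_node_page, skip");
		return 0;      /* re-check after the blocking call */
	}
	puts("write node page");
	return 0;
}
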
@@ -531,7 +536,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) | |||
531 | f2fs_wait_on_page_writeback(page, DATA); | 536 | f2fs_wait_on_page_writeback(page, DATA); |
532 | 537 | ||
533 | if (clear_page_dirty_for_io(page)) | 538 | if (clear_page_dirty_for_io(page)) |
534 | inode_dec_dirty_dents(inode); | 539 | inode_dec_dirty_pages(inode); |
535 | set_cold_data(page); | 540 | set_cold_data(page); |
536 | do_write_data_page(page, &fio); | 541 | do_write_data_page(page, &fio); |
537 | clear_cold_data(page); | 542 | clear_cold_data(page); |
@@ -688,6 +693,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi) | |||
688 | int gc_type = BG_GC; | 693 | int gc_type = BG_GC; |
689 | int nfree = 0; | 694 | int nfree = 0; |
690 | int ret = -1; | 695 | int ret = -1; |
696 | struct cp_control cpc = { | ||
697 | .reason = CP_SYNC, | ||
698 | }; | ||
691 | 699 | ||
692 | INIT_LIST_HEAD(&ilist); | 700 | INIT_LIST_HEAD(&ilist); |
693 | gc_more: | 701 | gc_more: |
@@ -698,7 +706,7 @@ gc_more: | |||
698 | 706 | ||
699 | if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { | 707 | if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { |
700 | gc_type = FG_GC; | 708 | gc_type = FG_GC; |
701 | write_checkpoint(sbi, false); | 709 | write_checkpoint(sbi, &cpc); |
702 | } | 710 | } |
703 | 711 | ||
704 | if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) | 712 | if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) |
@@ -723,7 +731,7 @@ gc_more: | |||
723 | goto gc_more; | 731 | goto gc_more; |
724 | 732 | ||
725 | if (gc_type == FG_GC) | 733 | if (gc_type == FG_GC) |
726 | write_checkpoint(sbi, false); | 734 | write_checkpoint(sbi, &cpc); |
727 | stop: | 735 | stop: |
728 | mutex_unlock(&sbi->gc_mutex); | 736 | mutex_unlock(&sbi->gc_mutex); |
729 | 737 | ||
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 3e8ecdf3742b..88036fd75797 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c | |||
@@ -15,11 +15,13 @@ | |||
15 | 15 | ||
16 | bool f2fs_may_inline(struct inode *inode) | 16 | bool f2fs_may_inline(struct inode *inode) |
17 | { | 17 | { |
18 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
19 | block_t nr_blocks; | 18 | block_t nr_blocks; |
20 | loff_t i_size; | 19 | loff_t i_size; |
21 | 20 | ||
22 | if (!test_opt(sbi, INLINE_DATA)) | 21 | if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) |
22 | return false; | ||
23 | |||
24 | if (f2fs_is_atomic_file(inode)) | ||
23 | return false; | 25 | return false; |
24 | 26 | ||
25 | nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; | 27 | nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; |
@@ -35,7 +37,6 @@ bool f2fs_may_inline(struct inode *inode) | |||
35 | 37 | ||
36 | int f2fs_read_inline_data(struct inode *inode, struct page *page) | 38 | int f2fs_read_inline_data(struct inode *inode, struct page *page) |
37 | { | 39 | { |
38 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
39 | struct page *ipage; | 40 | struct page *ipage; |
40 | void *src_addr, *dst_addr; | 41 | void *src_addr, *dst_addr; |
41 | 42 | ||
@@ -44,7 +45,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) | |||
44 | goto out; | 45 | goto out; |
45 | } | 46 | } |
46 | 47 | ||
47 | ipage = get_node_page(sbi, inode->i_ino); | 48 | ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); |
48 | if (IS_ERR(ipage)) { | 49 | if (IS_ERR(ipage)) { |
49 | unlock_page(page); | 50 | unlock_page(page); |
50 | return PTR_ERR(ipage); | 51 | return PTR_ERR(ipage); |
@@ -73,7 +74,7 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) | |||
73 | struct dnode_of_data dn; | 74 | struct dnode_of_data dn; |
74 | void *src_addr, *dst_addr; | 75 | void *src_addr, *dst_addr; |
75 | block_t new_blk_addr; | 76 | block_t new_blk_addr; |
76 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 77 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
77 | struct f2fs_io_info fio = { | 78 | struct f2fs_io_info fio = { |
78 | .type = DATA, | 79 | .type = DATA, |
79 | .rw = WRITE_SYNC | REQ_PRIO, | 80 | .rw = WRITE_SYNC | REQ_PRIO, |
@@ -189,13 +190,12 @@ int f2fs_write_inline_data(struct inode *inode, | |||
189 | 190 | ||
190 | void truncate_inline_data(struct inode *inode, u64 from) | 191 | void truncate_inline_data(struct inode *inode, u64 from) |
191 | { | 192 | { |
192 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
193 | struct page *ipage; | 193 | struct page *ipage; |
194 | 194 | ||
195 | if (from >= MAX_INLINE_DATA) | 195 | if (from >= MAX_INLINE_DATA) |
196 | return; | 196 | return; |
197 | 197 | ||
198 | ipage = get_node_page(sbi, inode->i_ino); | 198 | ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); |
199 | if (IS_ERR(ipage)) | 199 | if (IS_ERR(ipage)) |
200 | return; | 200 | return; |
201 | 201 | ||
@@ -209,7 +209,7 @@ void truncate_inline_data(struct inode *inode, u64 from) | |||
209 | 209 | ||
210 | bool recover_inline_data(struct inode *inode, struct page *npage) | 210 | bool recover_inline_data(struct inode *inode, struct page *npage) |
211 | { | 211 | { |
212 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 212 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
213 | struct f2fs_inode *ri = NULL; | 213 | struct f2fs_inode *ri = NULL; |
214 | void *src_addr, *dst_addr; | 214 | void *src_addr, *dst_addr; |
215 | struct page *ipage; | 215 | struct page *ipage; |
@@ -229,7 +229,7 @@ bool recover_inline_data(struct inode *inode, struct page *npage) | |||
229 | ri && (ri->i_inline & F2FS_INLINE_DATA)) { | 229 | ri && (ri->i_inline & F2FS_INLINE_DATA)) { |
230 | process_inline: | 230 | process_inline: |
231 | ipage = get_node_page(sbi, inode->i_ino); | 231 | ipage = get_node_page(sbi, inode->i_ino); |
232 | f2fs_bug_on(IS_ERR(ipage)); | 232 | f2fs_bug_on(sbi, IS_ERR(ipage)); |
233 | 233 | ||
234 | f2fs_wait_on_page_writeback(ipage, NODE); | 234 | f2fs_wait_on_page_writeback(ipage, NODE); |
235 | 235 | ||
@@ -243,7 +243,7 @@ process_inline: | |||
243 | 243 | ||
244 | if (f2fs_has_inline_data(inode)) { | 244 | if (f2fs_has_inline_data(inode)) { |
245 | ipage = get_node_page(sbi, inode->i_ino); | 245 | ipage = get_node_page(sbi, inode->i_ino); |
246 | f2fs_bug_on(IS_ERR(ipage)); | 246 | f2fs_bug_on(sbi, IS_ERR(ipage)); |
247 | f2fs_wait_on_page_writeback(ipage, NODE); | 247 | f2fs_wait_on_page_writeback(ipage, NODE); |
248 | zero_user_segment(ipage, INLINE_DATA_OFFSET, | 248 | zero_user_segment(ipage, INLINE_DATA_OFFSET, |
249 | INLINE_DATA_OFFSET + MAX_INLINE_DATA); | 249 | INLINE_DATA_OFFSET + MAX_INLINE_DATA); |
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2c39999f3868..0deead4505e7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c | |||
@@ -69,7 +69,7 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) | |||
69 | 69 | ||
70 | static int do_read_inode(struct inode *inode) | 70 | static int do_read_inode(struct inode *inode) |
71 | { | 71 | { |
72 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 72 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
73 | struct f2fs_inode_info *fi = F2FS_I(inode); | 73 | struct f2fs_inode_info *fi = F2FS_I(inode); |
74 | struct page *node_page; | 74 | struct page *node_page; |
75 | struct f2fs_inode *ri; | 75 | struct f2fs_inode *ri; |
@@ -218,7 +218,7 @@ void update_inode(struct inode *inode, struct page *node_page) | |||
218 | 218 | ||
219 | void update_inode_page(struct inode *inode) | 219 | void update_inode_page(struct inode *inode) |
220 | { | 220 | { |
221 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 221 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
222 | struct page *node_page; | 222 | struct page *node_page; |
223 | retry: | 223 | retry: |
224 | node_page = get_node_page(sbi, inode->i_ino); | 224 | node_page = get_node_page(sbi, inode->i_ino); |
@@ -238,7 +238,7 @@ retry: | |||
238 | 238 | ||
239 | int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) | 239 | int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) |
240 | { | 240 | { |
241 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 241 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
242 | 242 | ||
243 | if (inode->i_ino == F2FS_NODE_INO(sbi) || | 243 | if (inode->i_ino == F2FS_NODE_INO(sbi) || |
244 | inode->i_ino == F2FS_META_INO(sbi)) | 244 | inode->i_ino == F2FS_META_INO(sbi)) |
@@ -266,9 +266,13 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
266 | */ | 266 | */ |
267 | void f2fs_evict_inode(struct inode *inode) | 267 | void f2fs_evict_inode(struct inode *inode) |
268 | { | 268 | { |
269 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 269 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
270 | nid_t xnid = F2FS_I(inode)->i_xattr_nid; | 270 | nid_t xnid = F2FS_I(inode)->i_xattr_nid; |
271 | 271 | ||
272 | /* some remained atomic pages should discarded */ | ||
273 | if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) | ||
274 | commit_inmem_pages(inode, true); | ||
275 | |||
272 | trace_f2fs_evict_inode(inode); | 276 | trace_f2fs_evict_inode(inode); |
273 | truncate_inode_pages_final(&inode->i_data); | 277 | truncate_inode_pages_final(&inode->i_data); |
274 | 278 | ||
@@ -276,7 +280,7 @@ void f2fs_evict_inode(struct inode *inode) | |||
276 | inode->i_ino == F2FS_META_INO(sbi)) | 280 | inode->i_ino == F2FS_META_INO(sbi)) |
277 | goto out_clear; | 281 | goto out_clear; |
278 | 282 | ||
279 | f2fs_bug_on(get_dirty_dents(inode)); | 283 | f2fs_bug_on(sbi, get_dirty_pages(inode)); |
280 | remove_dirty_dir_inode(inode); | 284 | remove_dirty_dir_inode(inode); |
281 | 285 | ||
282 | if (inode->i_nlink || is_bad_inode(inode)) | 286 | if (inode->i_nlink || is_bad_inode(inode)) |
@@ -306,3 +310,26 @@ no_delete: | |||
306 | out_clear: | 310 | out_clear: |
307 | clear_inode(inode); | 311 | clear_inode(inode); |
308 | } | 312 | } |
313 | |||
314 | /* caller should call f2fs_lock_op() */ | ||
315 | void handle_failed_inode(struct inode *inode) | ||
316 | { | ||
317 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
318 | |||
319 | clear_nlink(inode); | ||
320 | make_bad_inode(inode); | ||
321 | unlock_new_inode(inode); | ||
322 | |||
323 | i_size_write(inode, 0); | ||
324 | if (F2FS_HAS_BLOCKS(inode)) | ||
325 | f2fs_truncate(inode); | ||
326 | |||
327 | remove_inode_page(inode); | ||
328 | stat_dec_inline_inode(inode); | ||
329 | |||
330 | alloc_nid_failed(sbi, inode->i_ino); | ||
331 | f2fs_unlock_op(sbi); | ||
332 | |||
333 | /* iput will drop the inode object */ | ||
334 | iput(inode); | ||
335 | } | ||
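
[annotation] handle_failed_inode() collects the inode-creation error path, previously open-coded at each namei call site, into one helper that also drops the op lock the caller still holds. A toy walk through the same sequence; every function below is a stand-in:

#include <stdio.h>

static void clear_nlink(void)  { puts("nlink = 0"); }
static void make_bad(void)     { puts("mark inode bad"); }
static void truncate_all(void) { puts("truncate allocated blocks"); }
static void free_nid(void)     { puts("return nid to the allocator"); }
static void unlock_op(void)    { puts("f2fs_unlock_op"); }
static void iput_inode(void)   { puts("iput: drop the inode object"); }

static void handle_failed_inode(void)
{
	clear_nlink();
	make_bad();
	truncate_all();
	free_nid();
	unlock_op();   /* caller entered with f2fs_lock_op() held */
	iput_inode();
}

int main(void)
{
	handle_failed_inode();
	return 0;
}
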
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index ee103fd7283c..0d2526e5aa11 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c | |||
@@ -23,7 +23,7 @@ | |||
23 | 23 | ||
24 | static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) | 24 | static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) |
25 | { | 25 | { |
26 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 26 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
27 | nid_t ino; | 27 | nid_t ino; |
28 | struct inode *inode; | 28 | struct inode *inode; |
29 | bool nid_free = false; | 29 | bool nid_free = false; |
@@ -102,7 +102,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, | |||
102 | static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, | 102 | static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, |
103 | bool excl) | 103 | bool excl) |
104 | { | 104 | { |
105 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 105 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
106 | struct inode *inode; | 106 | struct inode *inode; |
107 | nid_t ino = 0; | 107 | nid_t ino = 0; |
108 | int err; | 108 | int err; |
@@ -123,9 +123,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
123 | 123 | ||
124 | f2fs_lock_op(sbi); | 124 | f2fs_lock_op(sbi); |
125 | err = f2fs_add_link(dentry, inode); | 125 | err = f2fs_add_link(dentry, inode); |
126 | f2fs_unlock_op(sbi); | ||
127 | if (err) | 126 | if (err) |
128 | goto out; | 127 | goto out; |
128 | f2fs_unlock_op(sbi); | ||
129 | 129 | ||
130 | alloc_nid_done(sbi, ino); | 130 | alloc_nid_done(sbi, ino); |
131 | 131 | ||
@@ -133,9 +133,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
133 | unlock_new_inode(inode); | 133 | unlock_new_inode(inode); |
134 | return 0; | 134 | return 0; |
135 | out: | 135 | out: |
136 | clear_nlink(inode); | 136 | handle_failed_inode(inode); |
137 | iget_failed(inode); | ||
138 | alloc_nid_failed(sbi, ino); | ||
139 | return err; | 137 | return err; |
140 | } | 138 | } |
141 | 139 | ||
@@ -143,7 +141,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, | |||
143 | struct dentry *dentry) | 141 | struct dentry *dentry) |
144 | { | 142 | { |
145 | struct inode *inode = old_dentry->d_inode; | 143 | struct inode *inode = old_dentry->d_inode; |
146 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 144 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
147 | int err; | 145 | int err; |
148 | 146 | ||
149 | f2fs_balance_fs(sbi); | 147 | f2fs_balance_fs(sbi); |
@@ -154,15 +152,16 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, | |||
154 | set_inode_flag(F2FS_I(inode), FI_INC_LINK); | 152 | set_inode_flag(F2FS_I(inode), FI_INC_LINK); |
155 | f2fs_lock_op(sbi); | 153 | f2fs_lock_op(sbi); |
156 | err = f2fs_add_link(dentry, inode); | 154 | err = f2fs_add_link(dentry, inode); |
157 | f2fs_unlock_op(sbi); | ||
158 | if (err) | 155 | if (err) |
159 | goto out; | 156 | goto out; |
157 | f2fs_unlock_op(sbi); | ||
160 | 158 | ||
161 | d_instantiate(dentry, inode); | 159 | d_instantiate(dentry, inode); |
162 | return 0; | 160 | return 0; |
163 | out: | 161 | out: |
164 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); | 162 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); |
165 | iput(inode); | 163 | iput(inode); |
164 | f2fs_unlock_op(sbi); | ||
166 | return err; | 165 | return err; |
167 | } | 166 | } |
168 | 167 | ||
@@ -203,7 +202,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, | |||
203 | 202 | ||
204 | static int f2fs_unlink(struct inode *dir, struct dentry *dentry) | 203 | static int f2fs_unlink(struct inode *dir, struct dentry *dentry) |
205 | { | 204 | { |
206 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 205 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
207 | struct inode *inode = dentry->d_inode; | 206 | struct inode *inode = dentry->d_inode; |
208 | struct f2fs_dir_entry *de; | 207 | struct f2fs_dir_entry *de; |
209 | struct page *page; | 208 | struct page *page; |
@@ -237,7 +236,7 @@ fail: | |||
237 | static int f2fs_symlink(struct inode *dir, struct dentry *dentry, | 236 | static int f2fs_symlink(struct inode *dir, struct dentry *dentry, |
238 | const char *symname) | 237 | const char *symname) |
239 | { | 238 | { |
240 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 239 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
241 | struct inode *inode; | 240 | struct inode *inode; |
242 | size_t symlen = strlen(symname) + 1; | 241 | size_t symlen = strlen(symname) + 1; |
243 | int err; | 242 | int err; |
@@ -253,9 +252,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, | |||
253 | 252 | ||
254 | f2fs_lock_op(sbi); | 253 | f2fs_lock_op(sbi); |
255 | err = f2fs_add_link(dentry, inode); | 254 | err = f2fs_add_link(dentry, inode); |
256 | f2fs_unlock_op(sbi); | ||
257 | if (err) | 255 | if (err) |
258 | goto out; | 256 | goto out; |
257 | f2fs_unlock_op(sbi); | ||
259 | 258 | ||
260 | err = page_symlink(inode, symname, symlen); | 259 | err = page_symlink(inode, symname, symlen); |
261 | alloc_nid_done(sbi, inode->i_ino); | 260 | alloc_nid_done(sbi, inode->i_ino); |
@@ -264,15 +263,13 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, | |||
264 | unlock_new_inode(inode); | 263 | unlock_new_inode(inode); |
265 | return err; | 264 | return err; |
266 | out: | 265 | out: |
267 | clear_nlink(inode); | 266 | handle_failed_inode(inode); |
268 | iget_failed(inode); | ||
269 | alloc_nid_failed(sbi, inode->i_ino); | ||
270 | return err; | 267 | return err; |
271 | } | 268 | } |
272 | 269 | ||
273 | static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | 270 | static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) |
274 | { | 271 | { |
275 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 272 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
276 | struct inode *inode; | 273 | struct inode *inode; |
277 | int err; | 274 | int err; |
278 | 275 | ||
@@ -290,9 +287,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
290 | set_inode_flag(F2FS_I(inode), FI_INC_LINK); | 287 | set_inode_flag(F2FS_I(inode), FI_INC_LINK); |
291 | f2fs_lock_op(sbi); | 288 | f2fs_lock_op(sbi); |
292 | err = f2fs_add_link(dentry, inode); | 289 | err = f2fs_add_link(dentry, inode); |
293 | f2fs_unlock_op(sbi); | ||
294 | if (err) | 290 | if (err) |
295 | goto out_fail; | 291 | goto out_fail; |
292 | f2fs_unlock_op(sbi); | ||
296 | 293 | ||
297 | alloc_nid_done(sbi, inode->i_ino); | 294 | alloc_nid_done(sbi, inode->i_ino); |
298 | 295 | ||
@@ -303,9 +300,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
303 | 300 | ||
304 | out_fail: | 301 | out_fail: |
305 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); | 302 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); |
306 | clear_nlink(inode); | 303 | handle_failed_inode(inode); |
307 | iget_failed(inode); | ||
308 | alloc_nid_failed(sbi, inode->i_ino); | ||
309 | return err; | 304 | return err; |
310 | } | 305 | } |
311 | 306 | ||
@@ -320,7 +315,7 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry) | |||
320 | static int f2fs_mknod(struct inode *dir, struct dentry *dentry, | 315 | static int f2fs_mknod(struct inode *dir, struct dentry *dentry, |
321 | umode_t mode, dev_t rdev) | 316 | umode_t mode, dev_t rdev) |
322 | { | 317 | { |
323 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 318 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
324 | struct inode *inode; | 319 | struct inode *inode; |
325 | int err = 0; | 320 | int err = 0; |
326 | 321 | ||
@@ -338,25 +333,23 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, | |||
338 | 333 | ||
339 | f2fs_lock_op(sbi); | 334 | f2fs_lock_op(sbi); |
340 | err = f2fs_add_link(dentry, inode); | 335 | err = f2fs_add_link(dentry, inode); |
341 | f2fs_unlock_op(sbi); | ||
342 | if (err) | 336 | if (err) |
343 | goto out; | 337 | goto out; |
338 | f2fs_unlock_op(sbi); | ||
344 | 339 | ||
345 | alloc_nid_done(sbi, inode->i_ino); | 340 | alloc_nid_done(sbi, inode->i_ino); |
346 | d_instantiate(dentry, inode); | 341 | d_instantiate(dentry, inode); |
347 | unlock_new_inode(inode); | 342 | unlock_new_inode(inode); |
348 | return 0; | 343 | return 0; |
349 | out: | 344 | out: |
350 | clear_nlink(inode); | 345 | handle_failed_inode(inode); |
351 | iget_failed(inode); | ||
352 | alloc_nid_failed(sbi, inode->i_ino); | ||
353 | return err; | 346 | return err; |
354 | } | 347 | } |
355 | 348 | ||
356 | static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, | 349 | static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, |
357 | struct inode *new_dir, struct dentry *new_dentry) | 350 | struct inode *new_dir, struct dentry *new_dentry) |
358 | { | 351 | { |
359 | struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb); | 352 | struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); |
360 | struct inode *old_inode = old_dentry->d_inode; | 353 | struct inode *old_inode = old_dentry->d_inode; |
361 | struct inode *new_inode = new_dentry->d_inode; | 354 | struct inode *new_inode = new_dentry->d_inode; |
362 | struct page *old_dir_page; | 355 | struct page *old_dir_page; |
@@ -480,8 +473,7 @@ out: | |||
480 | static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, | 473 | static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, |
481 | struct inode *new_dir, struct dentry *new_dentry) | 474 | struct inode *new_dir, struct dentry *new_dentry) |
482 | { | 475 | { |
483 | struct super_block *sb = old_dir->i_sb; | 476 | struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); |
484 | struct f2fs_sb_info *sbi = F2FS_SB(sb); | ||
485 | struct inode *old_inode = old_dentry->d_inode; | 477 | struct inode *old_inode = old_dentry->d_inode; |
486 | struct inode *new_inode = new_dentry->d_inode; | 478 | struct inode *new_inode = new_dentry->d_inode; |
487 | struct page *old_dir_page, *new_dir_page; | 479 | struct page *old_dir_page, *new_dir_page; |
@@ -642,7 +634,7 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry, | |||
642 | 634 | ||
643 | static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | 635 | static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) |
644 | { | 636 | { |
645 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 637 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
646 | struct inode *inode; | 638 | struct inode *inode; |
647 | int err; | 639 | int err; |
648 | 640 | ||
@@ -678,10 +670,7 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
678 | release_out: | 670 | release_out: |
679 | release_orphan_inode(sbi); | 671 | release_orphan_inode(sbi); |
680 | out: | 672 | out: |
681 | f2fs_unlock_op(sbi); | 673 | handle_failed_inode(inode); |
682 | clear_nlink(inode); | ||
683 | iget_failed(inode); | ||
684 | alloc_nid_failed(sbi, inode->i_ino); | ||
685 | return err; | 674 | return err; |
686 | } | 675 | } |
687 | 676 | ||
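
[annotation] Note the lock movement repeated in every namei hunk above: f2fs_unlock_op() now runs only on the success path, so a failing f2fs_add_link() reaches handle_failed_inode() with the op lock still held, and that helper releases it. A toy sketch of this lock-ownership transfer, again with stand-in functions:

#include <stdbool.h>
#include <stdio.h>

static void lock_op(void)   { puts("lock_op"); }
static void unlock_op(void) { puts("unlock_op"); }
static int  add_link(bool fail) { return fail ? -1 : 0; }
static void handle_failed(void) { unlock_op(); puts("cleanup + iput"); }

static int create(bool fail)
{
	lock_op();
	if (add_link(fail)) {
		handle_failed(); /* error path consumes and drops the lock */
		return -1;
	}
	unlock_op();             /* success path drops it here */
	return 0;
}

int main(void)
{
	create(true);
	create(false);
	return 0;
}
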
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 45378196e19a..44b8afef43d9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c | |||
@@ -54,7 +54,6 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) | |||
54 | static void clear_node_page_dirty(struct page *page) | 54 | static void clear_node_page_dirty(struct page *page) |
55 | { | 55 | { |
56 | struct address_space *mapping = page->mapping; | 56 | struct address_space *mapping = page->mapping; |
57 | struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); | ||
58 | unsigned int long flags; | 57 | unsigned int long flags; |
59 | 58 | ||
60 | if (PageDirty(page)) { | 59 | if (PageDirty(page)) { |
@@ -65,7 +64,7 @@ static void clear_node_page_dirty(struct page *page) | |||
65 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 64 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
66 | 65 | ||
67 | clear_page_dirty_for_io(page); | 66 | clear_page_dirty_for_io(page); |
68 | dec_page_count(sbi, F2FS_DIRTY_NODES); | 67 | dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); |
69 | } | 68 | } |
70 | ClearPageUptodate(page); | 69 | ClearPageUptodate(page); |
71 | } | 70 | } |
@@ -92,7 +91,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) | |||
92 | /* get current nat block page with lock */ | 91 | /* get current nat block page with lock */ |
93 | src_page = get_meta_page(sbi, src_off); | 92 | src_page = get_meta_page(sbi, src_off); |
94 | dst_page = grab_meta_page(sbi, dst_off); | 93 | dst_page = grab_meta_page(sbi, dst_off); |
95 | f2fs_bug_on(PageDirty(src_page)); | 94 | f2fs_bug_on(sbi, PageDirty(src_page)); |
96 | 95 | ||
97 | src_addr = page_address(src_page); | 96 | src_addr = page_address(src_page); |
98 | dst_addr = page_address(dst_page); | 97 | dst_addr = page_address(dst_page); |
@@ -124,44 +123,99 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) | |||
124 | kmem_cache_free(nat_entry_slab, e); | 123 | kmem_cache_free(nat_entry_slab, e); |
125 | } | 124 | } |
126 | 125 | ||
127 | int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) | 126 | static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, |
127 | struct nat_entry *ne) | ||
128 | { | ||
129 | nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); | ||
130 | struct nat_entry_set *head; | ||
131 | |||
132 | if (get_nat_flag(ne, IS_DIRTY)) | ||
133 | return; | ||
134 | retry: | ||
135 | head = radix_tree_lookup(&nm_i->nat_set_root, set); | ||
136 | if (!head) { | ||
137 | head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); | ||
138 | |||
139 | INIT_LIST_HEAD(&head->entry_list); | ||
140 | INIT_LIST_HEAD(&head->set_list); | ||
141 | head->set = set; | ||
142 | head->entry_cnt = 0; | ||
143 | |||
144 | if (radix_tree_insert(&nm_i->nat_set_root, set, head)) { | ||
145 | cond_resched(); | ||
146 | goto retry; | ||
147 | } | ||
148 | } | ||
149 | list_move_tail(&ne->list, &head->entry_list); | ||
150 | nm_i->dirty_nat_cnt++; | ||
151 | head->entry_cnt++; | ||
152 | set_nat_flag(ne, IS_DIRTY, true); | ||
153 | } | ||
154 | |||
155 | static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, | ||
156 | struct nat_entry *ne) | ||
157 | { | ||
158 | nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK; | ||
159 | struct nat_entry_set *head; | ||
160 | |||
161 | head = radix_tree_lookup(&nm_i->nat_set_root, set); | ||
162 | if (head) { | ||
163 | list_move_tail(&ne->list, &nm_i->nat_entries); | ||
164 | set_nat_flag(ne, IS_DIRTY, false); | ||
165 | head->entry_cnt--; | ||
166 | nm_i->dirty_nat_cnt--; | ||
167 | } | ||
168 | } | ||
169 | |||
170 | static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, | ||
171 | nid_t start, unsigned int nr, struct nat_entry_set **ep) | ||
172 | { | ||
173 | return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep, | ||
174 | start, nr); | ||
175 | } | ||
176 | |||
177 | bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) | ||
128 | { | 178 | { |
129 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 179 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
130 | struct nat_entry *e; | 180 | struct nat_entry *e; |
131 | int is_cp = 1; | 181 | bool is_cp = true; |
132 | 182 | ||
133 | read_lock(&nm_i->nat_tree_lock); | 183 | read_lock(&nm_i->nat_tree_lock); |
134 | e = __lookup_nat_cache(nm_i, nid); | 184 | e = __lookup_nat_cache(nm_i, nid); |
135 | if (e && !e->checkpointed) | 185 | if (e && !get_nat_flag(e, IS_CHECKPOINTED)) |
136 | is_cp = 0; | 186 | is_cp = false; |
137 | read_unlock(&nm_i->nat_tree_lock); | 187 | read_unlock(&nm_i->nat_tree_lock); |
138 | return is_cp; | 188 | return is_cp; |
139 | } | 189 | } |
140 | 190 | ||
141 | bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid) | 191 | bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino) |
142 | { | 192 | { |
143 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 193 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
144 | struct nat_entry *e; | 194 | struct nat_entry *e; |
145 | bool fsync_done = false; | 195 | bool fsynced = false; |
146 | 196 | ||
147 | read_lock(&nm_i->nat_tree_lock); | 197 | read_lock(&nm_i->nat_tree_lock); |
148 | e = __lookup_nat_cache(nm_i, nid); | 198 | e = __lookup_nat_cache(nm_i, ino); |
149 | if (e) | 199 | if (e && get_nat_flag(e, HAS_FSYNCED_INODE)) |
150 | fsync_done = e->fsync_done; | 200 | fsynced = true; |
151 | read_unlock(&nm_i->nat_tree_lock); | 201 | read_unlock(&nm_i->nat_tree_lock); |
152 | return fsync_done; | 202 | return fsynced; |
153 | } | 203 | } |
154 | 204 | ||
155 | void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid) | 205 | bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) |
156 | { | 206 | { |
157 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 207 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
158 | struct nat_entry *e; | 208 | struct nat_entry *e; |
209 | bool need_update = true; | ||
159 | 210 | ||
160 | write_lock(&nm_i->nat_tree_lock); | 211 | read_lock(&nm_i->nat_tree_lock); |
161 | e = __lookup_nat_cache(nm_i, nid); | 212 | e = __lookup_nat_cache(nm_i, ino); |
162 | if (e) | 213 | if (e && get_nat_flag(e, HAS_LAST_FSYNC) && |
163 | e->fsync_done = false; | 214 | (get_nat_flag(e, IS_CHECKPOINTED) || |
164 | write_unlock(&nm_i->nat_tree_lock); | 215 | get_nat_flag(e, HAS_FSYNCED_INODE))) |
216 | need_update = false; | ||
217 | read_unlock(&nm_i->nat_tree_lock); | ||
218 | return need_update; | ||
165 | } | 219 | } |
166 | 220 | ||
167 | static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) | 221 | static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) |
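Editor's note: __set_nat_cache_dirty()/__clear_nat_cache_dirty() above replace the old single dirty list with one bucket per NAT block: an entry's set number is its nid divided by NAT_ENTRY_PER_BLOCK, and each bucket carries its own entry list and count so the checkpoint can later flush whole NAT blocks together. The kernel keys the buckets with a radix tree and retries the GFP_ATOMIC insert after cond_resched() when it fails; the sketch below keeps only the bookkeeping, using a plain linked list of buckets as an illustrative simplification (455 entries per block assumes 4KB pages).

```c
#include <stdio.h>
#include <stdlib.h>

#define NAT_ENTRY_PER_BLOCK 455		/* ~4096 / sizeof(f2fs_nat_entry) */

struct nat_entry {
	unsigned int nid;
	int dirty;			/* models the IS_DIRTY flag bit */
	struct nat_entry *next;
};

struct nat_set {
	unsigned int set;		/* nid / NAT_ENTRY_PER_BLOCK */
	unsigned int entry_cnt;
	struct nat_entry *entries;
	struct nat_set *next;
};

struct nm_info {
	struct nat_set *sets;		/* kernel: radix tree nat_set_root */
	unsigned int dirty_nat_cnt;
};

/* Lookup-or-create; stands in for radix_tree_lookup + insert-with-retry. */
static struct nat_set *get_set(struct nm_info *nm, unsigned int set)
{
	struct nat_set *s;

	for (s = nm->sets; s; s = s->next)
		if (s->set == set)
			return s;

	s = calloc(1, sizeof(*s));
	if (!s)
		exit(1);
	s->set = set;
	s->next = nm->sets;
	nm->sets = s;
	return s;
}

static void set_nat_cache_dirty(struct nm_info *nm, struct nat_entry *ne)
{
	struct nat_set *s = get_set(nm, ne->nid / NAT_ENTRY_PER_BLOCK);

	if (ne->dirty)			/* already on a bucket: nothing to do */
		return;
	ne->dirty = 1;
	ne->next = s->entries;		/* kernel: list_move_tail to bucket */
	s->entries = ne;
	s->entry_cnt++;
	nm->dirty_nat_cnt++;
}

int main(void)
{
	struct nm_info nm = { 0 };
	struct nat_entry a = { .nid = 3 }, b = { .nid = 900 }, c = { .nid = 7 };

	set_nat_cache_dirty(&nm, &a);
	set_nat_cache_dirty(&nm, &b);	/* lands in a second bucket */
	set_nat_cache_dirty(&nm, &c);
	set_nat_cache_dirty(&nm, &c);	/* repeat dirtying is a no-op */

	printf("%u dirty entries across buckets %u and %u\n",
	       nm.dirty_nat_cnt, nm.sets->set, nm.sets->next->set);
	return 0;
}
```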
@@ -177,7 +231,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) | |||
177 | } | 231 | } |
178 | memset(new, 0, sizeof(struct nat_entry)); | 232 | memset(new, 0, sizeof(struct nat_entry)); |
179 | nat_set_nid(new, nid); | 233 | nat_set_nid(new, nid); |
180 | new->checkpointed = true; | 234 | nat_reset_flag(new); |
181 | list_add_tail(&new->list, &nm_i->nat_entries); | 235 | list_add_tail(&new->list, &nm_i->nat_entries); |
182 | nm_i->nat_cnt++; | 236 | nm_i->nat_cnt++; |
183 | return new; | 237 | return new; |
@@ -216,7 +270,7 @@ retry: | |||
216 | goto retry; | 270 | goto retry; |
217 | } | 271 | } |
218 | e->ni = *ni; | 272 | e->ni = *ni; |
219 | f2fs_bug_on(ni->blk_addr == NEW_ADDR); | 273 | f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); |
220 | } else if (new_blkaddr == NEW_ADDR) { | 274 | } else if (new_blkaddr == NEW_ADDR) { |
221 | /* | 275 | /* |
222 | * when nid is reallocated, | 276 | * when nid is reallocated, |
@@ -224,16 +278,16 @@ retry: | |||
224 | * So, reinitialize it with new information. | 278 | * So, reinitialize it with new information. |
225 | */ | 279 | */ |
226 | e->ni = *ni; | 280 | e->ni = *ni; |
227 | f2fs_bug_on(ni->blk_addr != NULL_ADDR); | 281 | f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); |
228 | } | 282 | } |
229 | 283 | ||
230 | /* sanity check */ | 284 | /* sanity check */ |
231 | f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr); | 285 | f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr); |
232 | f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR && | 286 | f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR && |
233 | new_blkaddr == NULL_ADDR); | 287 | new_blkaddr == NULL_ADDR); |
234 | f2fs_bug_on(nat_get_blkaddr(e) == NEW_ADDR && | 288 | f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && |
235 | new_blkaddr == NEW_ADDR); | 289 | new_blkaddr == NEW_ADDR); |
236 | f2fs_bug_on(nat_get_blkaddr(e) != NEW_ADDR && | 290 | f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR && |
237 | nat_get_blkaddr(e) != NULL_ADDR && | 291 | nat_get_blkaddr(e) != NULL_ADDR && |
238 | new_blkaddr == NEW_ADDR); | 292 | new_blkaddr == NEW_ADDR); |
239 | 293 | ||
@@ -245,12 +299,17 @@ retry: | |||
245 | 299 | ||
246 | /* change address */ | 300 | /* change address */ |
247 | nat_set_blkaddr(e, new_blkaddr); | 301 | nat_set_blkaddr(e, new_blkaddr); |
302 | if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR) | ||
303 | set_nat_flag(e, IS_CHECKPOINTED, false); | ||
248 | __set_nat_cache_dirty(nm_i, e); | 304 | __set_nat_cache_dirty(nm_i, e); |
249 | 305 | ||
250 | /* update fsync_mark if its inode nat entry is still alive */ | 306 | /* update fsync_mark if its inode nat entry is still alive */ |
251 | e = __lookup_nat_cache(nm_i, ni->ino); | 307 | e = __lookup_nat_cache(nm_i, ni->ino); |
252 | if (e) | 308 | if (e) { |
253 | e->fsync_done = fsync_done; | 309 | if (fsync_done && ni->nid == ni->ino) |
310 | set_nat_flag(e, HAS_FSYNCED_INODE, true); | ||
311 | set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); | ||
312 | } | ||
254 | write_unlock(&nm_i->nat_tree_lock); | 313 | write_unlock(&nm_i->nat_tree_lock); |
255 | } | 314 | } |
256 | 315 | ||
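Editor's note: set_node_addr() now expresses the old checkpointed/fsync_done booleans as flag transitions. A block whose address becomes NEW_ADDR or NULL_ADDR can no longer be the checkpointed copy, and the inode's own nat entry earns HAS_FSYNCED_INODE only when the fsynced node is the inode block itself (nid == ino), while HAS_LAST_FSYNC simply mirrors the latest fsync_done value. A small model of just those transitions (the address constants are demo values, not the kernel's):

```c
#include <stdio.h>

enum {
	IS_CHECKPOINTED   = 1 << 0,
	HAS_FSYNCED_INODE = 1 << 1,
	HAS_LAST_FSYNC    = 1 << 2,
};

#define NEW_ADDR  0xffffffffu		/* demo stand-ins for f2fs constants */
#define NULL_ADDR 0x0u

/* Applied to the entry whose block address just changed. */
static unsigned char on_addr_change(unsigned char flag, unsigned int blkaddr)
{
	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
		flag &= ~IS_CHECKPOINTED;  /* no longer the checkpointed copy */
	return flag;
}

/* Applied to the *inode's* nat entry after the node write completes. */
static unsigned char on_fsync(unsigned char flag, int fsync_done,
			      unsigned int nid, unsigned int ino)
{
	if (fsync_done && nid == ino)
		flag |= HAS_FSYNCED_INODE; /* the inode block itself was fsynced */
	if (fsync_done)
		flag |= HAS_LAST_FSYNC;
	else
		flag &= ~HAS_LAST_FSYNC;
	return flag;
}

int main(void)
{
	unsigned char flag = IS_CHECKPOINTED;

	flag = on_addr_change(flag, NEW_ADDR);	/* drops IS_CHECKPOINTED */
	flag = on_fsync(flag, 1, 5, 5);		/* inode node, fsync marked */
	printf("flags = 0x%02x\n", (unsigned)flag);
	return 0;
}
```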
@@ -411,7 +470,7 @@ got: | |||
411 | */ | 470 | */ |
412 | int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) | 471 | int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) |
413 | { | 472 | { |
414 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 473 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); |
415 | struct page *npage[4]; | 474 | struct page *npage[4]; |
416 | struct page *parent; | 475 | struct page *parent; |
417 | int offset[4]; | 476 | int offset[4]; |
@@ -504,15 +563,15 @@ release_out: | |||
504 | 563 | ||
505 | static void truncate_node(struct dnode_of_data *dn) | 564 | static void truncate_node(struct dnode_of_data *dn) |
506 | { | 565 | { |
507 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 566 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); |
508 | struct node_info ni; | 567 | struct node_info ni; |
509 | 568 | ||
510 | get_node_info(sbi, dn->nid, &ni); | 569 | get_node_info(sbi, dn->nid, &ni); |
511 | if (dn->inode->i_blocks == 0) { | 570 | if (dn->inode->i_blocks == 0) { |
512 | f2fs_bug_on(ni.blk_addr != NULL_ADDR); | 571 | f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR); |
513 | goto invalidate; | 572 | goto invalidate; |
514 | } | 573 | } |
515 | f2fs_bug_on(ni.blk_addr == NULL_ADDR); | 574 | f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); |
516 | 575 | ||
517 | /* Deallocate node address */ | 576 | /* Deallocate node address */ |
518 | invalidate_blocks(sbi, ni.blk_addr); | 577 | invalidate_blocks(sbi, ni.blk_addr); |
@@ -540,14 +599,13 @@ invalidate: | |||
540 | 599 | ||
541 | static int truncate_dnode(struct dnode_of_data *dn) | 600 | static int truncate_dnode(struct dnode_of_data *dn) |
542 | { | 601 | { |
543 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | ||
544 | struct page *page; | 602 | struct page *page; |
545 | 603 | ||
546 | if (dn->nid == 0) | 604 | if (dn->nid == 0) |
547 | return 1; | 605 | return 1; |
548 | 606 | ||
549 | /* get direct node */ | 607 | /* get direct node */ |
550 | page = get_node_page(sbi, dn->nid); | 608 | page = get_node_page(F2FS_I_SB(dn->inode), dn->nid); |
551 | if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) | 609 | if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) |
552 | return 1; | 610 | return 1; |
553 | else if (IS_ERR(page)) | 611 | else if (IS_ERR(page)) |
@@ -564,7 +622,6 @@ static int truncate_dnode(struct dnode_of_data *dn) | |||
564 | static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, | 622 | static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, |
565 | int ofs, int depth) | 623 | int ofs, int depth) |
566 | { | 624 | { |
567 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | ||
568 | struct dnode_of_data rdn = *dn; | 625 | struct dnode_of_data rdn = *dn; |
569 | struct page *page; | 626 | struct page *page; |
570 | struct f2fs_node *rn; | 627 | struct f2fs_node *rn; |
@@ -578,7 +635,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, | |||
578 | 635 | ||
579 | trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); | 636 | trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); |
580 | 637 | ||
581 | page = get_node_page(sbi, dn->nid); | 638 | page = get_node_page(F2FS_I_SB(dn->inode), dn->nid); |
582 | if (IS_ERR(page)) { | 639 | if (IS_ERR(page)) { |
583 | trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); | 640 | trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); |
584 | return PTR_ERR(page); | 641 | return PTR_ERR(page); |
@@ -636,7 +693,6 @@ out_err: | |||
636 | static int truncate_partial_nodes(struct dnode_of_data *dn, | 693 | static int truncate_partial_nodes(struct dnode_of_data *dn, |
637 | struct f2fs_inode *ri, int *offset, int depth) | 694 | struct f2fs_inode *ri, int *offset, int depth) |
638 | { | 695 | { |
639 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | ||
640 | struct page *pages[2]; | 696 | struct page *pages[2]; |
641 | nid_t nid[3]; | 697 | nid_t nid[3]; |
642 | nid_t child_nid; | 698 | nid_t child_nid; |
@@ -651,7 +707,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, | |||
651 | /* get indirect nodes in the path */ | 707 | /* get indirect nodes in the path */ |
652 | for (i = 0; i < idx + 1; i++) { | 708 | for (i = 0; i < idx + 1; i++) { |
653 | /* reference count'll be increased */ | 709 | /* reference count'll be increased */ |
654 | pages[i] = get_node_page(sbi, nid[i]); | 710 | pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]); |
655 | if (IS_ERR(pages[i])) { | 711 | if (IS_ERR(pages[i])) { |
656 | err = PTR_ERR(pages[i]); | 712 | err = PTR_ERR(pages[i]); |
657 | idx = i - 1; | 713 | idx = i - 1; |
@@ -696,7 +752,7 @@ fail: | |||
696 | */ | 752 | */ |
697 | int truncate_inode_blocks(struct inode *inode, pgoff_t from) | 753 | int truncate_inode_blocks(struct inode *inode, pgoff_t from) |
698 | { | 754 | { |
699 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 755 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
700 | int err = 0, cont = 1; | 756 | int err = 0, cont = 1; |
701 | int level, offset[4], noffset[4]; | 757 | int level, offset[4], noffset[4]; |
702 | unsigned int nofs = 0; | 758 | unsigned int nofs = 0; |
@@ -792,7 +848,7 @@ fail: | |||
792 | 848 | ||
793 | int truncate_xattr_node(struct inode *inode, struct page *page) | 849 | int truncate_xattr_node(struct inode *inode, struct page *page) |
794 | { | 850 | { |
795 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 851 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
796 | nid_t nid = F2FS_I(inode)->i_xattr_nid; | 852 | nid_t nid = F2FS_I(inode)->i_xattr_nid; |
797 | struct dnode_of_data dn; | 853 | struct dnode_of_data dn; |
798 | struct page *npage; | 854 | struct page *npage; |
@@ -840,7 +896,8 @@ void remove_inode_page(struct inode *inode) | |||
840 | truncate_data_blocks_range(&dn, 1); | 896 | truncate_data_blocks_range(&dn, 1); |
841 | 897 | ||
842 | /* 0 is possible, after f2fs_new_inode() has failed */ | 898 | /* 0 is possible, after f2fs_new_inode() has failed */ |
843 | f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); | 899 | f2fs_bug_on(F2FS_I_SB(inode), |
900 | inode->i_blocks != 0 && inode->i_blocks != 1); | ||
844 | 901 | ||
845 | /* will put inode & node pages */ | 902 | /* will put inode & node pages */ |
846 | truncate_node(&dn); | 903 | truncate_node(&dn); |
@@ -860,7 +917,7 @@ struct page *new_inode_page(struct inode *inode) | |||
860 | struct page *new_node_page(struct dnode_of_data *dn, | 917 | struct page *new_node_page(struct dnode_of_data *dn, |
861 | unsigned int ofs, struct page *ipage) | 918 | unsigned int ofs, struct page *ipage) |
862 | { | 919 | { |
863 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 920 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); |
864 | struct node_info old_ni, new_ni; | 921 | struct node_info old_ni, new_ni; |
865 | struct page *page; | 922 | struct page *page; |
866 | int err; | 923 | int err; |
@@ -880,7 +937,7 @@ struct page *new_node_page(struct dnode_of_data *dn, | |||
880 | get_node_info(sbi, dn->nid, &old_ni); | 937 | get_node_info(sbi, dn->nid, &old_ni); |
881 | 938 | ||
882 | /* Reinitialize old_ni with new node page */ | 939 | /* Reinitialize old_ni with new node page */ |
883 | f2fs_bug_on(old_ni.blk_addr != NULL_ADDR); | 940 | f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR); |
884 | new_ni = old_ni; | 941 | new_ni = old_ni; |
885 | new_ni.ino = dn->inode->i_ino; | 942 | new_ni.ino = dn->inode->i_ino; |
886 | set_node_addr(sbi, &new_ni, NEW_ADDR, false); | 943 | set_node_addr(sbi, &new_ni, NEW_ADDR, false); |
@@ -918,7 +975,7 @@ fail: | |||
918 | */ | 975 | */ |
919 | static int read_node_page(struct page *page, int rw) | 976 | static int read_node_page(struct page *page, int rw) |
920 | { | 977 | { |
921 | struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | 978 | struct f2fs_sb_info *sbi = F2FS_P_SB(page); |
922 | struct node_info ni; | 979 | struct node_info ni; |
923 | 980 | ||
924 | get_node_info(sbi, page->index, &ni); | 981 | get_node_info(sbi, page->index, &ni); |
@@ -994,7 +1051,7 @@ got_it: | |||
994 | */ | 1051 | */ |
995 | struct page *get_node_page_ra(struct page *parent, int start) | 1052 | struct page *get_node_page_ra(struct page *parent, int start) |
996 | { | 1053 | { |
997 | struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); | 1054 | struct f2fs_sb_info *sbi = F2FS_P_SB(parent); |
998 | struct blk_plug plug; | 1055 | struct blk_plug plug; |
999 | struct page *page; | 1056 | struct page *page; |
1000 | int err, i, end; | 1057 | int err, i, end; |
@@ -1124,10 +1181,14 @@ continue_unlock: | |||
1124 | 1181 | ||
1125 | /* called by fsync() */ | 1182 | /* called by fsync() */ |
1126 | if (ino && IS_DNODE(page)) { | 1183 | if (ino && IS_DNODE(page)) { |
1127 | int mark = !is_checkpointed_node(sbi, ino); | ||
1128 | set_fsync_mark(page, 1); | 1184 | set_fsync_mark(page, 1); |
1129 | if (IS_INODE(page)) | 1185 | if (IS_INODE(page)) { |
1130 | set_dentry_mark(page, mark); | 1186 | if (!is_checkpointed_node(sbi, ino) && |
1187 | !has_fsynced_inode(sbi, ino)) | ||
1188 | set_dentry_mark(page, 1); | ||
1189 | else | ||
1190 | set_dentry_mark(page, 0); | ||
1191 | } | ||
1131 | nwritten++; | 1192 | nwritten++; |
1132 | } else { | 1193 | } else { |
1133 | set_fsync_mark(page, 0); | 1194 | set_fsync_mark(page, 0); |
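Editor's note: the writeback hunk above replaces the single is_checkpointed_node() test with a two-part question: an fsynced inode page needs a dentry mark only when neither a checkpoint nor an earlier fsynced inode block already makes the inode recoverable. As a predicate:

```c
#include <stdbool.h>
#include <stdio.h>

enum { IS_CHECKPOINTED = 1 << 0, HAS_FSYNCED_INODE = 1 << 1 };

/* Recovery must re-create the dentry only when nothing on disk,
 * neither a checkpoint nor a previously fsynced inode block,
 * already covers this inode. */
static bool need_dentry_mark(unsigned char flag)
{
	return !(flag & IS_CHECKPOINTED) && !(flag & HAS_FSYNCED_INODE);
}

int main(void)
{
	printf("%d %d\n", need_dentry_mark(0),		/* 1: mark it */
	       need_dentry_mark(IS_CHECKPOINTED));	/* 0: skip it */
	return 0;
}
```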
@@ -1206,7 +1267,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) | |||
1206 | static int f2fs_write_node_page(struct page *page, | 1267 | static int f2fs_write_node_page(struct page *page, |
1207 | struct writeback_control *wbc) | 1268 | struct writeback_control *wbc) |
1208 | { | 1269 | { |
1209 | struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | 1270 | struct f2fs_sb_info *sbi = F2FS_P_SB(page); |
1210 | nid_t nid; | 1271 | nid_t nid; |
1211 | block_t new_addr; | 1272 | block_t new_addr; |
1212 | struct node_info ni; | 1273 | struct node_info ni; |
@@ -1226,7 +1287,7 @@ static int f2fs_write_node_page(struct page *page, | |||
1226 | 1287 | ||
1227 | /* get old block addr of this node page */ | 1288 | /* get old block addr of this node page */ |
1228 | nid = nid_of_node(page); | 1289 | nid = nid_of_node(page); |
1229 | f2fs_bug_on(page->index != nid); | 1290 | f2fs_bug_on(sbi, page->index != nid); |
1230 | 1291 | ||
1231 | get_node_info(sbi, nid, &ni); | 1292 | get_node_info(sbi, nid, &ni); |
1232 | 1293 | ||
@@ -1257,7 +1318,7 @@ redirty_out: | |||
1257 | static int f2fs_write_node_pages(struct address_space *mapping, | 1318 | static int f2fs_write_node_pages(struct address_space *mapping, |
1258 | struct writeback_control *wbc) | 1319 | struct writeback_control *wbc) |
1259 | { | 1320 | { |
1260 | struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); | 1321 | struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); |
1261 | long diff; | 1322 | long diff; |
1262 | 1323 | ||
1263 | trace_f2fs_writepages(mapping->host, wbc, NODE); | 1324 | trace_f2fs_writepages(mapping->host, wbc, NODE); |
@@ -1282,15 +1343,12 @@ skip_write: | |||
1282 | 1343 | ||
1283 | static int f2fs_set_node_page_dirty(struct page *page) | 1344 | static int f2fs_set_node_page_dirty(struct page *page) |
1284 | { | 1345 | { |
1285 | struct address_space *mapping = page->mapping; | ||
1286 | struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); | ||
1287 | |||
1288 | trace_f2fs_set_page_dirty(page, NODE); | 1346 | trace_f2fs_set_page_dirty(page, NODE); |
1289 | 1347 | ||
1290 | SetPageUptodate(page); | 1348 | SetPageUptodate(page); |
1291 | if (!PageDirty(page)) { | 1349 | if (!PageDirty(page)) { |
1292 | __set_page_dirty_nobuffers(page); | 1350 | __set_page_dirty_nobuffers(page); |
1293 | inc_page_count(sbi, F2FS_DIRTY_NODES); | 1351 | inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); |
1294 | SetPagePrivate(page); | 1352 | SetPagePrivate(page); |
1295 | return 1; | 1353 | return 1; |
1296 | } | 1354 | } |
@@ -1301,9 +1359,8 @@ static void f2fs_invalidate_node_page(struct page *page, unsigned int offset, | |||
1301 | unsigned int length) | 1359 | unsigned int length) |
1302 | { | 1360 | { |
1303 | struct inode *inode = page->mapping->host; | 1361 | struct inode *inode = page->mapping->host; |
1304 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
1305 | if (PageDirty(page)) | 1362 | if (PageDirty(page)) |
1306 | dec_page_count(sbi, F2FS_DIRTY_NODES); | 1363 | dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_NODES); |
1307 | ClearPagePrivate(page); | 1364 | ClearPagePrivate(page); |
1308 | } | 1365 | } |
1309 | 1366 | ||
@@ -1356,7 +1413,8 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) | |||
1356 | read_lock(&nm_i->nat_tree_lock); | 1413 | read_lock(&nm_i->nat_tree_lock); |
1357 | ne = __lookup_nat_cache(nm_i, nid); | 1414 | ne = __lookup_nat_cache(nm_i, nid); |
1358 | if (ne && | 1415 | if (ne && |
1359 | (!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR)) | 1416 | (!get_nat_flag(ne, IS_CHECKPOINTED) || |
1417 | nat_get_blkaddr(ne) != NULL_ADDR)) | ||
1360 | allocated = true; | 1418 | allocated = true; |
1361 | read_unlock(&nm_i->nat_tree_lock); | 1419 | read_unlock(&nm_i->nat_tree_lock); |
1362 | if (allocated) | 1420 | if (allocated) |
@@ -1413,7 +1471,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, | |||
1413 | break; | 1471 | break; |
1414 | 1472 | ||
1415 | blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); | 1473 | blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); |
1416 | f2fs_bug_on(blk_addr == NEW_ADDR); | 1474 | f2fs_bug_on(sbi, blk_addr == NEW_ADDR); |
1417 | if (blk_addr == NULL_ADDR) { | 1475 | if (blk_addr == NULL_ADDR) { |
1418 | if (add_free_nid(sbi, start_nid, true) < 0) | 1476 | if (add_free_nid(sbi, start_nid, true) < 0) |
1419 | break; | 1477 | break; |
@@ -1483,12 +1541,12 @@ retry: | |||
1483 | 1541 | ||
1484 | /* We should not use stale free nids created by build_free_nids */ | 1542 | /* We should not use stale free nids created by build_free_nids */ |
1485 | if (nm_i->fcnt && !on_build_free_nids(nm_i)) { | 1543 | if (nm_i->fcnt && !on_build_free_nids(nm_i)) { |
1486 | f2fs_bug_on(list_empty(&nm_i->free_nid_list)); | 1544 | f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); |
1487 | list_for_each_entry(i, &nm_i->free_nid_list, list) | 1545 | list_for_each_entry(i, &nm_i->free_nid_list, list) |
1488 | if (i->state == NID_NEW) | 1546 | if (i->state == NID_NEW) |
1489 | break; | 1547 | break; |
1490 | 1548 | ||
1491 | f2fs_bug_on(i->state != NID_NEW); | 1549 | f2fs_bug_on(sbi, i->state != NID_NEW); |
1492 | *nid = i->nid; | 1550 | *nid = i->nid; |
1493 | i->state = NID_ALLOC; | 1551 | i->state = NID_ALLOC; |
1494 | nm_i->fcnt--; | 1552 | nm_i->fcnt--; |
@@ -1514,7 +1572,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) | |||
1514 | 1572 | ||
1515 | spin_lock(&nm_i->free_nid_list_lock); | 1573 | spin_lock(&nm_i->free_nid_list_lock); |
1516 | i = __lookup_free_nid_list(nm_i, nid); | 1574 | i = __lookup_free_nid_list(nm_i, nid); |
1517 | f2fs_bug_on(!i || i->state != NID_ALLOC); | 1575 | f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); |
1518 | __del_from_free_nid_list(nm_i, i); | 1576 | __del_from_free_nid_list(nm_i, i); |
1519 | spin_unlock(&nm_i->free_nid_list_lock); | 1577 | spin_unlock(&nm_i->free_nid_list_lock); |
1520 | 1578 | ||
@@ -1535,7 +1593,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) | |||
1535 | 1593 | ||
1536 | spin_lock(&nm_i->free_nid_list_lock); | 1594 | spin_lock(&nm_i->free_nid_list_lock); |
1537 | i = __lookup_free_nid_list(nm_i, nid); | 1595 | i = __lookup_free_nid_list(nm_i, nid); |
1538 | f2fs_bug_on(!i || i->state != NID_ALLOC); | 1596 | f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); |
1539 | if (!available_free_memory(sbi, FREE_NIDS)) { | 1597 | if (!available_free_memory(sbi, FREE_NIDS)) { |
1540 | __del_from_free_nid_list(nm_i, i); | 1598 | __del_from_free_nid_list(nm_i, i); |
1541 | need_free = true; | 1599 | need_free = true; |
@@ -1551,14 +1609,13 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) | |||
1551 | 1609 | ||
1552 | void recover_inline_xattr(struct inode *inode, struct page *page) | 1610 | void recover_inline_xattr(struct inode *inode, struct page *page) |
1553 | { | 1611 | { |
1554 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
1555 | void *src_addr, *dst_addr; | 1612 | void *src_addr, *dst_addr; |
1556 | size_t inline_size; | 1613 | size_t inline_size; |
1557 | struct page *ipage; | 1614 | struct page *ipage; |
1558 | struct f2fs_inode *ri; | 1615 | struct f2fs_inode *ri; |
1559 | 1616 | ||
1560 | ipage = get_node_page(sbi, inode->i_ino); | 1617 | ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); |
1561 | f2fs_bug_on(IS_ERR(ipage)); | 1618 | f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage)); |
1562 | 1619 | ||
1563 | ri = F2FS_INODE(page); | 1620 | ri = F2FS_INODE(page); |
1564 | if (!(ri->i_inline & F2FS_INLINE_XATTR)) { | 1621 | if (!(ri->i_inline & F2FS_INLINE_XATTR)) { |
@@ -1579,7 +1636,7 @@ update_inode: | |||
1579 | 1636 | ||
1580 | void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) | 1637 | void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) |
1581 | { | 1638 | { |
1582 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 1639 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
1583 | nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; | 1640 | nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; |
1584 | nid_t new_xnid = nid_of_node(page); | 1641 | nid_t new_xnid = nid_of_node(page); |
1585 | struct node_info ni; | 1642 | struct node_info ni; |
@@ -1590,7 +1647,7 @@ void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) | |||
1590 | 1647 | ||
1591 | /* Deallocate node address */ | 1648 | /* Deallocate node address */ |
1592 | get_node_info(sbi, prev_xnid, &ni); | 1649 | get_node_info(sbi, prev_xnid, &ni); |
1593 | f2fs_bug_on(ni.blk_addr == NULL_ADDR); | 1650 | f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); |
1594 | invalidate_blocks(sbi, ni.blk_addr); | 1651 | invalidate_blocks(sbi, ni.blk_addr); |
1595 | dec_valid_node_count(sbi, inode); | 1652 | dec_valid_node_count(sbi, inode); |
1596 | set_node_addr(sbi, &ni, NULL_ADDR, false); | 1653 | set_node_addr(sbi, &ni, NULL_ADDR, false); |
@@ -1598,7 +1655,7 @@ void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) | |||
1598 | recover_xnid: | 1655 | recover_xnid: |
1599 | /* 2: allocate new xattr nid */ | 1656 | /* 2: allocate new xattr nid */ |
1600 | if (unlikely(!inc_valid_node_count(sbi, inode))) | 1657 | if (unlikely(!inc_valid_node_count(sbi, inode))) |
1601 | f2fs_bug_on(1); | 1658 | f2fs_bug_on(sbi, 1); |
1602 | 1659 | ||
1603 | remove_free_nid(NM_I(sbi), new_xnid); | 1660 | remove_free_nid(NM_I(sbi), new_xnid); |
1604 | get_node_info(sbi, new_xnid, &ni); | 1661 | get_node_info(sbi, new_xnid, &ni); |
@@ -1691,7 +1748,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi, | |||
1691 | struct f2fs_summary *sum_entry; | 1748 | struct f2fs_summary *sum_entry; |
1692 | struct inode *inode = sbi->sb->s_bdev->bd_inode; | 1749 | struct inode *inode = sbi->sb->s_bdev->bd_inode; |
1693 | block_t addr; | 1750 | block_t addr; |
1694 | int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); | 1751 | int bio_blocks = MAX_BIO_BLOCKS(sbi); |
1695 | struct page *pages[bio_blocks]; | 1752 | struct page *pages[bio_blocks]; |
1696 | int i, idx, last_offset, nrpages, err = 0; | 1753 | int i, idx, last_offset, nrpages, err = 0; |
1697 | 1754 | ||
@@ -1733,89 +1790,6 @@ skip: | |||
1733 | return err; | 1790 | return err; |
1734 | } | 1791 | } |
1735 | 1792 | ||
1736 | static struct nat_entry_set *grab_nat_entry_set(void) | ||
1737 | { | ||
1738 | struct nat_entry_set *nes = | ||
1739 | f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); | ||
1740 | |||
1741 | nes->entry_cnt = 0; | ||
1742 | INIT_LIST_HEAD(&nes->set_list); | ||
1743 | INIT_LIST_HEAD(&nes->entry_list); | ||
1744 | return nes; | ||
1745 | } | ||
1746 | |||
1747 | static void release_nat_entry_set(struct nat_entry_set *nes, | ||
1748 | struct f2fs_nm_info *nm_i) | ||
1749 | { | ||
1750 | f2fs_bug_on(!list_empty(&nes->entry_list)); | ||
1751 | |||
1752 | nm_i->dirty_nat_cnt -= nes->entry_cnt; | ||
1753 | list_del(&nes->set_list); | ||
1754 | kmem_cache_free(nat_entry_set_slab, nes); | ||
1755 | } | ||
1756 | |||
1757 | static void adjust_nat_entry_set(struct nat_entry_set *nes, | ||
1758 | struct list_head *head) | ||
1759 | { | ||
1760 | struct nat_entry_set *next = nes; | ||
1761 | |||
1762 | if (list_is_last(&nes->set_list, head)) | ||
1763 | return; | ||
1764 | |||
1765 | list_for_each_entry_continue(next, head, set_list) | ||
1766 | if (nes->entry_cnt <= next->entry_cnt) | ||
1767 | break; | ||
1768 | |||
1769 | list_move_tail(&nes->set_list, &next->set_list); | ||
1770 | } | ||
1771 | |||
1772 | static void add_nat_entry(struct nat_entry *ne, struct list_head *head) | ||
1773 | { | ||
1774 | struct nat_entry_set *nes; | ||
1775 | nid_t start_nid = START_NID(ne->ni.nid); | ||
1776 | |||
1777 | list_for_each_entry(nes, head, set_list) { | ||
1778 | if (nes->start_nid == start_nid) { | ||
1779 | list_move_tail(&ne->list, &nes->entry_list); | ||
1780 | nes->entry_cnt++; | ||
1781 | adjust_nat_entry_set(nes, head); | ||
1782 | return; | ||
1783 | } | ||
1784 | } | ||
1785 | |||
1786 | nes = grab_nat_entry_set(); | ||
1787 | |||
1788 | nes->start_nid = start_nid; | ||
1789 | list_move_tail(&ne->list, &nes->entry_list); | ||
1790 | nes->entry_cnt++; | ||
1791 | list_add(&nes->set_list, head); | ||
1792 | } | ||
1793 | |||
1794 | static void merge_nats_in_set(struct f2fs_sb_info *sbi) | ||
1795 | { | ||
1796 | struct f2fs_nm_info *nm_i = NM_I(sbi); | ||
1797 | struct list_head *dirty_list = &nm_i->dirty_nat_entries; | ||
1798 | struct list_head *set_list = &nm_i->nat_entry_set; | ||
1799 | struct nat_entry *ne, *tmp; | ||
1800 | |||
1801 | write_lock(&nm_i->nat_tree_lock); | ||
1802 | list_for_each_entry_safe(ne, tmp, dirty_list, list) { | ||
1803 | if (nat_get_blkaddr(ne) == NEW_ADDR) | ||
1804 | continue; | ||
1805 | add_nat_entry(ne, set_list); | ||
1806 | nm_i->dirty_nat_cnt++; | ||
1807 | } | ||
1808 | write_unlock(&nm_i->nat_tree_lock); | ||
1809 | } | ||
1810 | |||
1811 | static bool __has_cursum_space(struct f2fs_summary_block *sum, int size) | ||
1812 | { | ||
1813 | if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES) | ||
1814 | return true; | ||
1815 | else | ||
1816 | return false; | ||
1817 | } | ||
1818 | |||
1819 | static void remove_nats_in_journal(struct f2fs_sb_info *sbi) | 1793 | static void remove_nats_in_journal(struct f2fs_sb_info *sbi) |
1820 | { | 1794 | { |
1821 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 1795 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
@@ -1850,99 +1824,130 @@ found: | |||
1850 | mutex_unlock(&curseg->curseg_mutex); | 1824 | mutex_unlock(&curseg->curseg_mutex); |
1851 | } | 1825 | } |
1852 | 1826 | ||
1853 | /* | 1827 | static void __adjust_nat_entry_set(struct nat_entry_set *nes, |
1854 | * This function is called during the checkpointing process. | 1828 | struct list_head *head, int max) |
1855 | */ | ||
1856 | void flush_nat_entries(struct f2fs_sb_info *sbi) | ||
1857 | { | 1829 | { |
1858 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 1830 | struct nat_entry_set *cur; |
1859 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); | ||
1860 | struct f2fs_summary_block *sum = curseg->sum_blk; | ||
1861 | struct nat_entry_set *nes, *tmp; | ||
1862 | struct list_head *head = &nm_i->nat_entry_set; | ||
1863 | bool to_journal = true; | ||
1864 | 1831 | ||
1865 | /* merge nat entries of dirty list to nat entry set temporarily */ | 1832 | if (nes->entry_cnt >= max) |
1866 | merge_nats_in_set(sbi); | 1833 | goto add_out; |
1867 | 1834 | ||
1868 | /* | 1835 | list_for_each_entry(cur, head, set_list) { |
1869 | * if there are no enough space in journal to store dirty nat | 1836 | if (cur->entry_cnt >= nes->entry_cnt) { |
1870 | * entries, remove all entries from journal and merge them | 1837 | list_add(&nes->set_list, cur->set_list.prev); |
1871 | * into nat entry set. | 1838 | return; |
1872 | */ | 1839 | } |
1873 | if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) { | ||
1874 | remove_nats_in_journal(sbi); | ||
1875 | |||
1876 | /* | ||
1877 | * merge nat entries of dirty list to nat entry set temporarily | ||
1878 | */ | ||
1879 | merge_nats_in_set(sbi); | ||
1880 | } | 1840 | } |
1841 | add_out: | ||
1842 | list_add_tail(&nes->set_list, head); | ||
1843 | } | ||
1881 | 1844 | ||
1882 | if (!nm_i->dirty_nat_cnt) | 1845 | static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, |
1883 | return; | 1846 | struct nat_entry_set *set) |
1847 | { | ||
1848 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); | ||
1849 | struct f2fs_summary_block *sum = curseg->sum_blk; | ||
1850 | nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK; | ||
1851 | bool to_journal = true; | ||
1852 | struct f2fs_nat_block *nat_blk; | ||
1853 | struct nat_entry *ne, *cur; | ||
1854 | struct page *page = NULL; | ||
1884 | 1855 | ||
1885 | /* | 1856 | /* |
1886 | * there are two steps to flush nat entries: | 1857 | * there are two steps to flush nat entries: |
1887 | * #1, flush nat entries to journal in current hot data summary block. | 1858 | * #1, flush nat entries to journal in current hot data summary block. |
1888 | * #2, flush nat entries to nat page. | 1859 | * #2, flush nat entries to nat page. |
1889 | */ | 1860 | */ |
1890 | list_for_each_entry_safe(nes, tmp, head, set_list) { | 1861 | if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL)) |
1891 | struct f2fs_nat_block *nat_blk; | 1862 | to_journal = false; |
1892 | struct nat_entry *ne, *cur; | ||
1893 | struct page *page; | ||
1894 | nid_t start_nid = nes->start_nid; | ||
1895 | 1863 | ||
1896 | if (to_journal && !__has_cursum_space(sum, nes->entry_cnt)) | 1864 | if (to_journal) { |
1897 | to_journal = false; | 1865 | mutex_lock(&curseg->curseg_mutex); |
1866 | } else { | ||
1867 | page = get_next_nat_page(sbi, start_nid); | ||
1868 | nat_blk = page_address(page); | ||
1869 | f2fs_bug_on(sbi, !nat_blk); | ||
1870 | } | ||
1871 | |||
1872 | /* flush dirty nats in nat entry set */ | ||
1873 | list_for_each_entry_safe(ne, cur, &set->entry_list, list) { | ||
1874 | struct f2fs_nat_entry *raw_ne; | ||
1875 | nid_t nid = nat_get_nid(ne); | ||
1876 | int offset; | ||
1877 | |||
1878 | if (nat_get_blkaddr(ne) == NEW_ADDR) | ||
1879 | continue; | ||
1898 | 1880 | ||
1899 | if (to_journal) { | 1881 | if (to_journal) { |
1900 | mutex_lock(&curseg->curseg_mutex); | 1882 | offset = lookup_journal_in_cursum(sum, |
1883 | NAT_JOURNAL, nid, 1); | ||
1884 | f2fs_bug_on(sbi, offset < 0); | ||
1885 | raw_ne = &nat_in_journal(sum, offset); | ||
1886 | nid_in_journal(sum, offset) = cpu_to_le32(nid); | ||
1901 | } else { | 1887 | } else { |
1902 | page = get_next_nat_page(sbi, start_nid); | 1888 | raw_ne = &nat_blk->entries[nid - start_nid]; |
1903 | nat_blk = page_address(page); | ||
1904 | f2fs_bug_on(!nat_blk); | ||
1905 | } | 1889 | } |
1890 | raw_nat_from_node_info(raw_ne, &ne->ni); | ||
1906 | 1891 | ||
1907 | /* flush dirty nats in nat entry set */ | 1892 | write_lock(&NM_I(sbi)->nat_tree_lock); |
1908 | list_for_each_entry_safe(ne, cur, &nes->entry_list, list) { | 1893 | nat_reset_flag(ne); |
1909 | struct f2fs_nat_entry *raw_ne; | 1894 | __clear_nat_cache_dirty(NM_I(sbi), ne); |
1910 | nid_t nid = nat_get_nid(ne); | 1895 | write_unlock(&NM_I(sbi)->nat_tree_lock); |
1911 | int offset; | ||
1912 | 1896 | ||
1913 | if (to_journal) { | 1897 | if (nat_get_blkaddr(ne) == NULL_ADDR) |
1914 | offset = lookup_journal_in_cursum(sum, | 1898 | add_free_nid(sbi, nid, false); |
1915 | NAT_JOURNAL, nid, 1); | 1899 | } |
1916 | f2fs_bug_on(offset < 0); | ||
1917 | raw_ne = &nat_in_journal(sum, offset); | ||
1918 | nid_in_journal(sum, offset) = cpu_to_le32(nid); | ||
1919 | } else { | ||
1920 | raw_ne = &nat_blk->entries[nid - start_nid]; | ||
1921 | } | ||
1922 | raw_nat_from_node_info(raw_ne, &ne->ni); | ||
1923 | 1900 | ||
1924 | if (nat_get_blkaddr(ne) == NULL_ADDR && | 1901 | if (to_journal) |
1925 | add_free_nid(sbi, nid, false) <= 0) { | 1902 | mutex_unlock(&curseg->curseg_mutex); |
1926 | write_lock(&nm_i->nat_tree_lock); | 1903 | else |
1927 | __del_from_nat_cache(nm_i, ne); | 1904 | f2fs_put_page(page, 1); |
1928 | write_unlock(&nm_i->nat_tree_lock); | ||
1929 | } else { | ||
1930 | write_lock(&nm_i->nat_tree_lock); | ||
1931 | __clear_nat_cache_dirty(nm_i, ne); | ||
1932 | write_unlock(&nm_i->nat_tree_lock); | ||
1933 | } | ||
1934 | } | ||
1935 | 1905 | ||
1936 | if (to_journal) | 1906 | if (!set->entry_cnt) { |
1937 | mutex_unlock(&curseg->curseg_mutex); | 1907 | radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); |
1938 | else | 1908 | kmem_cache_free(nat_entry_set_slab, set); |
1939 | f2fs_put_page(page, 1); | 1909 | } |
1910 | } | ||
1911 | |||
1912 | /* | ||
1913 | * This function is called during the checkpointing process. | ||
1914 | */ | ||
1915 | void flush_nat_entries(struct f2fs_sb_info *sbi) | ||
1916 | { | ||
1917 | struct f2fs_nm_info *nm_i = NM_I(sbi); | ||
1918 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); | ||
1919 | struct f2fs_summary_block *sum = curseg->sum_blk; | ||
1920 | struct nat_entry_set *setvec[NATVEC_SIZE]; | ||
1921 | struct nat_entry_set *set, *tmp; | ||
1922 | unsigned int found; | ||
1923 | nid_t set_idx = 0; | ||
1924 | LIST_HEAD(sets); | ||
1925 | |||
1926 | /* | ||
1927 | * if there is not enough space in the journal to store dirty nat | ||
1928 | * entries, remove all entries from journal and merge them | ||
1929 | * into nat entry set. | ||
1930 | */ | ||
1931 | if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) | ||
1932 | remove_nats_in_journal(sbi); | ||
1940 | 1933 | ||
1941 | release_nat_entry_set(nes, nm_i); | 1934 | if (!nm_i->dirty_nat_cnt) |
1935 | return; | ||
1936 | |||
1937 | while ((found = __gang_lookup_nat_set(nm_i, | ||
1938 | set_idx, NATVEC_SIZE, setvec))) { | ||
1939 | unsigned idx; | ||
1940 | set_idx = setvec[found - 1]->set + 1; | ||
1941 | for (idx = 0; idx < found; idx++) | ||
1942 | __adjust_nat_entry_set(setvec[idx], &sets, | ||
1943 | MAX_NAT_JENTRIES(sum)); | ||
1942 | } | 1944 | } |
1943 | 1945 | ||
1944 | f2fs_bug_on(!list_empty(head)); | 1946 | /* flush dirty nats in nat entry set */ |
1945 | f2fs_bug_on(nm_i->dirty_nat_cnt); | 1947 | list_for_each_entry_safe(set, tmp, &sets, set_list) |
1948 | __flush_nat_entry_set(sbi, set); | ||
1949 | |||
1950 | f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); | ||
1946 | } | 1951 | } |
1947 | 1952 | ||
1948 | static int init_node_manager(struct f2fs_sb_info *sbi) | 1953 | static int init_node_manager(struct f2fs_sb_info *sbi) |
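Editor's note: flush_nat_entries() is rebuilt around the per-block sets. It gang-looks-up every dirty set, inserts each into a work list kept sorted by ascending entry count (__adjust_nat_entry_set), then flushes set by set: a set that still fits in the hot-data summary journal goes there, otherwise it is written into its NAT block page. Flushing the smallest sets first lets the limited journal slots absorb as many whole sets as possible. The ordering and the fit test in miniature (the journal capacity below is a made-up demo constant):

```c
#include <stdio.h>
#include <stdlib.h>

#define NAT_JOURNAL_SLOTS 38	/* illustrative capacity, not the real value */

struct set { unsigned int set_no, entry_cnt; };

/* qsort stands in for the sorted insert in __adjust_nat_entry_set() */
static int by_count(const void *a, const void *b)
{
	const struct set *x = a, *y = b;

	return (int)x->entry_cnt - (int)y->entry_cnt;
}

int main(void)
{
	struct set sets[] = { { 0, 30 }, { 1, 5 }, { 2, 12 } };
	unsigned int used = 0;
	size_t i;

	qsort(sets, 3, sizeof(sets[0]), by_count);

	for (i = 0; i < 3; i++) {
		/* __has_cursum_space(): does the whole set still fit? */
		if (used + sets[i].entry_cnt <= NAT_JOURNAL_SLOTS) {
			used += sets[i].entry_cnt;
			printf("set %u -> journal (%u slots used)\n",
			       sets[i].set_no, used);
		} else {
			printf("set %u -> NAT block page\n", sets[i].set_no);
		}
	}
	return 0;
}
```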
@@ -1969,9 +1974,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi) | |||
1969 | INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); | 1974 | INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); |
1970 | INIT_LIST_HEAD(&nm_i->free_nid_list); | 1975 | INIT_LIST_HEAD(&nm_i->free_nid_list); |
1971 | INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); | 1976 | INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); |
1977 | INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC); | ||
1972 | INIT_LIST_HEAD(&nm_i->nat_entries); | 1978 | INIT_LIST_HEAD(&nm_i->nat_entries); |
1973 | INIT_LIST_HEAD(&nm_i->dirty_nat_entries); | ||
1974 | INIT_LIST_HEAD(&nm_i->nat_entry_set); | ||
1975 | 1979 | ||
1976 | mutex_init(&nm_i->build_lock); | 1980 | mutex_init(&nm_i->build_lock); |
1977 | spin_lock_init(&nm_i->free_nid_list_lock); | 1981 | spin_lock_init(&nm_i->free_nid_list_lock); |
@@ -2020,14 +2024,14 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) | |||
2020 | /* destroy free nid list */ | 2024 | /* destroy free nid list */ |
2021 | spin_lock(&nm_i->free_nid_list_lock); | 2025 | spin_lock(&nm_i->free_nid_list_lock); |
2022 | list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { | 2026 | list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { |
2023 | f2fs_bug_on(i->state == NID_ALLOC); | 2027 | f2fs_bug_on(sbi, i->state == NID_ALLOC); |
2024 | __del_from_free_nid_list(nm_i, i); | 2028 | __del_from_free_nid_list(nm_i, i); |
2025 | nm_i->fcnt--; | 2029 | nm_i->fcnt--; |
2026 | spin_unlock(&nm_i->free_nid_list_lock); | 2030 | spin_unlock(&nm_i->free_nid_list_lock); |
2027 | kmem_cache_free(free_nid_slab, i); | 2031 | kmem_cache_free(free_nid_slab, i); |
2028 | spin_lock(&nm_i->free_nid_list_lock); | 2032 | spin_lock(&nm_i->free_nid_list_lock); |
2029 | } | 2033 | } |
2030 | f2fs_bug_on(nm_i->fcnt); | 2034 | f2fs_bug_on(sbi, nm_i->fcnt); |
2031 | spin_unlock(&nm_i->free_nid_list_lock); | 2035 | spin_unlock(&nm_i->free_nid_list_lock); |
2032 | 2036 | ||
2033 | /* destroy nat cache */ | 2037 | /* destroy nat cache */ |
@@ -2039,7 +2043,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) | |||
2039 | for (idx = 0; idx < found; idx++) | 2043 | for (idx = 0; idx < found; idx++) |
2040 | __del_from_nat_cache(nm_i, natvec[idx]); | 2044 | __del_from_nat_cache(nm_i, natvec[idx]); |
2041 | } | 2045 | } |
2042 | f2fs_bug_on(nm_i->nat_cnt); | 2046 | f2fs_bug_on(sbi, nm_i->nat_cnt); |
2043 | write_unlock(&nm_i->nat_tree_lock); | 2047 | write_unlock(&nm_i->nat_tree_lock); |
2044 | 2048 | ||
2045 | kfree(nm_i->nat_bitmap); | 2049 | kfree(nm_i->nat_bitmap); |
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 8a116a407599..8d5e6e0dd840 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h | |||
@@ -39,10 +39,16 @@ struct node_info { | |||
39 | unsigned char version; /* version of the node */ | 39 | unsigned char version; /* version of the node */ |
40 | }; | 40 | }; |
41 | 41 | ||
42 | enum { | ||
43 | IS_CHECKPOINTED, /* is it checkpointed before? */ | ||
44 | HAS_FSYNCED_INODE, /* is the inode fsynced before? */ | ||
45 | HAS_LAST_FSYNC, /* has the latest node fsync mark? */ | ||
46 | IS_DIRTY, /* is this nat entry dirty? */ | ||
47 | }; | ||
48 | |||
42 | struct nat_entry { | 49 | struct nat_entry { |
43 | struct list_head list; /* for clean or dirty nat list */ | 50 | struct list_head list; /* for clean or dirty nat list */ |
44 | bool checkpointed; /* whether it is checkpointed or not */ | 51 | unsigned char flag; /* for node information bits */ |
45 | bool fsync_done; /* whether the latest node has fsync mark */ | ||
46 | struct node_info ni; /* in-memory node information */ | 52 | struct node_info ni; /* in-memory node information */ |
47 | }; | 53 | }; |
48 | 54 | ||
@@ -55,18 +61,32 @@ struct nat_entry { | |||
55 | #define nat_get_version(nat) (nat->ni.version) | 61 | #define nat_get_version(nat) (nat->ni.version) |
56 | #define nat_set_version(nat, v) (nat->ni.version = v) | 62 | #define nat_set_version(nat, v) (nat->ni.version = v) |
57 | 63 | ||
58 | #define __set_nat_cache_dirty(nm_i, ne) \ | ||
59 | do { \ | ||
60 | ne->checkpointed = false; \ | ||
61 | list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \ | ||
62 | } while (0) | ||
63 | #define __clear_nat_cache_dirty(nm_i, ne) \ | ||
64 | do { \ | ||
65 | ne->checkpointed = true; \ | ||
66 | list_move_tail(&ne->list, &nm_i->nat_entries); \ | ||
67 | } while (0) | ||
68 | #define inc_node_version(version) (++version) | 64 | #define inc_node_version(version) (++version) |
69 | 65 | ||
66 | static inline void set_nat_flag(struct nat_entry *ne, | ||
67 | unsigned int type, bool set) | ||
68 | { | ||
69 | unsigned char mask = 0x01 << type; | ||
70 | if (set) | ||
71 | ne->flag |= mask; | ||
72 | else | ||
73 | ne->flag &= ~mask; | ||
74 | } | ||
75 | |||
76 | static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type) | ||
77 | { | ||
78 | unsigned char mask = 0x01 << type; | ||
79 | return ne->flag & mask; | ||
80 | } | ||
81 | |||
82 | static inline void nat_reset_flag(struct nat_entry *ne) | ||
83 | { | ||
84 | /* these states can be set only after checkpoint was done */ | ||
85 | set_nat_flag(ne, IS_CHECKPOINTED, true); | ||
86 | set_nat_flag(ne, HAS_FSYNCED_INODE, false); | ||
87 | set_nat_flag(ne, HAS_LAST_FSYNC, true); | ||
88 | } | ||
89 | |||
70 | static inline void node_info_from_raw_nat(struct node_info *ni, | 90 | static inline void node_info_from_raw_nat(struct node_info *ni, |
71 | struct f2fs_nat_entry *raw_ne) | 91 | struct f2fs_nat_entry *raw_ne) |
72 | { | 92 | { |
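Editor's note: since set_nat_flag()/get_nat_flag()/nat_reset_flag() are shown in full in this hunk, they lift into a self-contained example almost verbatim; nat_reset_flag() encodes the post-checkpoint baseline (checkpointed, no fsynced-inode mark, last-fsync mark assumed):

```c
#include <stdbool.h>
#include <stdio.h>

enum {
	IS_CHECKPOINTED,	/* is it checkpointed before? */
	HAS_FSYNCED_INODE,	/* is the inode fsynced before? */
	HAS_LAST_FSYNC,		/* has the latest node fsync mark? */
	IS_DIRTY,		/* is this nat entry dirty? */
};

struct nat_entry { unsigned char flag; };

static inline void set_nat_flag(struct nat_entry *ne, unsigned int type,
				bool set)
{
	unsigned char mask = 0x01 << type;

	if (set)
		ne->flag |= mask;
	else
		ne->flag &= ~mask;
}

static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
{
	return ne->flag & (0x01 << type);
}

static inline void nat_reset_flag(struct nat_entry *ne)
{
	/* these states can be set only after checkpoint was done */
	set_nat_flag(ne, IS_CHECKPOINTED, true);
	set_nat_flag(ne, HAS_FSYNCED_INODE, false);
	set_nat_flag(ne, HAS_LAST_FSYNC, true);
}

int main(void)
{
	struct nat_entry ne = { 0 };

	nat_reset_flag(&ne);
	set_nat_flag(&ne, IS_DIRTY, true);
	printf("checkpointed=%d dirty=%d\n",
	       get_nat_flag(&ne, IS_CHECKPOINTED),
	       get_nat_flag(&ne, IS_DIRTY));
	return 0;
}
```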
@@ -90,9 +110,9 @@ enum mem_type { | |||
90 | }; | 110 | }; |
91 | 111 | ||
92 | struct nat_entry_set { | 112 | struct nat_entry_set { |
93 | struct list_head set_list; /* link with all nat sets */ | 113 | struct list_head set_list; /* link with other nat sets */ |
94 | struct list_head entry_list; /* link with dirty nat entries */ | 114 | struct list_head entry_list; /* link with dirty nat entries */ |
95 | nid_t start_nid; /* start nid of nats in set */ | 115 | nid_t set; /* set number */
96 | unsigned int entry_cnt; /* the # of nat entries in set */ | 116 | unsigned int entry_cnt; /* the # of nat entries in set */ |
97 | }; | 117 | }; |
98 | 118 | ||
@@ -110,18 +130,19 @@ struct free_nid { | |||
110 | int state; /* in use or not: NID_NEW or NID_ALLOC */ | 130 | int state; /* in use or not: NID_NEW or NID_ALLOC */ |
111 | }; | 131 | }; |
112 | 132 | ||
113 | static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) | 133 | static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) |
114 | { | 134 | { |
115 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 135 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
116 | struct free_nid *fnid; | 136 | struct free_nid *fnid; |
117 | 137 | ||
118 | if (nm_i->fcnt <= 0) | ||
119 | return -1; | ||
120 | spin_lock(&nm_i->free_nid_list_lock); | 138 | spin_lock(&nm_i->free_nid_list_lock); |
139 | if (nm_i->fcnt <= 0) { | ||
140 | spin_unlock(&nm_i->free_nid_list_lock); | ||
141 | return; | ||
142 | } | ||
121 | fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); | 143 | fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); |
122 | *nid = fnid->nid; | 144 | *nid = fnid->nid; |
123 | spin_unlock(&nm_i->free_nid_list_lock); | 145 | spin_unlock(&nm_i->free_nid_list_lock); |
124 | return 0; | ||
125 | } | 146 | } |
126 | 147 | ||
127 | /* | 148 | /* |
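Editor's note: the next_free_nid() change is a textbook check-under-lock fix. The old code tested nm_i->fcnt before taking free_nid_list_lock, so another CPU could empty the free list between the check and the list_entry() dereference; moving the test inside the critical section closes that window (and the return value disappears since callers treated it as best-effort anyway). The same pattern in portable C with pthreads, which is only an analogy for the kernel's spinlock:

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int fcnt;		/* free ids currently on the list */
static unsigned int head_id;	/* stand-in for the first free_nid entry */

/* Old shape: fcnt can drop to zero after the unlocked check but before
 * the read below, so the "non-empty" conclusion may already be stale. */
static int next_free_id_racy(unsigned int *id)
{
	if (fcnt <= 0)
		return -1;
	pthread_mutex_lock(&lock);
	*id = head_id;
	pthread_mutex_unlock(&lock);
	return 0;
}

/* New shape: the emptiness test happens while holding the lock. */
static void next_free_id(unsigned int *id)
{
	pthread_mutex_lock(&lock);
	if (fcnt <= 0) {
		pthread_mutex_unlock(&lock);
		return;
	}
	*id = head_id;
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	unsigned int id = 0;

	fcnt = 1;
	head_id = 17;
	if (next_free_id_racy(&id) == 0)
		printf("racy pick: %u\n", id);
	next_free_id(&id);
	printf("safe pick: %u\n", id);
	return 0;
}
```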
@@ -197,8 +218,7 @@ static inline void copy_node_footer(struct page *dst, struct page *src) | |||
197 | 218 | ||
198 | static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) | 219 | static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) |
199 | { | 220 | { |
200 | struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | 221 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); |
201 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | ||
202 | struct f2fs_node *rn = F2FS_NODE(page); | 222 | struct f2fs_node *rn = F2FS_NODE(page); |
203 | 223 | ||
204 | rn->footer.cp_ver = ckpt->checkpoint_ver; | 224 | rn->footer.cp_ver = ckpt->checkpoint_ver; |
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 756c41cd2582..ebd013225788 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c | |||
@@ -14,6 +14,37 @@ | |||
14 | #include "node.h" | 14 | #include "node.h" |
15 | #include "segment.h" | 15 | #include "segment.h" |
16 | 16 | ||
17 | /* | ||
18 | * Roll forward recovery scenarios. | ||
19 | * | ||
20 | * [Term] F: fsync_mark, D: dentry_mark | ||
21 | * | ||
22 | * 1. inode(x) | CP | inode(x) | dnode(F) | ||
23 | * -> Update the latest inode(x). | ||
24 | * | ||
25 | * 2. inode(x) | CP | inode(F) | dnode(F) | ||
26 | * -> No problem. | ||
27 | * | ||
28 | * 3. inode(x) | CP | dnode(F) | inode(x) | ||
29 | * -> Recover to the latest dnode(F), and drop the last inode(x) | ||
30 | * | ||
31 | * 4. inode(x) | CP | dnode(F) | inode(F) | ||
32 | * -> No problem. | ||
33 | * | ||
34 | * 5. CP | inode(x) | dnode(F) | ||
35 | * -> The inode(DF) was missing. Should drop this dnode(F). | ||
36 | * | ||
37 | * 6. CP | inode(DF) | dnode(F) | ||
38 | * -> No problem. | ||
39 | * | ||
40 | * 7. CP | dnode(F) | inode(DF) | ||
41 | * -> If f2fs_iget fails, then goto next to find inode(DF). | ||
42 | * | ||
43 | * 8. CP | dnode(F) | inode(x) | ||
44 | * -> If f2fs_iget fails, then goto next to find inode(DF). | ||
45 | * But it will fail due to no inode(DF). | ||
46 | */ | ||
47 | |||
17 | static struct kmem_cache *fsync_entry_slab; | 48 | static struct kmem_cache *fsync_entry_slab; |
18 | 49 | ||
19 | bool space_for_roll_forward(struct f2fs_sb_info *sbi) | 50 | bool space_for_roll_forward(struct f2fs_sb_info *sbi) |
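Editor's note: the scenario table above is implemented as two passes over the warm-node chain. Pass 1 (find_fsync_dnodes) records, per inode, the block address of the newest inode block it sees (last_inode) and of the newest dentry-marked inode block (last_dentry); pass 2 (recover_data) replays the chain and fires recover_inode()/recover_dentry() exactly when it revisits those addresses, which is how scenario 1 refreshes a stale inode and scenarios 5 and 8 end up dropping dnodes whose inode never shows. A compact simulation of that bookkeeping (the block layout is invented for the demo):

```c
#include <stdbool.h>
#include <stdio.h>

struct blk {
	unsigned int ino;
	bool is_inode;		/* inode block vs. direct-node block */
	bool is_dent;		/* dentry mark carried by an inode block */
};

struct fsync_entry {
	unsigned int ino;
	int last_inode;		/* chain index of the newest inode block */
	int last_dentry;	/* chain index of the newest dentry-marked one */
};

int main(void)
{
	/* demo chain: dentry-marked inode, a dnode, then a newer inode */
	struct blk chain[] = {
		{ .ino = 5, .is_inode = true, .is_dent = true },
		{ .ino = 5, .is_inode = false },
		{ .ino = 5, .is_inode = true },
	};
	struct fsync_entry e = { .ino = 5, .last_inode = -1, .last_dentry = -1 };
	int i, n = 3;

	/* pass 1: remember where the newest inode/dentry blocks live */
	for (i = 0; i < n; i++) {
		if (chain[i].ino != e.ino || !chain[i].is_inode)
			continue;
		e.last_inode = i;
		if (chain[i].is_dent)
			e.last_dentry = i;
	}

	/* pass 2: replay the chain, firing hooks at the recorded spots */
	for (i = 0; i < n; i++) {
		if (i == e.last_inode)
			printf("blk %d: recover_inode(ino=%u)\n", i, e.ino);
		if (i == e.last_dentry)
			printf("blk %d: recover_dentry(ino=%u)\n", i, e.ino);
		printf("blk %d: do_recover_data(ino=%u)\n", i, e.ino);
	}
	return 0;
}
```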
@@ -36,7 +67,7 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, | |||
36 | return NULL; | 67 | return NULL; |
37 | } | 68 | } |
38 | 69 | ||
39 | static int recover_dentry(struct page *ipage, struct inode *inode) | 70 | static int recover_dentry(struct inode *inode, struct page *ipage) |
40 | { | 71 | { |
41 | struct f2fs_inode *raw_inode = F2FS_INODE(ipage); | 72 | struct f2fs_inode *raw_inode = F2FS_INODE(ipage); |
42 | nid_t pino = le32_to_cpu(raw_inode->i_pino); | 73 | nid_t pino = le32_to_cpu(raw_inode->i_pino); |
@@ -75,7 +106,7 @@ retry: | |||
75 | err = -EEXIST; | 106 | err = -EEXIST; |
76 | goto out_unmap_put; | 107 | goto out_unmap_put; |
77 | } | 108 | } |
78 | err = acquire_orphan_inode(F2FS_SB(inode->i_sb)); | 109 | err = acquire_orphan_inode(F2FS_I_SB(inode)); |
79 | if (err) { | 110 | if (err) { |
80 | iput(einode); | 111 | iput(einode); |
81 | goto out_unmap_put; | 112 | goto out_unmap_put; |
@@ -110,35 +141,28 @@ out: | |||
110 | return err; | 141 | return err; |
111 | } | 142 | } |
112 | 143 | ||
113 | static int recover_inode(struct inode *inode, struct page *node_page) | 144 | static void recover_inode(struct inode *inode, struct page *page) |
114 | { | 145 | { |
115 | struct f2fs_inode *raw_inode = F2FS_INODE(node_page); | 146 | struct f2fs_inode *raw = F2FS_INODE(page); |
116 | |||
117 | if (!IS_INODE(node_page)) | ||
118 | return 0; | ||
119 | 147 | ||
120 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | 148 | inode->i_mode = le16_to_cpu(raw->i_mode); |
121 | i_size_write(inode, le64_to_cpu(raw_inode->i_size)); | 149 | i_size_write(inode, le64_to_cpu(raw->i_size)); |
122 | inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); | 150 | inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime); |
123 | inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); | 151 | inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); |
124 | inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); | 152 | inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime); |
125 | inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); | 153 | inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); |
126 | inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); | 154 | inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec); |
127 | inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); | 155 | inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); |
128 | |||
129 | if (is_dent_dnode(node_page)) | ||
130 | return recover_dentry(node_page, inode); | ||
131 | 156 | ||
132 | f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", | 157 | f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", |
133 | ino_of_node(node_page), raw_inode->i_name); | 158 | ino_of_node(page), F2FS_INODE(page)->i_name); |
134 | return 0; | ||
135 | } | 159 | } |
136 | 160 | ||
137 | static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | 161 | static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) |
138 | { | 162 | { |
139 | unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); | 163 | unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); |
140 | struct curseg_info *curseg; | 164 | struct curseg_info *curseg; |
141 | struct page *page; | 165 | struct page *page = NULL; |
142 | block_t blkaddr; | 166 | block_t blkaddr; |
143 | int err = 0; | 167 | int err = 0; |
144 | 168 | ||
@@ -146,20 +170,13 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | |||
146 | curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); | 170 | curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); |
147 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); | 171 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); |
148 | 172 | ||
149 | /* read node page */ | ||
150 | page = alloc_page(GFP_F2FS_ZERO); | ||
151 | if (!page) | ||
152 | return -ENOMEM; | ||
153 | lock_page(page); | ||
154 | |||
155 | while (1) { | 173 | while (1) { |
156 | struct fsync_inode_entry *entry; | 174 | struct fsync_inode_entry *entry; |
157 | 175 | ||
158 | err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); | 176 | if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) |
159 | if (err) | 177 | return 0; |
160 | return err; | ||
161 | 178 | ||
162 | lock_page(page); | 179 | page = get_meta_page_ra(sbi, blkaddr); |
163 | 180 | ||
164 | if (cp_ver != cpver_of_node(page)) | 181 | if (cp_ver != cpver_of_node(page)) |
165 | break; | 182 | break; |
@@ -180,33 +197,38 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | |||
180 | } | 197 | } |
181 | 198 | ||
182 | /* add this fsync inode to the list */ | 199 | /* add this fsync inode to the list */ |
183 | entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); | 200 | entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); |
184 | if (!entry) { | 201 | if (!entry) { |
185 | err = -ENOMEM; | 202 | err = -ENOMEM; |
186 | break; | 203 | break; |
187 | } | 204 | } |
188 | 205 | /* | |
206 | * CP | dnode(F) | inode(DF) | ||
207 | * For this case, we should not give up now. | ||
208 | */ | ||
189 | entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); | 209 | entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); |
190 | if (IS_ERR(entry->inode)) { | 210 | if (IS_ERR(entry->inode)) { |
191 | err = PTR_ERR(entry->inode); | 211 | err = PTR_ERR(entry->inode); |
192 | kmem_cache_free(fsync_entry_slab, entry); | 212 | kmem_cache_free(fsync_entry_slab, entry); |
213 | if (err == -ENOENT) | ||
214 | goto next; | ||
193 | break; | 215 | break; |
194 | } | 216 | } |
195 | list_add_tail(&entry->list, head); | 217 | list_add_tail(&entry->list, head); |
196 | } | 218 | } |
197 | entry->blkaddr = blkaddr; | 219 | entry->blkaddr = blkaddr; |
198 | 220 | ||
199 | err = recover_inode(entry->inode, page); | 221 | if (IS_INODE(page)) { |
200 | if (err && err != -ENOENT) | 222 | entry->last_inode = blkaddr; |
201 | break; | 223 | if (is_dent_dnode(page)) |
224 | entry->last_dentry = blkaddr; | ||
225 | } | ||
202 | next: | 226 | next: |
203 | /* check next segment */ | 227 | /* check next segment */ |
204 | blkaddr = next_blkaddr_of_node(page); | 228 | blkaddr = next_blkaddr_of_node(page); |
229 | f2fs_put_page(page, 1); | ||
205 | } | 230 | } |
206 | 231 | f2fs_put_page(page, 1); | |
207 | unlock_page(page); | ||
208 | __free_pages(page, 0); | ||
209 | |||
210 | return err; | 232 | return err; |
211 | } | 233 | } |
212 | 234 | ||
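Editor's note: find_fsync_dnodes() no longer allocates a private page and submits its own read bio per chain block. Each candidate address is first bounds-checked against the main area, then fetched through the meta-page cache with get_meta_page_ra(), and the page is put again before following next_blkaddr_of_node(). The essential loop shape over an in-memory "disk" (the range constants and next pointers are invented for the demo):

```c
#include <stdio.h>

#define MAIN_BLKADDR 100u	/* first valid main-area block (demo value) */
#define MAX_BLKADDR  200u	/* one past the last valid block (demo value) */

/* next_blkaddr_of_node(): each node block names its successor */
static unsigned int next_of[MAX_BLKADDR];

int main(void)
{
	unsigned int blkaddr = 120;

	next_of[120] = 150;
	next_of[150] = 7;	/* out of range: ends the walk safely */

	while (1) {
		/* the new guard: never touch addresses outside the main area */
		if (blkaddr < MAIN_BLKADDR || blkaddr >= MAX_BLKADDR)
			break;
		/* kernel: page = get_meta_page_ra(sbi, blkaddr);
		 * ... inspect the node block ...
		 * f2fs_put_page(page, 1); */
		printf("visit block %u\n", blkaddr);
		blkaddr = next_of[blkaddr];
	}
	return 0;
}
```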
@@ -279,16 +301,30 @@ got_it: | |||
279 | ino = ino_of_node(node_page); | 301 | ino = ino_of_node(node_page); |
280 | f2fs_put_page(node_page, 1); | 302 | f2fs_put_page(node_page, 1); |
281 | 303 | ||
282 | /* Deallocate previous index in the node page */ | 304 | if (ino != dn->inode->i_ino) { |
283 | inode = f2fs_iget(sbi->sb, ino); | 305 | /* Deallocate previous index in the node page */ |
284 | if (IS_ERR(inode)) | 306 | inode = f2fs_iget(sbi->sb, ino); |
285 | return PTR_ERR(inode); | 307 | if (IS_ERR(inode)) |
308 | return PTR_ERR(inode); | ||
309 | } else { | ||
310 | inode = dn->inode; | ||
311 | } | ||
286 | 312 | ||
287 | bidx = start_bidx_of_node(offset, F2FS_I(inode)) + | 313 | bidx = start_bidx_of_node(offset, F2FS_I(inode)) + |
288 | le16_to_cpu(sum.ofs_in_node); | 314 | le16_to_cpu(sum.ofs_in_node); |
289 | 315 | ||
290 | truncate_hole(inode, bidx, bidx + 1); | 316 | if (ino != dn->inode->i_ino) { |
291 | iput(inode); | 317 | truncate_hole(inode, bidx, bidx + 1); |
318 | iput(inode); | ||
319 | } else { | ||
320 | struct dnode_of_data tdn; | ||
321 | set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0); | ||
322 | if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) | ||
323 | return 0; | ||
324 | if (tdn.data_blkaddr != NULL_ADDR) | ||
325 | truncate_data_blocks_range(&tdn, 1); | ||
326 | f2fs_put_page(tdn.node_page, 1); | ||
327 | } | ||
292 | return 0; | 328 | return 0; |
293 | } | 329 | } |
294 | 330 | ||
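Editor's note: the hunk above guards check_index_in_prev_nodes() against the stale index belonging to the very inode being recovered. Re-igetting an inode that is already held (and iput-ing it mid-recovery) is unsafe, so the code reuses dn->inode and punches the hole through a temporary dnode_of_data instead. A hedged shape of that branch; the printfs merely stand in for the f2fs calls named in the diff:

```c
#include <stdio.h>

struct inode { unsigned long i_ino; };

/* Invalidate a stale block index discovered in an older node chain. */
static void drop_prev_index(struct inode *cur, unsigned long owner_ino,
			    unsigned long bidx)
{
	if (owner_ino != cur->i_ino) {
		/* Another file owns the index: safe to iget it, punch the
		 * hole with truncate_hole(), then iput it again. */
		printf("iget ino %lu, truncate_hole(%lu), iput\n",
		       owner_ino, bidx);
	} else {
		/* Our own file: never re-iget an inode we already hold;
		 * build a temporary dnode_of_data and truncate through it. */
		printf("ino %lu: truncate via temporary dnode at %lu\n",
		       cur->i_ino, bidx);
	}
}

int main(void)
{
	struct inode cur = { .i_ino = 9 };

	drop_prev_index(&cur, 4, 11);	/* someone else's stale index */
	drop_prev_index(&cur, 9, 12);	/* our own stale index */
	return 0;
}
```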
@@ -331,8 +367,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
331 | f2fs_wait_on_page_writeback(dn.node_page, NODE); | 367 | f2fs_wait_on_page_writeback(dn.node_page, NODE); |
332 | 368 | ||
333 | get_node_info(sbi, dn.nid, &ni); | 369 | get_node_info(sbi, dn.nid, &ni); |
334 | f2fs_bug_on(ni.ino != ino_of_node(page)); | 370 | f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); |
335 | f2fs_bug_on(ofs_of_node(dn.node_page) != ofs_of_node(page)); | 371 | f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page)); |
336 | 372 | ||
337 | for (; start < end; start++) { | 373 | for (; start < end; start++) { |
338 | block_t src, dest; | 374 | block_t src, dest; |
@@ -344,7 +380,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
344 | if (src == NULL_ADDR) { | 380 | if (src == NULL_ADDR) { |
345 | err = reserve_new_block(&dn); | 381 | err = reserve_new_block(&dn); |
346 | /* We should not get -ENOSPC */ | 382 | /* We should not get -ENOSPC */ |
347 | f2fs_bug_on(err); | 383 | f2fs_bug_on(sbi, err); |
348 | } | 384 | } |
349 | 385 | ||
350 | /* Check the previous node page having this index */ | 386 | /* Check the previous node page having this index */ |
@@ -386,7 +422,7 @@ static int recover_data(struct f2fs_sb_info *sbi, | |||
386 | { | 422 | { |
387 | unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); | 423 | unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); |
388 | struct curseg_info *curseg; | 424 | struct curseg_info *curseg; |
389 | struct page *page; | 425 | struct page *page = NULL; |
390 | int err = 0; | 426 | int err = 0; |
391 | block_t blkaddr; | 427 | block_t blkaddr; |
392 | 428 | ||
@@ -394,32 +430,41 @@ static int recover_data(struct f2fs_sb_info *sbi, | |||
394 | curseg = CURSEG_I(sbi, type); | 430 | curseg = CURSEG_I(sbi, type); |
395 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); | 431 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); |
396 | 432 | ||
397 | /* read node page */ | ||
398 | page = alloc_page(GFP_F2FS_ZERO); | ||
399 | if (!page) | ||
400 | return -ENOMEM; | ||
401 | |||
402 | lock_page(page); | ||
403 | |||
404 | while (1) { | 433 | while (1) { |
405 | struct fsync_inode_entry *entry; | 434 | struct fsync_inode_entry *entry; |
406 | 435 | ||
407 | err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); | 436 | if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) |
408 | if (err) | 437 | break; |
409 | return err; | ||
410 | 438 | ||
411 | lock_page(page); | 439 | page = get_meta_page_ra(sbi, blkaddr); |
412 | 440 | ||
413 | if (cp_ver != cpver_of_node(page)) | 441 | if (cp_ver != cpver_of_node(page)) { |
442 | f2fs_put_page(page, 1); | ||
414 | break; | 443 | break; |
444 | } | ||
415 | 445 | ||
416 | entry = get_fsync_inode(head, ino_of_node(page)); | 446 | entry = get_fsync_inode(head, ino_of_node(page)); |
417 | if (!entry) | 447 | if (!entry) |
418 | goto next; | 448 | goto next; |
419 | 449 | /* | |
450 | * inode(x) | CP | inode(x) | dnode(F) | ||
451 | * In this case, we can lose the latest inode(x). | ||
452 | * So, call recover_inode for the inode update. | ||
453 | */ | ||
454 | if (entry->last_inode == blkaddr) | ||
455 | recover_inode(entry->inode, page); | ||
456 | if (entry->last_dentry == blkaddr) { | ||
457 | err = recover_dentry(entry->inode, page); | ||
458 | if (err) { | ||
459 | f2fs_put_page(page, 1); | ||
460 | break; | ||
461 | } | ||
462 | } | ||
420 | err = do_recover_data(sbi, entry->inode, page, blkaddr); | 463 | err = do_recover_data(sbi, entry->inode, page, blkaddr); |
421 | if (err) | 464 | if (err) { |
465 | f2fs_put_page(page, 1); | ||
422 | break; | 466 | break; |
467 | } | ||
423 | 468 | ||
424 | if (entry->blkaddr == blkaddr) { | 469 | if (entry->blkaddr == blkaddr) { |
425 | iput(entry->inode); | 470 | iput(entry->inode); |
@@ -429,11 +474,8 @@ static int recover_data(struct f2fs_sb_info *sbi, | |||
429 | next: | 474 | next: |
430 | /* check next segment */ | 475 | /* check next segment */ |
431 | blkaddr = next_blkaddr_of_node(page); | 476 | blkaddr = next_blkaddr_of_node(page); |
477 | f2fs_put_page(page, 1); | ||
432 | } | 478 | } |
433 | |||
434 | unlock_page(page); | ||
435 | __free_pages(page, 0); | ||
436 | |||
437 | if (!err) | 479 | if (!err) |
438 | allocate_new_segments(sbi); | 480 | allocate_new_segments(sbi); |
439 | return err; | 481 | return err; |
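Net effect of the hunk above: the private bounce page and its per-block f2fs_submit_page_bio() round-trips are gone; each node block in the fsync chain is read through the meta mapping, with get_meta_page_ra() priming a readahead window of MAX_BIO_BLOCKS(sbi) pages. The walk, condensed (recovery calls elided; sketch only, not a drop-in):

        while (blkaddr >= MAIN_BLKADDR(sbi) && blkaddr < MAX_BLKADDR(sbi)) {
                struct page *page = get_meta_page_ra(sbi, blkaddr);

                if (cp_ver != cpver_of_node(page)) {    /* walked past this CP's log */
                        f2fs_put_page(page, 1);
                        break;
                }
                /* ... recover_inode() / recover_dentry() / do_recover_data() ... */
                blkaddr = next_blkaddr_of_node(page);   /* follow the node chain */
                f2fs_put_page(page, 1);
        }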
@@ -474,11 +516,15 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) | |||
474 | /* step #2: recover data */ | 516 | /* step #2: recover data */ |
475 | err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); | 517 | err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); |
476 | if (!err) | 518 | if (!err) |
477 | f2fs_bug_on(!list_empty(&inode_list)); | 519 | f2fs_bug_on(sbi, !list_empty(&inode_list)); |
478 | out: | 520 | out: |
479 | destroy_fsync_dnodes(&inode_list); | 521 | destroy_fsync_dnodes(&inode_list); |
480 | kmem_cache_destroy(fsync_entry_slab); | 522 | kmem_cache_destroy(fsync_entry_slab); |
481 | 523 | ||
524 | /* truncate meta pages to be used by the recovery */ | ||
525 | truncate_inode_pages_range(META_MAPPING(sbi), | ||
526 | (loff_t)MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1); | ||
527 | |||
482 | if (err) { | 528 | if (err) { |
483 | truncate_inode_pages_final(NODE_MAPPING(sbi)); | 529 | truncate_inode_pages_final(NODE_MAPPING(sbi)); |
484 | truncate_inode_pages_final(META_MAPPING(sbi)); | 530 | truncate_inode_pages_final(META_MAPPING(sbi)); |
@@ -494,8 +540,11 @@ out: | |||
494 | set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); | 540 | set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); |
495 | mutex_unlock(&sbi->cp_mutex); | 541 | mutex_unlock(&sbi->cp_mutex); |
496 | } else if (need_writecp) { | 542 | } else if (need_writecp) { |
543 | struct cp_control cpc = { | ||
544 | .reason = CP_SYNC, | ||
545 | }; | ||
497 | mutex_unlock(&sbi->cp_mutex); | 546 | mutex_unlock(&sbi->cp_mutex); |
498 | write_checkpoint(sbi, false); | 547 | write_checkpoint(sbi, &cpc); |
499 | } else { | 548 | } else { |
500 | mutex_unlock(&sbi->cp_mutex); | 549 | mutex_unlock(&sbi->cp_mutex); |
501 | } | 550 | } |
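write_checkpoint() now takes a struct cp_control rather than a bare bool. The declaration lives in f2fs.h and is not part of this diff; from its uses in this series the shape is roughly the sketch below (field types inferred, not authoritative):

        struct cp_control {
                int reason;                     /* CP_SYNC here; CP_DISCARD for fstrim */
                __u64 trim_start, trim_end;     /* segment range, CP_DISCARD only */
                __u64 trim_minlen;              /* minimum extent to discard, in blocks */
                __u64 trimmed;                  /* out: blocks queued for discard */
        };

        struct cp_control cpc = { .reason = CP_SYNC };
        write_checkpoint(sbi, &cpc);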
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0aa337cd5bba..923cb76fdc46 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c | |||
@@ -25,6 +25,8 @@ | |||
25 | #define __reverse_ffz(x) __reverse_ffs(~(x)) | 25 | #define __reverse_ffz(x) __reverse_ffs(~(x)) |
26 | 26 | ||
27 | static struct kmem_cache *discard_entry_slab; | 27 | static struct kmem_cache *discard_entry_slab; |
28 | static struct kmem_cache *sit_entry_set_slab; | ||
29 | static struct kmem_cache *inmem_entry_slab; | ||
28 | 30 | ||
29 | /* | 31 | /* |
30 | * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since | 32 | * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since |
@@ -172,6 +174,60 @@ found_middle: | |||
172 | return result + __reverse_ffz(tmp); | 174 | return result + __reverse_ffz(tmp); |
173 | } | 175 | } |
174 | 176 | ||
177 | void register_inmem_page(struct inode *inode, struct page *page) | ||
178 | { | ||
179 | struct f2fs_inode_info *fi = F2FS_I(inode); | ||
180 | struct inmem_pages *new; | ||
181 | |||
182 | new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); | ||
183 | |||
184 | /* add atomic page indices to the list */ | ||
185 | new->page = page; | ||
186 | INIT_LIST_HEAD(&new->list); | ||
187 | |||
188 | /* increase reference count with clean state */ | ||
189 | mutex_lock(&fi->inmem_lock); | ||
190 | get_page(page); | ||
191 | list_add_tail(&new->list, &fi->inmem_pages); | ||
192 | mutex_unlock(&fi->inmem_lock); | ||
193 | } | ||
194 | |||
195 | void commit_inmem_pages(struct inode *inode, bool abort) | ||
196 | { | ||
197 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
198 | struct f2fs_inode_info *fi = F2FS_I(inode); | ||
199 | struct inmem_pages *cur, *tmp; | ||
200 | bool submit_bio = false; | ||
201 | struct f2fs_io_info fio = { | ||
202 | .type = DATA, | ||
203 | .rw = WRITE_SYNC, | ||
204 | }; | ||
205 | |||
206 | f2fs_balance_fs(sbi); | ||
207 | f2fs_lock_op(sbi); | ||
208 | |||
209 | mutex_lock(&fi->inmem_lock); | ||
210 | list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { | ||
211 | lock_page(cur->page); | ||
212 | if (!abort && cur->page->mapping == inode->i_mapping) { | ||
213 | f2fs_wait_on_page_writeback(cur->page, DATA); | ||
214 | if (clear_page_dirty_for_io(cur->page)) | ||
215 | inode_dec_dirty_pages(inode); | ||
216 | do_write_data_page(cur->page, &fio); | ||
217 | submit_bio = true; | ||
218 | } | ||
219 | f2fs_put_page(cur->page, 1); | ||
220 | list_del(&cur->list); | ||
221 | kmem_cache_free(inmem_entry_slab, cur); | ||
222 | } | ||
223 | if (submit_bio) | ||
224 | f2fs_submit_merged_bio(sbi, DATA, WRITE); | ||
225 | mutex_unlock(&fi->inmem_lock); | ||
226 | |||
227 | filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX); | ||
228 | f2fs_unlock_op(sbi); | ||
229 | } | ||
230 | |||
175 | /* | 231 | /* |
176 | * This function balances dirty node and dentry pages. | 232 | * This function balances dirty node and dentry pages. |
177 | * In addition, it controls garbage collection. | 233 | * In addition, it controls garbage collection. |
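register_inmem_page()/commit_inmem_pages() above are the buffering half of atomic writes: pages dirtied inside an atomic-write window are pinned on fi->inmem_pages and only reach the log on commit. A hypothetical caller, since the ioctl plumbing is not part of this diff (the ioctl names below are assumptions):

        /* while the window is open (e.g. after an F2FS_IOC_START_ATOMIC_WRITE-
         * style ioctl), park each page dirtied by write_begin/write_end: */
        register_inmem_page(inode, page);       /* get_page() + queue on fi->inmem_pages */

        /* on commit, write the queued pages out under f2fs_lock_op(): */
        commit_inmem_pages(inode, false);

        /* on abort, the same helper only unpins pages and frees the entries: */
        commit_inmem_pages(inode, true);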
@@ -205,24 +261,20 @@ repeat: | |||
205 | if (kthread_should_stop()) | 261 | if (kthread_should_stop()) |
206 | return 0; | 262 | return 0; |
207 | 263 | ||
208 | spin_lock(&fcc->issue_lock); | 264 | if (!llist_empty(&fcc->issue_list)) { |
209 | if (fcc->issue_list) { | ||
210 | fcc->dispatch_list = fcc->issue_list; | ||
211 | fcc->issue_list = fcc->issue_tail = NULL; | ||
212 | } | ||
213 | spin_unlock(&fcc->issue_lock); | ||
214 | |||
215 | if (fcc->dispatch_list) { | ||
216 | struct bio *bio = bio_alloc(GFP_NOIO, 0); | 265 | struct bio *bio = bio_alloc(GFP_NOIO, 0); |
217 | struct flush_cmd *cmd, *next; | 266 | struct flush_cmd *cmd, *next; |
218 | int ret; | 267 | int ret; |
219 | 268 | ||
269 | fcc->dispatch_list = llist_del_all(&fcc->issue_list); | ||
270 | fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); | ||
271 | |||
220 | bio->bi_bdev = sbi->sb->s_bdev; | 272 | bio->bi_bdev = sbi->sb->s_bdev; |
221 | ret = submit_bio_wait(WRITE_FLUSH, bio); | 273 | ret = submit_bio_wait(WRITE_FLUSH, bio); |
222 | 274 | ||
223 | for (cmd = fcc->dispatch_list; cmd; cmd = next) { | 275 | llist_for_each_entry_safe(cmd, next, |
276 | fcc->dispatch_list, llnode) { | ||
224 | cmd->ret = ret; | 277 | cmd->ret = ret; |
225 | next = cmd->next; | ||
226 | complete(&cmd->wait); | 278 | complete(&cmd->wait); |
227 | } | 279 | } |
228 | bio_put(bio); | 280 | bio_put(bio); |
@@ -230,7 +282,7 @@ repeat: | |||
230 | } | 282 | } |
231 | 283 | ||
232 | wait_event_interruptible(*q, | 284 | wait_event_interruptible(*q, |
233 | kthread_should_stop() || fcc->issue_list); | 285 | kthread_should_stop() || !llist_empty(&fcc->issue_list)); |
234 | goto repeat; | 286 | goto repeat; |
235 | } | 287 | } |
236 | 288 | ||
@@ -249,15 +301,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) | |||
249 | return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); | 301 | return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); |
250 | 302 | ||
251 | init_completion(&cmd.wait); | 303 | init_completion(&cmd.wait); |
252 | cmd.next = NULL; | ||
253 | 304 | ||
254 | spin_lock(&fcc->issue_lock); | 305 | llist_add(&cmd.llnode, &fcc->issue_list); |
255 | if (fcc->issue_list) | ||
256 | fcc->issue_tail->next = &cmd; | ||
257 | else | ||
258 | fcc->issue_list = &cmd; | ||
259 | fcc->issue_tail = &cmd; | ||
260 | spin_unlock(&fcc->issue_lock); | ||
261 | 306 | ||
262 | if (!fcc->dispatch_list) | 307 | if (!fcc->dispatch_list) |
263 | wake_up(&fcc->flush_wait_queue); | 308 | wake_up(&fcc->flush_wait_queue); |
@@ -276,8 +321,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) | |||
276 | fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); | 321 | fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); |
277 | if (!fcc) | 322 | if (!fcc) |
278 | return -ENOMEM; | 323 | return -ENOMEM; |
279 | spin_lock_init(&fcc->issue_lock); | ||
280 | init_waitqueue_head(&fcc->flush_wait_queue); | 324 | init_waitqueue_head(&fcc->flush_wait_queue); |
325 | init_llist_head(&fcc->issue_list); | ||
281 | SM_I(sbi)->cmd_control_info = fcc; | 326 | SM_I(sbi)->cmd_control_info = fcc; |
282 | fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, | 327 | fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, |
283 | "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); | 328 | "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); |
@@ -317,6 +362,10 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, | |||
317 | struct seg_entry *sentry = get_seg_entry(sbi, segno); | 362 | struct seg_entry *sentry = get_seg_entry(sbi, segno); |
318 | enum dirty_type t = sentry->type; | 363 | enum dirty_type t = sentry->type; |
319 | 364 | ||
365 | if (unlikely(t >= DIRTY)) { | ||
366 | f2fs_bug_on(sbi, 1); | ||
367 | return; | ||
368 | } | ||
320 | if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t])) | 369 | if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t])) |
321 | dirty_i->nr_dirty[t]++; | 370 | dirty_i->nr_dirty[t]++; |
322 | } | 371 | } |
@@ -376,8 +425,8 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) | |||
376 | static int f2fs_issue_discard(struct f2fs_sb_info *sbi, | 425 | static int f2fs_issue_discard(struct f2fs_sb_info *sbi, |
377 | block_t blkstart, block_t blklen) | 426 | block_t blkstart, block_t blklen) |
378 | { | 427 | { |
379 | sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart); | 428 | sector_t start = SECTOR_FROM_BLOCK(blkstart); |
380 | sector_t len = SECTOR_FROM_BLOCK(sbi, blklen); | 429 | sector_t len = SECTOR_FROM_BLOCK(blklen); |
381 | trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); | 430 | trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); |
382 | return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); | 431 | return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); |
383 | } | 432 | } |
@@ -392,22 +441,48 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) | |||
392 | } | 441 | } |
393 | } | 442 | } |
394 | 443 | ||
395 | static void add_discard_addrs(struct f2fs_sb_info *sbi, | 444 | static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) |
396 | unsigned int segno, struct seg_entry *se) | ||
397 | { | 445 | { |
398 | struct list_head *head = &SM_I(sbi)->discard_list; | 446 | struct list_head *head = &SM_I(sbi)->discard_list; |
399 | struct discard_entry *new; | 447 | struct discard_entry *new; |
400 | int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); | 448 | int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); |
401 | int max_blocks = sbi->blocks_per_seg; | 449 | int max_blocks = sbi->blocks_per_seg; |
450 | struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); | ||
402 | unsigned long *cur_map = (unsigned long *)se->cur_valid_map; | 451 | unsigned long *cur_map = (unsigned long *)se->cur_valid_map; |
403 | unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; | 452 | unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; |
404 | unsigned long dmap[entries]; | 453 | unsigned long dmap[entries]; |
405 | unsigned int start = 0, end = -1; | 454 | unsigned int start = 0, end = -1; |
455 | bool force = (cpc->reason == CP_DISCARD); | ||
406 | int i; | 456 | int i; |
407 | 457 | ||
408 | if (!test_opt(sbi, DISCARD)) | 458 | if (!force && !test_opt(sbi, DISCARD)) |
409 | return; | 459 | return; |
410 | 460 | ||
461 | if (force && !se->valid_blocks) { | ||
462 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | ||
463 | /* | ||
464 | * if this segment is registered in the prefree list, then | ||
465 | * we should skip adding a discard candidate, and let the | ||
466 | * checkpoint do that later. | ||
467 | */ | ||
468 | mutex_lock(&dirty_i->seglist_lock); | ||
469 | if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) { | ||
470 | mutex_unlock(&dirty_i->seglist_lock); | ||
471 | cpc->trimmed += sbi->blocks_per_seg; | ||
472 | return; | ||
473 | } | ||
474 | mutex_unlock(&dirty_i->seglist_lock); | ||
475 | |||
476 | new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); | ||
477 | INIT_LIST_HEAD(&new->list); | ||
478 | new->blkaddr = START_BLOCK(sbi, cpc->trim_start); | ||
479 | new->len = sbi->blocks_per_seg; | ||
480 | list_add_tail(&new->list, head); | ||
481 | SM_I(sbi)->nr_discards += sbi->blocks_per_seg; | ||
482 | cpc->trimmed += sbi->blocks_per_seg; | ||
483 | return; | ||
484 | } | ||
485 | |||
411 | /* zero block will be discarded through the prefree list */ | 486 | /* zero block will be discarded through the prefree list */ |
412 | if (!se->valid_blocks || se->valid_blocks == max_blocks) | 487 | if (!se->valid_blocks || se->valid_blocks == max_blocks) |
413 | return; | 488 | return; |
@@ -416,23 +491,39 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, | |||
416 | for (i = 0; i < entries; i++) | 491 | for (i = 0; i < entries; i++) |
417 | dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; | 492 | dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; |
418 | 493 | ||
419 | while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { | 494 | while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { |
420 | start = __find_rev_next_bit(dmap, max_blocks, end + 1); | 495 | start = __find_rev_next_bit(dmap, max_blocks, end + 1); |
421 | if (start >= max_blocks) | 496 | if (start >= max_blocks) |
422 | break; | 497 | break; |
423 | 498 | ||
424 | end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); | 499 | end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); |
425 | 500 | ||
501 | if (end - start < cpc->trim_minlen) | ||
502 | continue; | ||
503 | |||
426 | new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); | 504 | new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); |
427 | INIT_LIST_HEAD(&new->list); | 505 | INIT_LIST_HEAD(&new->list); |
428 | new->blkaddr = START_BLOCK(sbi, segno) + start; | 506 | new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; |
429 | new->len = end - start; | 507 | new->len = end - start; |
508 | cpc->trimmed += end - start; | ||
430 | 509 | ||
431 | list_add_tail(&new->list, head); | 510 | list_add_tail(&new->list, head); |
432 | SM_I(sbi)->nr_discards += end - start; | 511 | SM_I(sbi)->nr_discards += end - start; |
433 | } | 512 | } |
434 | } | 513 | } |
435 | 514 | ||
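The candidate mask in add_discard_addrs() is worth unpacking: dmap = (cur ^ ckpt) & ckpt keeps exactly the blocks that were valid at the last checkpoint but have been freed since, i.e. the blocks whose discard becomes safe once the next checkpoint commits. Standalone illustration (plain userspace C; tiny hex masks stand in for the real per-segment bitmaps):

        #include <stdio.h>

        int main(void)
        {
                unsigned long cur  = 0xA;       /* valid now:           blocks 1,3   */
                unsigned long ckpt = 0xE;       /* valid at checkpoint: blocks 1,2,3 */
                unsigned long dmap = (cur ^ ckpt) & ckpt;

                printf("dmap = 0x%lx\n", dmap); /* 0x4: block 2 freed since the CP */
                return 0;
        }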
515 | void release_discard_addrs(struct f2fs_sb_info *sbi) | ||
516 | { | ||
517 | struct list_head *head = &(SM_I(sbi)->discard_list); | ||
518 | struct discard_entry *entry, *this; | ||
519 | |||
520 | /* drop caches */ | ||
521 | list_for_each_entry_safe(entry, this, head, list) { | ||
522 | list_del(&entry->list); | ||
523 | kmem_cache_free(discard_entry_slab, entry); | ||
524 | } | ||
525 | } | ||
526 | |||
436 | /* | 527 | /* |
437 | * Should call clear_prefree_segments after checkpoint is done. | 528 | * Should call clear_prefree_segments after checkpoint is done. |
438 | */ | 529 | */ |
@@ -440,10 +531,9 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) | |||
440 | { | 531 | { |
441 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 532 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
442 | unsigned int segno; | 533 | unsigned int segno; |
443 | unsigned int total_segs = TOTAL_SEGS(sbi); | ||
444 | 534 | ||
445 | mutex_lock(&dirty_i->seglist_lock); | 535 | mutex_lock(&dirty_i->seglist_lock); |
446 | for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], total_segs) | 536 | for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi)) |
447 | __set_test_and_free(sbi, segno); | 537 | __set_test_and_free(sbi, segno); |
448 | mutex_unlock(&dirty_i->seglist_lock); | 538 | mutex_unlock(&dirty_i->seglist_lock); |
449 | } | 539 | } |
@@ -454,17 +544,17 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi) | |||
454 | struct discard_entry *entry, *this; | 544 | struct discard_entry *entry, *this; |
455 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 545 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
456 | unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; | 546 | unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; |
457 | unsigned int total_segs = TOTAL_SEGS(sbi); | ||
458 | unsigned int start = 0, end = -1; | 547 | unsigned int start = 0, end = -1; |
459 | 548 | ||
460 | mutex_lock(&dirty_i->seglist_lock); | 549 | mutex_lock(&dirty_i->seglist_lock); |
461 | 550 | ||
462 | while (1) { | 551 | while (1) { |
463 | int i; | 552 | int i; |
464 | start = find_next_bit(prefree_map, total_segs, end + 1); | 553 | start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1); |
465 | if (start >= total_segs) | 554 | if (start >= MAIN_SEGS(sbi)) |
466 | break; | 555 | break; |
467 | end = find_next_zero_bit(prefree_map, total_segs, start + 1); | 556 | end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi), |
557 | start + 1); | ||
468 | 558 | ||
469 | for (i = start; i < end; i++) | 559 | for (i = start; i < end; i++) |
470 | clear_bit(i, prefree_map); | 560 | clear_bit(i, prefree_map); |
@@ -488,11 +578,16 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi) | |||
488 | } | 578 | } |
489 | } | 579 | } |
490 | 580 | ||
491 | static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) | 581 | static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) |
492 | { | 582 | { |
493 | struct sit_info *sit_i = SIT_I(sbi); | 583 | struct sit_info *sit_i = SIT_I(sbi); |
494 | if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) | 584 | |
585 | if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) { | ||
495 | sit_i->dirty_sentries++; | 586 | sit_i->dirty_sentries++; |
587 | return false; | ||
588 | } | ||
589 | |||
590 | return true; | ||
496 | } | 591 | } |
497 | 592 | ||
498 | static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, | 593 | static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, |
@@ -516,7 +611,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) | |||
516 | new_vblocks = se->valid_blocks + del; | 611 | new_vblocks = se->valid_blocks + del; |
517 | offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); | 612 | offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); |
518 | 613 | ||
519 | f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) || | 614 | f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) || |
520 | (new_vblocks > sbi->blocks_per_seg))); | 615 | (new_vblocks > sbi->blocks_per_seg))); |
521 | 616 | ||
522 | se->valid_blocks = new_vblocks; | 617 | se->valid_blocks = new_vblocks; |
@@ -526,10 +621,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) | |||
526 | /* Update valid block bitmap */ | 621 | /* Update valid block bitmap */ |
527 | if (del > 0) { | 622 | if (del > 0) { |
528 | if (f2fs_set_bit(offset, se->cur_valid_map)) | 623 | if (f2fs_set_bit(offset, se->cur_valid_map)) |
529 | BUG(); | 624 | f2fs_bug_on(sbi, 1); |
530 | } else { | 625 | } else { |
531 | if (!f2fs_clear_bit(offset, se->cur_valid_map)) | 626 | if (!f2fs_clear_bit(offset, se->cur_valid_map)) |
532 | BUG(); | 627 | f2fs_bug_on(sbi, 1); |
533 | } | 628 | } |
534 | if (!f2fs_test_bit(offset, se->ckpt_valid_map)) | 629 | if (!f2fs_test_bit(offset, se->ckpt_valid_map)) |
535 | se->ckpt_valid_blocks += del; | 630 | se->ckpt_valid_blocks += del; |
@@ -558,7 +653,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) | |||
558 | unsigned int segno = GET_SEGNO(sbi, addr); | 653 | unsigned int segno = GET_SEGNO(sbi, addr); |
559 | struct sit_info *sit_i = SIT_I(sbi); | 654 | struct sit_info *sit_i = SIT_I(sbi); |
560 | 655 | ||
561 | f2fs_bug_on(addr == NULL_ADDR); | 656 | f2fs_bug_on(sbi, addr == NULL_ADDR); |
562 | if (addr == NEW_ADDR) | 657 | if (addr == NEW_ADDR) |
563 | return; | 658 | return; |
564 | 659 | ||
@@ -634,7 +729,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) | |||
634 | unsigned int segno = curseg->segno + 1; | 729 | unsigned int segno = curseg->segno + 1; |
635 | struct free_segmap_info *free_i = FREE_I(sbi); | 730 | struct free_segmap_info *free_i = FREE_I(sbi); |
636 | 731 | ||
637 | if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec) | 732 | if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) |
638 | return !test_bit(segno, free_i->free_segmap); | 733 | return !test_bit(segno, free_i->free_segmap); |
639 | return 0; | 734 | return 0; |
640 | } | 735 | } |
@@ -648,7 +743,7 @@ static void get_new_segment(struct f2fs_sb_info *sbi, | |||
648 | { | 743 | { |
649 | struct free_segmap_info *free_i = FREE_I(sbi); | 744 | struct free_segmap_info *free_i = FREE_I(sbi); |
650 | unsigned int segno, secno, zoneno; | 745 | unsigned int segno, secno, zoneno; |
651 | unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone; | 746 | unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone; |
652 | unsigned int hint = *newseg / sbi->segs_per_sec; | 747 | unsigned int hint = *newseg / sbi->segs_per_sec; |
653 | unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); | 748 | unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); |
654 | unsigned int left_start = hint; | 749 | unsigned int left_start = hint; |
@@ -660,18 +755,18 @@ static void get_new_segment(struct f2fs_sb_info *sbi, | |||
660 | 755 | ||
661 | if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { | 756 | if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { |
662 | segno = find_next_zero_bit(free_i->free_segmap, | 757 | segno = find_next_zero_bit(free_i->free_segmap, |
663 | TOTAL_SEGS(sbi), *newseg + 1); | 758 | MAIN_SEGS(sbi), *newseg + 1); |
664 | if (segno - *newseg < sbi->segs_per_sec - | 759 | if (segno - *newseg < sbi->segs_per_sec - |
665 | (*newseg % sbi->segs_per_sec)) | 760 | (*newseg % sbi->segs_per_sec)) |
666 | goto got_it; | 761 | goto got_it; |
667 | } | 762 | } |
668 | find_other_zone: | 763 | find_other_zone: |
669 | secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint); | 764 | secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); |
670 | if (secno >= TOTAL_SECS(sbi)) { | 765 | if (secno >= MAIN_SECS(sbi)) { |
671 | if (dir == ALLOC_RIGHT) { | 766 | if (dir == ALLOC_RIGHT) { |
672 | secno = find_next_zero_bit(free_i->free_secmap, | 767 | secno = find_next_zero_bit(free_i->free_secmap, |
673 | TOTAL_SECS(sbi), 0); | 768 | MAIN_SECS(sbi), 0); |
674 | f2fs_bug_on(secno >= TOTAL_SECS(sbi)); | 769 | f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi)); |
675 | } else { | 770 | } else { |
676 | go_left = 1; | 771 | go_left = 1; |
677 | left_start = hint - 1; | 772 | left_start = hint - 1; |
@@ -686,8 +781,8 @@ find_other_zone: | |||
686 | continue; | 781 | continue; |
687 | } | 782 | } |
688 | left_start = find_next_zero_bit(free_i->free_secmap, | 783 | left_start = find_next_zero_bit(free_i->free_secmap, |
689 | TOTAL_SECS(sbi), 0); | 784 | MAIN_SECS(sbi), 0); |
690 | f2fs_bug_on(left_start >= TOTAL_SECS(sbi)); | 785 | f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi)); |
691 | break; | 786 | break; |
692 | } | 787 | } |
693 | secno = left_start; | 788 | secno = left_start; |
@@ -726,7 +821,7 @@ skip_left: | |||
726 | } | 821 | } |
727 | got_it: | 822 | got_it: |
728 | /* set it as dirty segment in free segmap */ | 823 | /* set it as dirty segment in free segmap */ |
729 | f2fs_bug_on(test_bit(segno, free_i->free_segmap)); | 824 | f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); |
730 | __set_inuse(sbi, segno); | 825 | __set_inuse(sbi, segno); |
731 | *newseg = segno; | 826 | *newseg = segno; |
732 | write_unlock(&free_i->segmap_lock); | 827 | write_unlock(&free_i->segmap_lock); |
@@ -898,6 +993,37 @@ static const struct segment_allocation default_salloc_ops = { | |||
898 | .allocate_segment = allocate_segment_by_default, | 993 | .allocate_segment = allocate_segment_by_default, |
899 | }; | 994 | }; |
900 | 995 | ||
996 | int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) | ||
997 | { | ||
998 | __u64 start = range->start >> sbi->log_blocksize; | ||
999 | __u64 end = start + (range->len >> sbi->log_blocksize) - 1; | ||
1000 | unsigned int start_segno, end_segno; | ||
1001 | struct cp_control cpc; | ||
1002 | |||
1003 | if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) || | ||
1004 | range->len < sbi->blocksize) | ||
1005 | return -EINVAL; | ||
1006 | |||
1007 | if (end <= MAIN_BLKADDR(sbi)) | ||
1008 | goto out; | ||
1009 | |||
1010 | /* start/end segment number in main_area */ | ||
1011 | start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); | ||
1012 | end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : | ||
1013 | GET_SEGNO(sbi, end); | ||
1014 | cpc.reason = CP_DISCARD; | ||
1015 | cpc.trim_start = start_segno; | ||
1016 | cpc.trim_end = end_segno; | ||
1017 | cpc.trim_minlen = range->minlen >> sbi->log_blocksize; | ||
1018 | cpc.trimmed = 0; | ||
1019 | |||
1020 | /* do checkpoint to issue discard commands safely */ | ||
1021 | write_checkpoint(sbi, &cpc); | ||
1022 | out: | ||
1023 | range->len = cpc.trimmed << sbi->log_blocksize; | ||
1024 | return 0; | ||
1025 | } | ||
1026 | |||
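f2fs_trim_fs() reduces the byte range from FITRIM to whole segments before handing it to the checkpoint. Worked example of the arithmetic under an illustrative geometry of 4KB blocks and 512 blocks per segment (GET_SEGNO() additionally rebases on MAIN_BLKADDR(), omitted here for brevity):

        #include <stdio.h>
        #include <stdint.h>

        int main(void)
        {
                const int log_blocksize = 12, log_blocks_per_seg = 9;
                uint64_t range_start = 8ULL << 20;      /* fstrim offset: 8MB  */
                uint64_t range_len   = 64ULL << 20;     /* fstrim length: 64MB */

                uint64_t start = range_start >> log_blocksize;            /* 2048  */
                uint64_t end = start + (range_len >> log_blocksize) - 1;  /* 18431 */

                printf("segments %llu..%llu\n",
                       (unsigned long long)(start >> log_blocks_per_seg), /* 4  */
                       (unsigned long long)(end >> log_blocks_per_seg));  /* 35 */
                return 0;
        }

cpc.trimmed then reports back how many blocks were queued for discard, converted to bytes for range->len.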
901 | static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) | 1027 | static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) |
902 | { | 1028 | { |
903 | struct curseg_info *curseg = CURSEG_I(sbi, type); | 1029 | struct curseg_info *curseg = CURSEG_I(sbi, type); |
@@ -953,15 +1079,15 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type) | |||
953 | 1079 | ||
954 | static int __get_segment_type(struct page *page, enum page_type p_type) | 1080 | static int __get_segment_type(struct page *page, enum page_type p_type) |
955 | { | 1081 | { |
956 | struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | 1082 | switch (F2FS_P_SB(page)->active_logs) { |
957 | switch (sbi->active_logs) { | ||
958 | case 2: | 1083 | case 2: |
959 | return __get_segment_type_2(page, p_type); | 1084 | return __get_segment_type_2(page, p_type); |
960 | case 4: | 1085 | case 4: |
961 | return __get_segment_type_4(page, p_type); | 1086 | return __get_segment_type_4(page, p_type); |
962 | } | 1087 | } |
963 | /* NR_CURSEG_TYPE(6) logs by default */ | 1088 | /* NR_CURSEG_TYPE(6) logs by default */ |
964 | f2fs_bug_on(sbi->active_logs != NR_CURSEG_TYPE); | 1089 | f2fs_bug_on(F2FS_P_SB(page), |
1090 | F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE); | ||
965 | return __get_segment_type_6(page, p_type); | 1091 | return __get_segment_type_6(page, p_type); |
966 | } | 1092 | } |
967 | 1093 | ||
@@ -1041,11 +1167,11 @@ void write_node_page(struct f2fs_sb_info *sbi, struct page *page, | |||
1041 | void write_data_page(struct page *page, struct dnode_of_data *dn, | 1167 | void write_data_page(struct page *page, struct dnode_of_data *dn, |
1042 | block_t *new_blkaddr, struct f2fs_io_info *fio) | 1168 | block_t *new_blkaddr, struct f2fs_io_info *fio) |
1043 | { | 1169 | { |
1044 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 1170 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); |
1045 | struct f2fs_summary sum; | 1171 | struct f2fs_summary sum; |
1046 | struct node_info ni; | 1172 | struct node_info ni; |
1047 | 1173 | ||
1048 | f2fs_bug_on(dn->data_blkaddr == NULL_ADDR); | 1174 | f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); |
1049 | get_node_info(sbi, dn->nid, &ni); | 1175 | get_node_info(sbi, dn->nid, &ni); |
1050 | set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); | 1176 | set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); |
1051 | 1177 | ||
@@ -1055,9 +1181,7 @@ void write_data_page(struct page *page, struct dnode_of_data *dn, | |||
1055 | void rewrite_data_page(struct page *page, block_t old_blkaddr, | 1181 | void rewrite_data_page(struct page *page, block_t old_blkaddr, |
1056 | struct f2fs_io_info *fio) | 1182 | struct f2fs_io_info *fio) |
1057 | { | 1183 | { |
1058 | struct inode *inode = page->mapping->host; | 1184 | f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio); |
1059 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
1060 | f2fs_submit_page_mbio(sbi, page, old_blkaddr, fio); | ||
1061 | } | 1185 | } |
1062 | 1186 | ||
1063 | void recover_data_page(struct f2fs_sb_info *sbi, | 1187 | void recover_data_page(struct f2fs_sb_info *sbi, |
@@ -1130,8 +1254,9 @@ out: | |||
1130 | void f2fs_wait_on_page_writeback(struct page *page, | 1254 | void f2fs_wait_on_page_writeback(struct page *page, |
1131 | enum page_type type) | 1255 | enum page_type type) |
1132 | { | 1256 | { |
1133 | struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | ||
1134 | if (PageWriteback(page)) { | 1257 | if (PageWriteback(page)) { |
1258 | struct f2fs_sb_info *sbi = F2FS_P_SB(page); | ||
1259 | |||
1135 | if (is_merged_page(sbi, page, type)) | 1260 | if (is_merged_page(sbi, page, type)) |
1136 | f2fs_submit_merged_bio(sbi, type, WRITE); | 1261 | f2fs_submit_merged_bio(sbi, type, WRITE); |
1137 | wait_on_page_writeback(page); | 1262 | wait_on_page_writeback(page); |
@@ -1400,7 +1525,7 @@ static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, | |||
1400 | unsigned int segno) | 1525 | unsigned int segno) |
1401 | { | 1526 | { |
1402 | struct sit_info *sit_i = SIT_I(sbi); | 1527 | struct sit_info *sit_i = SIT_I(sbi); |
1403 | unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno); | 1528 | unsigned int offset = SIT_BLOCK_OFFSET(segno); |
1404 | block_t blk_addr = sit_i->sit_base_addr + offset; | 1529 | block_t blk_addr = sit_i->sit_base_addr + offset; |
1405 | 1530 | ||
1406 | check_seg_range(sbi, segno); | 1531 | check_seg_range(sbi, segno); |
@@ -1426,7 +1551,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, | |||
1426 | /* get current sit block page without lock */ | 1551 | /* get current sit block page without lock */ |
1427 | src_page = get_meta_page(sbi, src_off); | 1552 | src_page = get_meta_page(sbi, src_off); |
1428 | dst_page = grab_meta_page(sbi, dst_off); | 1553 | dst_page = grab_meta_page(sbi, dst_off); |
1429 | f2fs_bug_on(PageDirty(src_page)); | 1554 | f2fs_bug_on(sbi, PageDirty(src_page)); |
1430 | 1555 | ||
1431 | src_addr = page_address(src_page); | 1556 | src_addr = page_address(src_page); |
1432 | dst_addr = page_address(dst_page); | 1557 | dst_addr = page_address(dst_page); |
@@ -1440,101 +1565,192 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, | |||
1440 | return dst_page; | 1565 | return dst_page; |
1441 | } | 1566 | } |
1442 | 1567 | ||
1443 | static bool flush_sits_in_journal(struct f2fs_sb_info *sbi) | 1568 | static struct sit_entry_set *grab_sit_entry_set(void) |
1569 | { | ||
1570 | struct sit_entry_set *ses = | ||
1571 | f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC); | ||
1572 | |||
1573 | ses->entry_cnt = 0; | ||
1574 | INIT_LIST_HEAD(&ses->set_list); | ||
1575 | return ses; | ||
1576 | } | ||
1577 | |||
1578 | static void release_sit_entry_set(struct sit_entry_set *ses) | ||
1579 | { | ||
1580 | list_del(&ses->set_list); | ||
1581 | kmem_cache_free(sit_entry_set_slab, ses); | ||
1582 | } | ||
1583 | |||
1584 | static void adjust_sit_entry_set(struct sit_entry_set *ses, | ||
1585 | struct list_head *head) | ||
1586 | { | ||
1587 | struct sit_entry_set *next = ses; | ||
1588 | |||
1589 | if (list_is_last(&ses->set_list, head)) | ||
1590 | return; | ||
1591 | |||
1592 | list_for_each_entry_continue(next, head, set_list) | ||
1593 | if (ses->entry_cnt <= next->entry_cnt) | ||
1594 | break; | ||
1595 | |||
1596 | list_move_tail(&ses->set_list, &next->set_list); | ||
1597 | } | ||
1598 | |||
1599 | static void add_sit_entry(unsigned int segno, struct list_head *head) | ||
1600 | { | ||
1601 | struct sit_entry_set *ses; | ||
1602 | unsigned int start_segno = START_SEGNO(segno); | ||
1603 | |||
1604 | list_for_each_entry(ses, head, set_list) { | ||
1605 | if (ses->start_segno == start_segno) { | ||
1606 | ses->entry_cnt++; | ||
1607 | adjust_sit_entry_set(ses, head); | ||
1608 | return; | ||
1609 | } | ||
1610 | } | ||
1611 | |||
1612 | ses = grab_sit_entry_set(); | ||
1613 | |||
1614 | ses->start_segno = start_segno; | ||
1615 | ses->entry_cnt++; | ||
1616 | list_add(&ses->set_list, head); | ||
1617 | } | ||
1618 | |||
1619 | static void add_sits_in_set(struct f2fs_sb_info *sbi) | ||
1620 | { | ||
1621 | struct f2fs_sm_info *sm_info = SM_I(sbi); | ||
1622 | struct list_head *set_list = &sm_info->sit_entry_set; | ||
1623 | unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap; | ||
1624 | unsigned int segno; | ||
1625 | |||
1626 | for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi)) | ||
1627 | add_sit_entry(segno, set_list); | ||
1628 | } | ||
1629 | |||
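The helpers above bucket dirty SIT entries by the on-disk block that holds them (START_SEGNO() rounds a segno down to a SIT_ENTRY_PER_BLOCK boundary) and keep the buckets sorted by entry_cnt, ascending. Worked example, assuming SIT_ENTRY_PER_BLOCK is 55 (a 4KB block over a 74-byte f2fs_sit_entry; value inferred from the on-disk format, not from this diff):

        /* dirtying segments 3, 57 and 58 yields two sets:
         *
         *      { start_segno = 0,  entry_cnt = 1 }     <- segment 3
         *      { start_segno = 55, entry_cnt = 2 }     <- segments 57, 58
         *
         * adjust_sit_entry_set() keeps the list ordered by entry_cnt, so
         * flush_sit_entries() below drains the small sets into the summary
         * journal first and only rewrites whole SIT pages for the big ones.
         */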
1630 | static void remove_sits_in_journal(struct f2fs_sb_info *sbi) | ||
1444 | { | 1631 | { |
1445 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); | 1632 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); |
1446 | struct f2fs_summary_block *sum = curseg->sum_blk; | 1633 | struct f2fs_summary_block *sum = curseg->sum_blk; |
1447 | int i; | 1634 | int i; |
1448 | 1635 | ||
1449 | /* | 1636 | for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { |
1450 | * If the journal area in the current summary is full of sit entries, | 1637 | unsigned int segno; |
1451 | * all the sit entries will be flushed. Otherwise the sit entries | 1638 | bool dirtied; |
1452 | * are not able to replace with newly hot sit entries. | 1639 | |
1453 | */ | 1640 | segno = le32_to_cpu(segno_in_journal(sum, i)); |
1454 | if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) { | 1641 | dirtied = __mark_sit_entry_dirty(sbi, segno); |
1455 | for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { | 1642 | |
1456 | unsigned int segno; | 1643 | if (!dirtied) |
1457 | segno = le32_to_cpu(segno_in_journal(sum, i)); | 1644 | add_sit_entry(segno, &SM_I(sbi)->sit_entry_set); |
1458 | __mark_sit_entry_dirty(sbi, segno); | ||
1459 | } | ||
1460 | update_sits_in_cursum(sum, -sits_in_cursum(sum)); | ||
1461 | return true; | ||
1462 | } | 1645 | } |
1463 | return false; | 1646 | update_sits_in_cursum(sum, -sits_in_cursum(sum)); |
1464 | } | 1647 | } |
1465 | 1648 | ||
1466 | /* | 1649 | /* |
1467 | * CP calls this function, which flushes SIT entries including sit_journal, | 1650 | * CP calls this function, which flushes SIT entries including sit_journal, |
1468 | * and moves prefree segs to free segs. | 1651 | * and moves prefree segs to free segs. |
1469 | */ | 1652 | */ |
1470 | void flush_sit_entries(struct f2fs_sb_info *sbi) | 1653 | void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) |
1471 | { | 1654 | { |
1472 | struct sit_info *sit_i = SIT_I(sbi); | 1655 | struct sit_info *sit_i = SIT_I(sbi); |
1473 | unsigned long *bitmap = sit_i->dirty_sentries_bitmap; | 1656 | unsigned long *bitmap = sit_i->dirty_sentries_bitmap; |
1474 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); | 1657 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); |
1475 | struct f2fs_summary_block *sum = curseg->sum_blk; | 1658 | struct f2fs_summary_block *sum = curseg->sum_blk; |
1476 | unsigned long nsegs = TOTAL_SEGS(sbi); | 1659 | struct sit_entry_set *ses, *tmp; |
1477 | struct page *page = NULL; | 1660 | struct list_head *head = &SM_I(sbi)->sit_entry_set; |
1478 | struct f2fs_sit_block *raw_sit = NULL; | 1661 | bool to_journal = true; |
1479 | unsigned int start = 0, end = 0; | 1662 | struct seg_entry *se; |
1480 | unsigned int segno; | ||
1481 | bool flushed; | ||
1482 | 1663 | ||
1483 | mutex_lock(&curseg->curseg_mutex); | 1664 | mutex_lock(&curseg->curseg_mutex); |
1484 | mutex_lock(&sit_i->sentry_lock); | 1665 | mutex_lock(&sit_i->sentry_lock); |
1485 | 1666 | ||
1486 | /* | 1667 | /* |
1487 | * "flushed" indicates whether sit entries in journal are flushed | 1668 | * add and account sit entries of dirty bitmap in sit entry |
1488 | * to the SIT area or not. | 1669 | * set temporarily |
1489 | */ | 1670 | */ |
1490 | flushed = flush_sits_in_journal(sbi); | 1671 | add_sits_in_set(sbi); |
1491 | 1672 | ||
1492 | for_each_set_bit(segno, bitmap, nsegs) { | 1673 | /* |
1493 | struct seg_entry *se = get_seg_entry(sbi, segno); | 1674 | * if there are no enough space in journal to store dirty sit |
1494 | int sit_offset, offset; | 1675 | * entries, remove all entries from journal and add and account |
1676 | * them in sit entry set. | ||
1677 | */ | ||
1678 | if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL)) | ||
1679 | remove_sits_in_journal(sbi); | ||
1495 | 1680 | ||
1496 | sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); | 1681 | if (!sit_i->dirty_sentries) |
1682 | goto out; | ||
1497 | 1683 | ||
1498 | /* add discard candidates */ | 1684 | /* |
1499 | if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) | 1685 | * there are two steps to flush sit entries: |
1500 | add_discard_addrs(sbi, segno, se); | 1686 | * #1, flush sit entries to journal in current cold data summary block. |
1687 | * #2, flush sit entries to sit page. | ||
1688 | */ | ||
1689 | list_for_each_entry_safe(ses, tmp, head, set_list) { | ||
1690 | struct page *page; | ||
1691 | struct f2fs_sit_block *raw_sit = NULL; | ||
1692 | unsigned int start_segno = ses->start_segno; | ||
1693 | unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK, | ||
1694 | (unsigned long)MAIN_SEGS(sbi)); | ||
1695 | unsigned int segno = start_segno; | ||
1696 | |||
1697 | if (to_journal && | ||
1698 | !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL)) | ||
1699 | to_journal = false; | ||
1700 | |||
1701 | if (!to_journal) { | ||
1702 | page = get_next_sit_page(sbi, start_segno); | ||
1703 | raw_sit = page_address(page); | ||
1704 | } | ||
1501 | 1705 | ||
1502 | if (flushed) | 1706 | /* flush dirty sit entries in region of current sit set */ |
1503 | goto to_sit_page; | 1707 | for_each_set_bit_from(segno, bitmap, end) { |
1708 | int offset, sit_offset; | ||
1504 | 1709 | ||
1505 | offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1); | 1710 | se = get_seg_entry(sbi, segno); |
1506 | if (offset >= 0) { | 1711 | |
1507 | segno_in_journal(sum, offset) = cpu_to_le32(segno); | 1712 | /* add discard candidates */ |
1508 | seg_info_to_raw_sit(se, &sit_in_journal(sum, offset)); | 1713 | if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) { |
1509 | goto flush_done; | 1714 | cpc->trim_start = segno; |
1510 | } | 1715 | add_discard_addrs(sbi, cpc); |
1511 | to_sit_page: | ||
1512 | if (!page || (start > segno) || (segno > end)) { | ||
1513 | if (page) { | ||
1514 | f2fs_put_page(page, 1); | ||
1515 | page = NULL; | ||
1516 | } | 1716 | } |
1517 | 1717 | ||
1518 | start = START_SEGNO(sit_i, segno); | 1718 | if (to_journal) { |
1519 | end = start + SIT_ENTRY_PER_BLOCK - 1; | 1719 | offset = lookup_journal_in_cursum(sum, |
1720 | SIT_JOURNAL, segno, 1); | ||
1721 | f2fs_bug_on(sbi, offset < 0); | ||
1722 | segno_in_journal(sum, offset) = | ||
1723 | cpu_to_le32(segno); | ||
1724 | seg_info_to_raw_sit(se, | ||
1725 | &sit_in_journal(sum, offset)); | ||
1726 | } else { | ||
1727 | sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); | ||
1728 | seg_info_to_raw_sit(se, | ||
1729 | &raw_sit->entries[sit_offset]); | ||
1730 | } | ||
1520 | 1731 | ||
1521 | /* read sit block that will be updated */ | 1732 | __clear_bit(segno, bitmap); |
1522 | page = get_next_sit_page(sbi, start); | 1733 | sit_i->dirty_sentries--; |
1523 | raw_sit = page_address(page); | 1734 | ses->entry_cnt--; |
1524 | } | 1735 | } |
1525 | 1736 | ||
1526 | /* udpate entry in SIT block */ | 1737 | if (!to_journal) |
1527 | seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); | 1738 | f2fs_put_page(page, 1); |
1528 | flush_done: | 1739 | |
1529 | __clear_bit(segno, bitmap); | 1740 | f2fs_bug_on(sbi, ses->entry_cnt); |
1530 | sit_i->dirty_sentries--; | 1741 | release_sit_entry_set(ses); |
1742 | } | ||
1743 | |||
1744 | f2fs_bug_on(sbi, !list_empty(head)); | ||
1745 | f2fs_bug_on(sbi, sit_i->dirty_sentries); | ||
1746 | out: | ||
1747 | if (cpc->reason == CP_DISCARD) { | ||
1748 | for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) | ||
1749 | add_discard_addrs(sbi, cpc); | ||
1531 | } | 1750 | } |
1532 | mutex_unlock(&sit_i->sentry_lock); | 1751 | mutex_unlock(&sit_i->sentry_lock); |
1533 | mutex_unlock(&curseg->curseg_mutex); | 1752 | mutex_unlock(&curseg->curseg_mutex); |
1534 | 1753 | ||
1535 | /* writeout last modified SIT block */ | ||
1536 | f2fs_put_page(page, 1); | ||
1537 | |||
1538 | set_prefree_as_free_segments(sbi); | 1754 | set_prefree_as_free_segments(sbi); |
1539 | } | 1755 | } |
1540 | 1756 | ||
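The to_journal flip in the loop above is the whole cost model: a set whose entries still fit in the cold-data summary journal rides out for free with the summary block the checkpoint writes anyway, while anything larger pays for a SIT block copy-and-rewrite. The decision in isolation (sketch; __has_cursum_space() is assumed to bound sits_in_cursum(sum) + n by the journal capacity):

        if (__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL)) {
                /* journal path: append <segno, raw sit> pairs into
                 * curseg->sum_blk, no extra meta page I/O */
        } else {
                /* page path: copy the SIT block to its pair and rewrite it */
                page = get_next_sit_page(sbi, ses->start_segno);
                raw_sit = page_address(page);
        }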
@@ -1554,16 +1770,16 @@ static int build_sit_info(struct f2fs_sb_info *sbi) | |||
1554 | 1770 | ||
1555 | SM_I(sbi)->sit_info = sit_i; | 1771 | SM_I(sbi)->sit_info = sit_i; |
1556 | 1772 | ||
1557 | sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry)); | 1773 | sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry)); |
1558 | if (!sit_i->sentries) | 1774 | if (!sit_i->sentries) |
1559 | return -ENOMEM; | 1775 | return -ENOMEM; |
1560 | 1776 | ||
1561 | bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); | 1777 | bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); |
1562 | sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL); | 1778 | sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL); |
1563 | if (!sit_i->dirty_sentries_bitmap) | 1779 | if (!sit_i->dirty_sentries_bitmap) |
1564 | return -ENOMEM; | 1780 | return -ENOMEM; |
1565 | 1781 | ||
1566 | for (start = 0; start < TOTAL_SEGS(sbi); start++) { | 1782 | for (start = 0; start < MAIN_SEGS(sbi); start++) { |
1567 | sit_i->sentries[start].cur_valid_map | 1783 | sit_i->sentries[start].cur_valid_map |
1568 | = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); | 1784 | = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); |
1569 | sit_i->sentries[start].ckpt_valid_map | 1785 | sit_i->sentries[start].ckpt_valid_map |
@@ -1574,7 +1790,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) | |||
1574 | } | 1790 | } |
1575 | 1791 | ||
1576 | if (sbi->segs_per_sec > 1) { | 1792 | if (sbi->segs_per_sec > 1) { |
1577 | sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) * | 1793 | sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) * |
1578 | sizeof(struct sec_entry)); | 1794 | sizeof(struct sec_entry)); |
1579 | if (!sit_i->sec_entries) | 1795 | if (!sit_i->sec_entries) |
1580 | return -ENOMEM; | 1796 | return -ENOMEM; |
@@ -1609,7 +1825,6 @@ static int build_sit_info(struct f2fs_sb_info *sbi) | |||
1609 | 1825 | ||
1610 | static int build_free_segmap(struct f2fs_sb_info *sbi) | 1826 | static int build_free_segmap(struct f2fs_sb_info *sbi) |
1611 | { | 1827 | { |
1612 | struct f2fs_sm_info *sm_info = SM_I(sbi); | ||
1613 | struct free_segmap_info *free_i; | 1828 | struct free_segmap_info *free_i; |
1614 | unsigned int bitmap_size, sec_bitmap_size; | 1829 | unsigned int bitmap_size, sec_bitmap_size; |
1615 | 1830 | ||
@@ -1620,12 +1835,12 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) | |||
1620 | 1835 | ||
1621 | SM_I(sbi)->free_info = free_i; | 1836 | SM_I(sbi)->free_info = free_i; |
1622 | 1837 | ||
1623 | bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); | 1838 | bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); |
1624 | free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL); | 1839 | free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL); |
1625 | if (!free_i->free_segmap) | 1840 | if (!free_i->free_segmap) |
1626 | return -ENOMEM; | 1841 | return -ENOMEM; |
1627 | 1842 | ||
1628 | sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); | 1843 | sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); |
1629 | free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); | 1844 | free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); |
1630 | if (!free_i->free_secmap) | 1845 | if (!free_i->free_secmap) |
1631 | return -ENOMEM; | 1846 | return -ENOMEM; |
@@ -1635,8 +1850,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) | |||
1635 | memset(free_i->free_secmap, 0xff, sec_bitmap_size); | 1850 | memset(free_i->free_secmap, 0xff, sec_bitmap_size); |
1636 | 1851 | ||
1637 | /* init free segmap information */ | 1852 | /* init free segmap information */ |
1638 | free_i->start_segno = | 1853 | free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi)); |
1639 | (unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr); | ||
1640 | free_i->free_segments = 0; | 1854 | free_i->free_segments = 0; |
1641 | free_i->free_sections = 0; | 1855 | free_i->free_sections = 0; |
1642 | rwlock_init(&free_i->segmap_lock); | 1856 | rwlock_init(&free_i->segmap_lock); |
@@ -1673,7 +1887,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) | |||
1673 | int sit_blk_cnt = SIT_BLK_CNT(sbi); | 1887 | int sit_blk_cnt = SIT_BLK_CNT(sbi); |
1674 | unsigned int i, start, end; | 1888 | unsigned int i, start, end; |
1675 | unsigned int readed, start_blk = 0; | 1889 | unsigned int readed, start_blk = 0; |
1676 | int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); | 1890 | int nrpages = MAX_BIO_BLOCKS(sbi); |
1677 | 1891 | ||
1678 | do { | 1892 | do { |
1679 | readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT); | 1893 | readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT); |
@@ -1681,7 +1895,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) | |||
1681 | start = start_blk * sit_i->sents_per_block; | 1895 | start = start_blk * sit_i->sents_per_block; |
1682 | end = (start_blk + readed) * sit_i->sents_per_block; | 1896 | end = (start_blk + readed) * sit_i->sents_per_block; |
1683 | 1897 | ||
1684 | for (; start < end && start < TOTAL_SEGS(sbi); start++) { | 1898 | for (; start < end && start < MAIN_SEGS(sbi); start++) { |
1685 | struct seg_entry *se = &sit_i->sentries[start]; | 1899 | struct seg_entry *se = &sit_i->sentries[start]; |
1686 | struct f2fs_sit_block *sit_blk; | 1900 | struct f2fs_sit_block *sit_blk; |
1687 | struct f2fs_sit_entry sit; | 1901 | struct f2fs_sit_entry sit; |
@@ -1719,7 +1933,7 @@ static void init_free_segmap(struct f2fs_sb_info *sbi) | |||
1719 | unsigned int start; | 1933 | unsigned int start; |
1720 | int type; | 1934 | int type; |
1721 | 1935 | ||
1722 | for (start = 0; start < TOTAL_SEGS(sbi); start++) { | 1936 | for (start = 0; start < MAIN_SEGS(sbi); start++) { |
1723 | struct seg_entry *sentry = get_seg_entry(sbi, start); | 1937 | struct seg_entry *sentry = get_seg_entry(sbi, start); |
1724 | if (!sentry->valid_blocks) | 1938 | if (!sentry->valid_blocks) |
1725 | __set_free(sbi, start); | 1939 | __set_free(sbi, start); |
@@ -1736,18 +1950,22 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) | |||
1736 | { | 1950 | { |
1737 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 1951 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
1738 | struct free_segmap_info *free_i = FREE_I(sbi); | 1952 | struct free_segmap_info *free_i = FREE_I(sbi); |
1739 | unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi); | 1953 | unsigned int segno = 0, offset = 0; |
1740 | unsigned short valid_blocks; | 1954 | unsigned short valid_blocks; |
1741 | 1955 | ||
1742 | while (1) { | 1956 | while (1) { |
1743 | /* find dirty segment based on free segmap */ | 1957 | /* find dirty segment based on free segmap */ |
1744 | segno = find_next_inuse(free_i, total_segs, offset); | 1958 | segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset); |
1745 | if (segno >= total_segs) | 1959 | if (segno >= MAIN_SEGS(sbi)) |
1746 | break; | 1960 | break; |
1747 | offset = segno + 1; | 1961 | offset = segno + 1; |
1748 | valid_blocks = get_valid_blocks(sbi, segno, 0); | 1962 | valid_blocks = get_valid_blocks(sbi, segno, 0); |
1749 | if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks) | 1963 | if (valid_blocks == sbi->blocks_per_seg || !valid_blocks) |
1964 | continue; | ||
1965 | if (valid_blocks > sbi->blocks_per_seg) { | ||
1966 | f2fs_bug_on(sbi, 1); | ||
1750 | continue; | 1967 | continue; |
1968 | } | ||
1751 | mutex_lock(&dirty_i->seglist_lock); | 1969 | mutex_lock(&dirty_i->seglist_lock); |
1752 | __locate_dirty_segment(sbi, segno, DIRTY); | 1970 | __locate_dirty_segment(sbi, segno, DIRTY); |
1753 | mutex_unlock(&dirty_i->seglist_lock); | 1971 | mutex_unlock(&dirty_i->seglist_lock); |
@@ -1757,7 +1975,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) | |||
1757 | static int init_victim_secmap(struct f2fs_sb_info *sbi) | 1975 | static int init_victim_secmap(struct f2fs_sb_info *sbi) |
1758 | { | 1976 | { |
1759 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 1977 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
1760 | unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); | 1978 | unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); |
1761 | 1979 | ||
1762 | dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); | 1980 | dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); |
1763 | if (!dirty_i->victim_secmap) | 1981 | if (!dirty_i->victim_secmap) |
@@ -1778,7 +1996,7 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) | |||
1778 | SM_I(sbi)->dirty_info = dirty_i; | 1996 | SM_I(sbi)->dirty_info = dirty_i; |
1779 | mutex_init(&dirty_i->seglist_lock); | 1997 | mutex_init(&dirty_i->seglist_lock); |
1780 | 1998 | ||
1781 | bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); | 1999 | bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); |
1782 | 2000 | ||
1783 | for (i = 0; i < NR_DIRTY_TYPE; i++) { | 2001 | for (i = 0; i < NR_DIRTY_TYPE; i++) { |
1784 | dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); | 2002 | dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); |
@@ -1802,7 +2020,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) | |||
1802 | 2020 | ||
1803 | sit_i->min_mtime = LLONG_MAX; | 2021 | sit_i->min_mtime = LLONG_MAX; |
1804 | 2022 | ||
1805 | for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { | 2023 | for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { |
1806 | unsigned int i; | 2024 | unsigned int i; |
1807 | unsigned long long mtime = 0; | 2025 | unsigned long long mtime = 0; |
1808 | 2026 | ||
@@ -1840,13 +2058,16 @@ int build_segment_manager(struct f2fs_sb_info *sbi) | |||
1840 | sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); | 2058 | sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); |
1841 | sm_info->rec_prefree_segments = sm_info->main_segments * | 2059 | sm_info->rec_prefree_segments = sm_info->main_segments * |
1842 | DEF_RECLAIM_PREFREE_SEGMENTS / 100; | 2060 | DEF_RECLAIM_PREFREE_SEGMENTS / 100; |
1843 | sm_info->ipu_policy = F2FS_IPU_DISABLE; | 2061 | sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; |
1844 | sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; | 2062 | sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; |
2063 | sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; | ||
1845 | 2064 | ||
1846 | INIT_LIST_HEAD(&sm_info->discard_list); | 2065 | INIT_LIST_HEAD(&sm_info->discard_list); |
1847 | sm_info->nr_discards = 0; | 2066 | sm_info->nr_discards = 0; |
1848 | sm_info->max_discards = 0; | 2067 | sm_info->max_discards = 0; |
1849 | 2068 | ||
2069 | INIT_LIST_HEAD(&sm_info->sit_entry_set); | ||
2070 | |||
1850 | if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { | 2071 | if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { |
1851 | err = create_flush_cmd_control(sbi); | 2072 | err = create_flush_cmd_control(sbi); |
1852 | if (err) | 2073 | if (err) |
@@ -1942,7 +2163,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) | |||
1942 | return; | 2163 | return; |
1943 | 2164 | ||
1944 | if (sit_i->sentries) { | 2165 | if (sit_i->sentries) { |
1945 | for (start = 0; start < TOTAL_SEGS(sbi); start++) { | 2166 | for (start = 0; start < MAIN_SEGS(sbi); start++) { |
1946 | kfree(sit_i->sentries[start].cur_valid_map); | 2167 | kfree(sit_i->sentries[start].cur_valid_map); |
1947 | kfree(sit_i->sentries[start].ckpt_valid_map); | 2168 | kfree(sit_i->sentries[start].ckpt_valid_map); |
1948 | } | 2169 | } |
@@ -1976,11 +2197,30 @@ int __init create_segment_manager_caches(void) | |||
1976 | discard_entry_slab = f2fs_kmem_cache_create("discard_entry", | 2197 | discard_entry_slab = f2fs_kmem_cache_create("discard_entry", |
1977 | sizeof(struct discard_entry)); | 2198 | sizeof(struct discard_entry)); |
1978 | if (!discard_entry_slab) | 2199 | if (!discard_entry_slab) |
1979 | return -ENOMEM; | 2200 | goto fail; |
2201 | |||
2202 | sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", | ||
2203 | sizeof(struct sit_entry_set)); | ||
2204 | if (!sit_entry_set_slab) | ||
2205 | goto destroy_discard_entry; | ||
2206 | |||
2207 | inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", | ||
2208 | sizeof(struct inmem_pages)); | ||
2209 | if (!inmem_entry_slab) | ||
2210 | goto destroy_sit_entry_set; | ||
1980 | return 0; | 2211 | return 0; |
2212 | |||
2213 | destroy_sit_entry_set: | ||
2214 | kmem_cache_destroy(sit_entry_set_slab); | ||
2215 | destroy_discard_entry: | ||
2216 | kmem_cache_destroy(discard_entry_slab); | ||
2217 | fail: | ||
2218 | return -ENOMEM; | ||
1981 | } | 2219 | } |
1982 | 2220 | ||
1983 | void destroy_segment_manager_caches(void) | 2221 | void destroy_segment_manager_caches(void) |
1984 | { | 2222 | { |
2223 | kmem_cache_destroy(sit_entry_set_slab); | ||
1985 | kmem_cache_destroy(discard_entry_slab); | 2224 | kmem_cache_destroy(discard_entry_slab); |
2225 | kmem_cache_destroy(inmem_entry_slab); | ||
1986 | } | 2226 | } |
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index ff483257283b..2495bec1c621 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h | |||
@@ -45,16 +45,26 @@ | |||
45 | (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ | 45 | (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ |
46 | sbi->segs_per_sec)) \ | 46 | sbi->segs_per_sec)) \ |
47 | 47 | ||
48 | #define START_BLOCK(sbi, segno) \ | 48 | #define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) |
49 | (SM_I(sbi)->seg0_blkaddr + \ | 49 | #define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) |
50 | |||
51 | #define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) | ||
52 | #define MAIN_SECS(sbi) (sbi->total_sections) | ||
53 | |||
54 | #define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) | ||
55 | #define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg) | ||
56 | |||
57 | #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) | ||
58 | #define SEGMENT_SIZE(sbi) (1ULL << (sbi->log_blocksize + \ | ||
59 | sbi->log_blocks_per_seg)) | ||
60 | |||
61 | #define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \ | ||
50 | (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) | 62 | (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) |
63 | |||
51 | #define NEXT_FREE_BLKADDR(sbi, curseg) \ | 64 | #define NEXT_FREE_BLKADDR(sbi, curseg) \ |
52 | (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) | 65 | (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) |
53 | 66 | ||
54 | #define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr) | 67 | #define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi)) |
55 | |||
56 | #define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \ | ||
57 | ((blk_addr) - SM_I(sbi)->seg0_blkaddr) | ||
58 | #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ | 68 | #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ |
59 | (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) | 69 | (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) |
60 | #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ | 70 | #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ |
@@ -77,23 +87,21 @@ | |||
77 | 87 | ||
78 | #define SIT_ENTRY_OFFSET(sit_i, segno) \ | 88 | #define SIT_ENTRY_OFFSET(sit_i, segno) \ |
79 | (segno % sit_i->sents_per_block) | 89 | (segno % sit_i->sents_per_block) |
80 | #define SIT_BLOCK_OFFSET(sit_i, segno) \ | 90 | #define SIT_BLOCK_OFFSET(segno) \ |
81 | (segno / SIT_ENTRY_PER_BLOCK) | 91 | (segno / SIT_ENTRY_PER_BLOCK) |
82 | #define START_SEGNO(sit_i, segno) \ | 92 | #define START_SEGNO(segno) \ |
83 | (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) | 93 | (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK) |
84 | #define SIT_BLK_CNT(sbi) \ | 94 | #define SIT_BLK_CNT(sbi) \ |
85 | ((TOTAL_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) | 95 | ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) |
86 | #define f2fs_bitmap_size(nr) \ | 96 | #define f2fs_bitmap_size(nr) \ |
87 | (BITS_TO_LONGS(nr) * sizeof(unsigned long)) | 97 | (BITS_TO_LONGS(nr) * sizeof(unsigned long)) |
88 | #define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) | ||
89 | #define TOTAL_SECS(sbi) (sbi->total_sections) | ||
90 | 98 | ||
91 | #define SECTOR_FROM_BLOCK(sbi, blk_addr) \ | 99 | #define SECTOR_FROM_BLOCK(blk_addr) \ |
92 | (((sector_t)blk_addr) << (sbi)->log_sectors_per_block) | 100 | (((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK) |
93 | #define SECTOR_TO_BLOCK(sbi, sectors) \ | 101 | #define SECTOR_TO_BLOCK(sectors) \ |
94 | (sectors >> (sbi)->log_sectors_per_block) | 102 | (sectors >> F2FS_LOG_SECTORS_PER_BLOCK) |
95 | #define MAX_BIO_BLOCKS(max_hw_blocks) \ | 103 | #define MAX_BIO_BLOCKS(sbi) \ |
96 | (min((int)max_hw_blocks, BIO_MAX_PAGES)) | 104 | ((int)min((int)max_hw_blocks(sbi), BIO_MAX_PAGES)) |
97 | 105 | ||
98 | /* | 106 | /* |
99 | * indicate a block allocation direction: RIGHT and LEFT. | 107 | * indicate a block allocation direction: RIGHT and LEFT. |
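SECTOR_FROM_BLOCK/SECTOR_TO_BLOCK above now shift by the fixed F2FS_LOG_SECTORS_PER_BLOCK instead of a per-superblock field; since sector_t counts 512-byte units against f2fs's 4 KB blocks, the log ratio is 3. A hedged illustration:

/* Illustration, assuming F2FS_LOG_SECTORS_PER_BLOCK == 3 (4 KB / 512 B). */
static void block_sector_roundtrip(void)
{
	sector_t sec = SECTOR_FROM_BLOCK(10);	/* 10 << 3 == 80 */
	block_t blk = SECTOR_TO_BLOCK(sec);	/* 80 >> 3 == 10 */

	BUG_ON(blk != 10);
}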
@@ -167,6 +175,11 @@ struct segment_allocation { | |||
167 | void (*allocate_segment)(struct f2fs_sb_info *, int, bool); | 175 | void (*allocate_segment)(struct f2fs_sb_info *, int, bool); |
168 | }; | 176 | }; |
169 | 177 | ||
178 | struct inmem_pages { | ||
179 | struct list_head list; | ||
180 | struct page *page; | ||
181 | }; | ||
182 | |||
170 | struct sit_info { | 183 | struct sit_info { |
171 | const struct segment_allocation *s_ops; | 184 | const struct segment_allocation *s_ops; |
172 | 185 | ||
@@ -237,6 +250,12 @@ struct curseg_info { | |||
237 | unsigned int next_segno; /* preallocated segment */ | 250 | unsigned int next_segno; /* preallocated segment */ |
238 | }; | 251 | }; |
239 | 252 | ||
253 | struct sit_entry_set { | ||
254 | struct list_head set_list; /* link with all sit sets */ | ||
255 | unsigned int start_segno; /* start segno of sits in set */ | ||
256 | unsigned int entry_cnt; /* the # of sit entries in set */ | ||
257 | }; | ||
258 | |||
240 | /* | 259 | /* |
241 | * inline functions | 260 | * inline functions |
242 | */ | 261 | */ |
@@ -316,7 +335,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) | |||
316 | clear_bit(segno, free_i->free_segmap); | 335 | clear_bit(segno, free_i->free_segmap); |
317 | free_i->free_segments++; | 336 | free_i->free_segments++; |
318 | 337 | ||
319 | next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno); | 338 | next = find_next_bit(free_i->free_segmap, MAIN_SEGS(sbi), start_segno); |
320 | if (next >= start_segno + sbi->segs_per_sec) { | 339 | if (next >= start_segno + sbi->segs_per_sec) { |
321 | clear_bit(secno, free_i->free_secmap); | 340 | clear_bit(secno, free_i->free_secmap); |
322 | free_i->free_sections++; | 341 | free_i->free_sections++; |
@@ -430,8 +449,10 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) | |||
430 | 449 | ||
431 | static inline bool need_SSR(struct f2fs_sb_info *sbi) | 450 | static inline bool need_SSR(struct f2fs_sb_info *sbi) |
432 | { | 451 | { |
433 | return (prefree_segments(sbi) / sbi->segs_per_sec) | 452 | int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); |
434 | + free_sections(sbi) < overprovision_sections(sbi); | 453 | int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); |
454 | return free_sections(sbi) <= (node_secs + 2 * dent_secs + | ||
455 | reserved_sections(sbi) + 1); | ||
435 | } | 456 | } |
436 | 457 | ||
437 | static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) | 458 | static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) |
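The rewritten need_SSR() above fires once free sections can no longer absorb the currently dirty node and dentry sections on top of the reserve. A worked instance with invented numbers:

/* Worked example (invented numbers): with 2 dirty-node sections,
 * 1 dirty-dentry section and reserved_sections(sbi) == 10, SSR is
 * requested once free_sections(sbi) <= 2 + 2 * 1 + 10 + 1 == 15. */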
@@ -466,48 +487,47 @@ static inline int utilization(struct f2fs_sb_info *sbi) | |||
466 | * F2FS_IPU_UTIL - if FS utilization is over threshold, | 487 | * F2FS_IPU_UTIL - if FS utilization is over threshold, |
467 | * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over | 488 | * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over |
468 | * threshold, | 489 | * threshold, |
490 | * F2FS_IPU_FSYNC - activated in the fsync path only, for high-performance | ||
491 | * flash storage. IPU is triggered only if the # of | ||
492 | * dirty pages exceeds min_fsync_blocks. | ||
469 | * F2FS_IPU_DISABLE - disable IPU. (=default option) | 493 | * F2FS_IPU_DISABLE - disable IPU. (=default option) |
470 | */ | 494 | */ |
471 | #define DEF_MIN_IPU_UTIL 70 | 495 | #define DEF_MIN_IPU_UTIL 70 |
496 | #define DEF_MIN_FSYNC_BLOCKS 8 | ||
472 | 497 | ||
473 | enum { | 498 | enum { |
474 | F2FS_IPU_FORCE, | 499 | F2FS_IPU_FORCE, |
475 | F2FS_IPU_SSR, | 500 | F2FS_IPU_SSR, |
476 | F2FS_IPU_UTIL, | 501 | F2FS_IPU_UTIL, |
477 | F2FS_IPU_SSR_UTIL, | 502 | F2FS_IPU_SSR_UTIL, |
478 | F2FS_IPU_DISABLE, | 503 | F2FS_IPU_FSYNC, |
479 | }; | 504 | }; |
480 | 505 | ||
481 | static inline bool need_inplace_update(struct inode *inode) | 506 | static inline bool need_inplace_update(struct inode *inode) |
482 | { | 507 | { |
483 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 508 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
509 | unsigned int policy = SM_I(sbi)->ipu_policy; | ||
484 | 510 | ||
485 | /* IPU can be done only for the user data */ | 511 | /* IPU can be done only for the user data */ |
486 | if (S_ISDIR(inode->i_mode)) | 512 | if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) |
487 | return false; | 513 | return false; |
488 | 514 | ||
489 | /* this is only set during fdatasync */ | 515 | if (policy & (0x1 << F2FS_IPU_FORCE)) |
490 | if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU)) | 516 | return true; |
517 | if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi)) | ||
518 | return true; | ||
519 | if (policy & (0x1 << F2FS_IPU_UTIL) && | ||
520 | utilization(sbi) > SM_I(sbi)->min_ipu_util) | ||
521 | return true; | ||
522 | if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) && | ||
523 | utilization(sbi) > SM_I(sbi)->min_ipu_util) | ||
491 | return true; | 524 | return true; |
492 | 525 | ||
493 | switch (SM_I(sbi)->ipu_policy) { | 526 | /* this is only set during fdatasync */ |
494 | case F2FS_IPU_FORCE: | 527 | if (policy & (0x1 << F2FS_IPU_FSYNC) && |
528 | is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU)) | ||
495 | return true; | 529 | return true; |
496 | case F2FS_IPU_SSR: | 530 | |
497 | if (need_SSR(sbi)) | ||
498 | return true; | ||
499 | break; | ||
500 | case F2FS_IPU_UTIL: | ||
501 | if (utilization(sbi) > SM_I(sbi)->min_ipu_util) | ||
502 | return true; | ||
503 | break; | ||
504 | case F2FS_IPU_SSR_UTIL: | ||
505 | if (need_SSR(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util) | ||
506 | return true; | ||
507 | break; | ||
508 | case F2FS_IPU_DISABLE: | ||
509 | break; | ||
510 | } | ||
511 | return false; | 531 | return false; |
512 | } | 532 | } |
513 | 533 | ||
@@ -534,18 +554,13 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type) | |||
534 | #ifdef CONFIG_F2FS_CHECK_FS | 554 | #ifdef CONFIG_F2FS_CHECK_FS |
535 | static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) | 555 | static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) |
536 | { | 556 | { |
537 | unsigned int end_segno = SM_I(sbi)->segment_count - 1; | 557 | BUG_ON(segno > TOTAL_SEGS(sbi) - 1); |
538 | BUG_ON(segno > end_segno); | ||
539 | } | 558 | } |
540 | 559 | ||
541 | static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) | 560 | static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) |
542 | { | 561 | { |
543 | struct f2fs_sm_info *sm_info = SM_I(sbi); | 562 | BUG_ON(blk_addr < SEG0_BLKADDR(sbi)); |
544 | block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg; | 563 | BUG_ON(blk_addr >= MAX_BLKADDR(sbi)); |
545 | block_t start_addr = sm_info->seg0_blkaddr; | ||
546 | block_t end_addr = start_addr + total_blks - 1; | ||
547 | BUG_ON(blk_addr < start_addr); | ||
548 | BUG_ON(blk_addr > end_addr); | ||
549 | } | 564 | } |
550 | 565 | ||
551 | /* | 566 | /* |
@@ -554,8 +569,6 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) | |||
554 | static inline void check_block_count(struct f2fs_sb_info *sbi, | 569 | static inline void check_block_count(struct f2fs_sb_info *sbi, |
555 | int segno, struct f2fs_sit_entry *raw_sit) | 570 | int segno, struct f2fs_sit_entry *raw_sit) |
556 | { | 571 | { |
557 | struct f2fs_sm_info *sm_info = SM_I(sbi); | ||
558 | unsigned int end_segno = sm_info->segment_count - 1; | ||
559 | bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false; | 572 | bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false; |
560 | int valid_blocks = 0; | 573 | int valid_blocks = 0; |
561 | int cur_pos = 0, next_pos; | 574 | int cur_pos = 0, next_pos; |
@@ -564,7 +577,7 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, | |||
564 | BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); | 577 | BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); |
565 | 578 | ||
566 | /* check boundary of a given segment number */ | 579 | /* check boundary of a given segment number */ |
567 | BUG_ON(segno > end_segno); | 580 | BUG_ON(segno > TOTAL_SEGS(sbi) - 1); |
568 | 581 | ||
569 | /* check bitmap with valid block count */ | 582 | /* check bitmap with valid block count */ |
570 | do { | 583 | do { |
@@ -583,16 +596,39 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, | |||
583 | BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); | 596 | BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); |
584 | } | 597 | } |
585 | #else | 598 | #else |
586 | #define check_seg_range(sbi, segno) | 599 | static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) |
587 | #define verify_block_addr(sbi, blk_addr) | 600 | { |
588 | #define check_block_count(sbi, segno, raw_sit) | 601 | if (segno > TOTAL_SEGS(sbi) - 1) |
602 | sbi->need_fsck = true; | ||
603 | } | ||
604 | |||
605 | static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) | ||
606 | { | ||
607 | if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi)) | ||
608 | sbi->need_fsck = true; | ||
609 | } | ||
610 | |||
611 | /* | ||
612 | * Summary block is always treated as an invalid block | ||
613 | */ | ||
614 | static inline void check_block_count(struct f2fs_sb_info *sbi, | ||
615 | int segno, struct f2fs_sit_entry *raw_sit) | ||
616 | { | ||
617 | /* check segment usage */ | ||
618 | if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg) | ||
619 | sbi->need_fsck = true; | ||
620 | |||
621 | /* check boundary of a given segment number */ | ||
622 | if (segno > TOTAL_SEGS(sbi) - 1) | ||
623 | sbi->need_fsck = true; | ||
624 | } | ||
589 | #endif | 625 | #endif |
590 | 626 | ||
591 | static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, | 627 | static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, |
592 | unsigned int start) | 628 | unsigned int start) |
593 | { | 629 | { |
594 | struct sit_info *sit_i = SIT_I(sbi); | 630 | struct sit_info *sit_i = SIT_I(sbi); |
595 | unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start); | 631 | unsigned int offset = SIT_BLOCK_OFFSET(start); |
596 | block_t blk_addr = sit_i->sit_base_addr + offset; | 632 | block_t blk_addr = sit_i->sit_base_addr + offset; |
597 | 633 | ||
598 | check_seg_range(sbi, start); | 634 | check_seg_range(sbi, start); |
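For orientation, a worked pass through the lookup above, assuming the usual 4 KB page where SIT_ENTRY_PER_BLOCK works out to 55:

/* Worked example (assumes SIT_ENTRY_PER_BLOCK == 55):
 * segno 123 -> SIT_BLOCK_OFFSET(123) == 123 / 55 == 2, so
 * blk_addr == sit_base_addr + 2; within that block the entry sits at
 * SIT_ENTRY_OFFSET == 123 % 55 == 13, and START_SEGNO(123) == 110 is
 * the first segment the block covers. */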
@@ -619,7 +655,7 @@ static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi, | |||
619 | 655 | ||
620 | static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) | 656 | static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) |
621 | { | 657 | { |
622 | unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start); | 658 | unsigned int block_off = SIT_BLOCK_OFFSET(start); |
623 | 659 | ||
624 | if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) | 660 | if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) |
625 | f2fs_clear_bit(block_off, sit_i->sit_bitmap); | 661 | f2fs_clear_bit(block_off, sit_i->sit_bitmap); |
@@ -666,7 +702,7 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) | |||
666 | { | 702 | { |
667 | struct block_device *bdev = sbi->sb->s_bdev; | 703 | struct block_device *bdev = sbi->sb->s_bdev; |
668 | struct request_queue *q = bdev_get_queue(bdev); | 704 | struct request_queue *q = bdev_get_queue(bdev); |
669 | return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); | 705 | return SECTOR_TO_BLOCK(queue_max_sectors(q)); |
670 | } | 706 | } |
671 | 707 | ||
672 | /* | 708 | /* |
@@ -683,7 +719,7 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) | |||
683 | else if (type == NODE) | 719 | else if (type == NODE) |
684 | return 3 * sbi->blocks_per_seg; | 720 | return 3 * sbi->blocks_per_seg; |
685 | else if (type == META) | 721 | else if (type == META) |
686 | return MAX_BIO_BLOCKS(max_hw_blocks(sbi)); | 722 | return MAX_BIO_BLOCKS(sbi); |
687 | else | 723 | else |
688 | return 0; | 724 | return 0; |
689 | } | 725 | } |
@@ -706,7 +742,7 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type, | |||
706 | else if (type == NODE) | 742 | else if (type == NODE) |
707 | desired = 3 * max_hw_blocks(sbi); | 743 | desired = 3 * max_hw_blocks(sbi); |
708 | else | 744 | else |
709 | desired = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); | 745 | desired = MAX_BIO_BLOCKS(sbi); |
710 | 746 | ||
711 | wbc->nr_to_write = desired; | 747 | wbc->nr_to_write = desired; |
712 | return desired - nr_to_write; | 748 | return desired - nr_to_write; |
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 41bdf511003d..41d6f700f4ee 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c | |||
@@ -190,6 +190,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); | |||
190 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); | 190 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); |
191 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); | 191 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); |
192 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); | 192 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); |
193 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); | ||
193 | F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); | 194 | F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); |
194 | F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); | 195 | F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); |
195 | F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); | 196 | F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); |
@@ -204,6 +205,7 @@ static struct attribute *f2fs_attrs[] = { | |||
204 | ATTR_LIST(max_small_discards), | 205 | ATTR_LIST(max_small_discards), |
205 | ATTR_LIST(ipu_policy), | 206 | ATTR_LIST(ipu_policy), |
206 | ATTR_LIST(min_ipu_util), | 207 | ATTR_LIST(min_ipu_util), |
208 | ATTR_LIST(min_fsync_blocks), | ||
207 | ATTR_LIST(max_victim_search), | 209 | ATTR_LIST(max_victim_search), |
208 | ATTR_LIST(dir_level), | 210 | ATTR_LIST(dir_level), |
209 | ATTR_LIST(ram_thresh), | 211 | ATTR_LIST(ram_thresh), |
@@ -366,11 +368,13 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) | |||
366 | 368 | ||
367 | /* Initialize f2fs-specific inode info */ | 369 | /* Initialize f2fs-specific inode info */ |
368 | fi->vfs_inode.i_version = 1; | 370 | fi->vfs_inode.i_version = 1; |
369 | atomic_set(&fi->dirty_dents, 0); | 371 | atomic_set(&fi->dirty_pages, 0); |
370 | fi->i_current_depth = 1; | 372 | fi->i_current_depth = 1; |
371 | fi->i_advise = 0; | 373 | fi->i_advise = 0; |
372 | rwlock_init(&fi->ext.ext_lock); | 374 | rwlock_init(&fi->ext.ext_lock); |
373 | init_rwsem(&fi->i_sem); | 375 | init_rwsem(&fi->i_sem); |
376 | INIT_LIST_HEAD(&fi->inmem_pages); | ||
377 | mutex_init(&fi->inmem_lock); | ||
374 | 378 | ||
375 | set_inode_flag(fi, FI_NEW_INODE); | 379 | set_inode_flag(fi, FI_NEW_INODE); |
376 | 380 | ||
@@ -432,14 +436,19 @@ static void f2fs_put_super(struct super_block *sb) | |||
432 | stop_gc_thread(sbi); | 436 | stop_gc_thread(sbi); |
433 | 437 | ||
434 | /* We don't need to do checkpoint when it's clean */ | 438 | /* We don't need to do checkpoint when it's clean */ |
435 | if (sbi->s_dirty) | 439 | if (sbi->s_dirty) { |
436 | write_checkpoint(sbi, true); | 440 | struct cp_control cpc = { |
441 | .reason = CP_UMOUNT, | ||
442 | }; | ||
443 | write_checkpoint(sbi, &cpc); | ||
444 | } | ||
437 | 445 | ||
438 | /* | 446 | /* |
439 | * Normally the superblock is clean, so we need to release this. | 447 | * Normally the superblock is clean, so we need to release this. |
440 | * In addition, EIO causes the checkpoint to be skipped, so we need this too. | 448 | * In addition, EIO causes the checkpoint to be skipped, so we need this too. |
441 | */ | 449 | */ |
442 | release_dirty_inode(sbi); | 450 | release_dirty_inode(sbi); |
451 | release_discard_addrs(sbi); | ||
443 | 452 | ||
444 | iput(sbi->node_inode); | 453 | iput(sbi->node_inode); |
445 | iput(sbi->meta_inode); | 454 | iput(sbi->meta_inode); |
@@ -464,8 +473,11 @@ int f2fs_sync_fs(struct super_block *sb, int sync) | |||
464 | trace_f2fs_sync_fs(sb, sync); | 473 | trace_f2fs_sync_fs(sb, sync); |
465 | 474 | ||
466 | if (sync) { | 475 | if (sync) { |
476 | struct cp_control cpc = { | ||
477 | .reason = CP_SYNC, | ||
478 | }; | ||
467 | mutex_lock(&sbi->gc_mutex); | 479 | mutex_lock(&sbi->gc_mutex); |
468 | write_checkpoint(sbi, false); | 480 | write_checkpoint(sbi, &cpc); |
469 | mutex_unlock(&sbi->gc_mutex); | 481 | mutex_unlock(&sbi->gc_mutex); |
470 | } else { | 482 | } else { |
471 | f2fs_balance_fs(sbi); | 483 | f2fs_balance_fs(sbi); |
@@ -616,6 +628,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) | |||
616 | org_mount_opt = sbi->mount_opt; | 628 | org_mount_opt = sbi->mount_opt; |
617 | active_logs = sbi->active_logs; | 629 | active_logs = sbi->active_logs; |
618 | 630 | ||
631 | sbi->mount_opt.opt = 0; | ||
632 | sbi->active_logs = NR_CURSEG_TYPE; | ||
633 | |||
619 | /* parse mount options */ | 634 | /* parse mount options */ |
620 | err = parse_options(sb, data); | 635 | err = parse_options(sb, data); |
621 | if (err) | 636 | if (err) |
@@ -786,14 +801,22 @@ static int sanity_check_raw_super(struct super_block *sb, | |||
786 | return 1; | 801 | return 1; |
787 | } | 802 | } |
788 | 803 | ||
789 | if (le32_to_cpu(raw_super->log_sectorsize) != | 804 | /* Currently, support 512/1024/2048/4096 bytes sector size */ |
790 | F2FS_LOG_SECTOR_SIZE) { | 805 | if (le32_to_cpu(raw_super->log_sectorsize) > |
791 | f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize"); | 806 | F2FS_MAX_LOG_SECTOR_SIZE || |
807 | le32_to_cpu(raw_super->log_sectorsize) < | ||
808 | F2FS_MIN_LOG_SECTOR_SIZE) { | ||
809 | f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)", | ||
810 | le32_to_cpu(raw_super->log_sectorsize)); | ||
792 | return 1; | 811 | return 1; |
793 | } | 812 | } |
794 | if (le32_to_cpu(raw_super->log_sectors_per_block) != | 813 | if (le32_to_cpu(raw_super->log_sectors_per_block) + |
795 | F2FS_LOG_SECTORS_PER_BLOCK) { | 814 | le32_to_cpu(raw_super->log_sectorsize) != |
796 | f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block"); | 815 | F2FS_MAX_LOG_SECTOR_SIZE) { |
816 | f2fs_msg(sb, KERN_INFO, | ||
817 | "Invalid log sectors per block(%u) log sectorsize(%u)", | ||
818 | le32_to_cpu(raw_super->log_sectors_per_block), | ||
819 | le32_to_cpu(raw_super->log_sectorsize)); | ||
797 | return 1; | 820 | return 1; |
798 | } | 821 | } |
799 | return 0; | 822 | return 0; |
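The relaxed check accepts any sector size from 512 to 4096 bytes, requiring only that sectors-per-block complement it to a 4 KB block. Worked cases, assuming F2FS_MIN/MAX_LOG_SECTOR_SIZE are 9 and 12:

/* 512 B sectors: log_sectorsize = 9,  log_sectors_per_block = 3, 9 + 3 == 12: ok
 * 4 KB sectors:  log_sectorsize = 12, log_sectors_per_block = 0, 12 + 0 == 12: ok
 * rejected: log_sectorsize outside [9, 12], or any pair not summing to 12. */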
@@ -849,6 +872,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) | |||
849 | atomic_set(&sbi->nr_pages[i], 0); | 872 | atomic_set(&sbi->nr_pages[i], 0); |
850 | 873 | ||
851 | sbi->dir_level = DEF_DIR_LEVEL; | 874 | sbi->dir_level = DEF_DIR_LEVEL; |
875 | sbi->need_fsck = false; | ||
852 | } | 876 | } |
853 | 877 | ||
854 | /* | 878 | /* |
@@ -1082,6 +1106,9 @@ try_onemore: | |||
1082 | if (err) | 1106 | if (err) |
1083 | goto free_proc; | 1107 | goto free_proc; |
1084 | 1108 | ||
1109 | if (!retry) | ||
1110 | sbi->need_fsck = true; | ||
1111 | |||
1085 | /* recover fsynced data */ | 1112 | /* recover fsynced data */ |
1086 | if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { | 1113 | if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { |
1087 | err = recover_fsync_data(sbi); | 1114 | err = recover_fsync_data(sbi); |
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 728a5dc3dc16..deca8728117b 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c | |||
@@ -266,7 +266,7 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index, | |||
266 | 266 | ||
267 | static void *read_all_xattrs(struct inode *inode, struct page *ipage) | 267 | static void *read_all_xattrs(struct inode *inode, struct page *ipage) |
268 | { | 268 | { |
269 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 269 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
270 | struct f2fs_xattr_header *header; | 270 | struct f2fs_xattr_header *header; |
271 | size_t size = PAGE_SIZE, inline_size = 0; | 271 | size_t size = PAGE_SIZE, inline_size = 0; |
272 | void *txattr_addr; | 272 | void *txattr_addr; |
@@ -325,7 +325,7 @@ fail: | |||
325 | static inline int write_all_xattrs(struct inode *inode, __u32 hsize, | 325 | static inline int write_all_xattrs(struct inode *inode, __u32 hsize, |
326 | void *txattr_addr, struct page *ipage) | 326 | void *txattr_addr, struct page *ipage) |
327 | { | 327 | { |
328 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 328 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
329 | size_t inline_size = 0; | 329 | size_t inline_size = 0; |
330 | void *xattr_addr; | 330 | void *xattr_addr; |
331 | struct page *xpage; | 331 | struct page *xpage; |
@@ -373,7 +373,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, | |||
373 | alloc_nid_failed(sbi, new_nid); | 373 | alloc_nid_failed(sbi, new_nid); |
374 | return PTR_ERR(xpage); | 374 | return PTR_ERR(xpage); |
375 | } | 375 | } |
376 | f2fs_bug_on(new_nid); | 376 | f2fs_bug_on(sbi, new_nid); |
377 | f2fs_wait_on_page_writeback(xpage, NODE); | 377 | f2fs_wait_on_page_writeback(xpage, NODE); |
378 | } else { | 378 | } else { |
379 | struct dnode_of_data dn; | 379 | struct dnode_of_data dn; |
@@ -596,7 +596,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, | |||
596 | const void *value, size_t size, | 596 | const void *value, size_t size, |
597 | struct page *ipage, int flags) | 597 | struct page *ipage, int flags) |
598 | { | 598 | { |
599 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 599 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
600 | int err; | 600 | int err; |
601 | 601 | ||
602 | /* this case is only from init_inode_metadata */ | 602 | /* this case is only from init_inode_metadata */ |
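The xattr.c hunks above (and the earlier super.c ones) replace the repeated F2FS_SB(inode->i_sb) dance with F2FS_I_SB(inode). The helper itself is not shown in this diff; presumably it is a trivial inline in f2fs.h along these lines:

/* Likely shape of the new accessor (sketch; the real definition lives
 * in f2fs.h and is not part of the hunks shown here): */
static inline struct f2fs_sb_info *F2FS_I_SB(struct inode *inode)
{
	return F2FS_SB(inode->i_sb);
}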
diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile index ca58d64374ca..9b320cc2a8cf 100644 --- a/fs/lockd/Makefile +++ b/fs/lockd/Makefile | |||
@@ -5,6 +5,7 @@ | |||
5 | obj-$(CONFIG_LOCKD) += lockd.o | 5 | obj-$(CONFIG_LOCKD) += lockd.o |
6 | 6 | ||
7 | lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \ | 7 | lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \ |
8 | svcshare.o svcproc.o svcsubs.o mon.o xdr.o grace.o | 8 | svcshare.o svcproc.o svcsubs.o mon.o xdr.o |
9 | lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o | 9 | lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o |
10 | lockd-objs-$(CONFIG_PROC_FS) += procfs.o | ||
10 | lockd-objs := $(lockd-objs-y) | 11 | lockd-objs := $(lockd-objs-y) |
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index daa8e7514eae..9106f42c472c 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c | |||
@@ -159,6 +159,12 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, | |||
159 | 159 | ||
160 | msg.rpc_proc = &clnt->cl_procinfo[proc]; | 160 | msg.rpc_proc = &clnt->cl_procinfo[proc]; |
161 | status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN); | 161 | status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN); |
162 | if (status == -ECONNREFUSED) { | ||
163 | dprintk("lockd: NSM upcall RPC failed, status=%d, forcing rebind\n", | ||
164 | status); | ||
165 | rpc_force_rebind(clnt); | ||
166 | status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN); | ||
167 | } | ||
162 | if (status < 0) | 168 | if (status < 0) |
163 | dprintk("lockd: NSM upcall RPC failed, status=%d\n", | 169 | dprintk("lockd: NSM upcall RPC failed, status=%d\n", |
164 | status); | 170 | status); |
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index 5010b55628b4..097bfa3adb1c 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h | |||
@@ -11,7 +11,6 @@ struct lockd_net { | |||
11 | 11 | ||
12 | struct delayed_work grace_period_end; | 12 | struct delayed_work grace_period_end; |
13 | struct lock_manager lockd_manager; | 13 | struct lock_manager lockd_manager; |
14 | struct list_head grace_list; | ||
15 | 14 | ||
16 | spinlock_t nsm_clnt_lock; | 15 | spinlock_t nsm_clnt_lock; |
17 | unsigned int nsm_users; | 16 | unsigned int nsm_users; |
diff --git a/fs/lockd/procfs.c b/fs/lockd/procfs.c new file mode 100644 index 000000000000..2a0a98480e39 --- /dev/null +++ b/fs/lockd/procfs.c | |||
@@ -0,0 +1,92 @@ | |||
1 | /* | ||
2 | * Procfs support for lockd | ||
3 | * | ||
4 | * Copyright (c) 2014 Jeff Layton <jlayton@primarydata.com> | ||
5 | */ | ||
6 | |||
7 | #include <linux/fs.h> | ||
8 | #include <linux/proc_fs.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/nsproxy.h> | ||
11 | #include <net/net_namespace.h> | ||
12 | |||
13 | #include "netns.h" | ||
14 | #include "procfs.h" | ||
15 | |||
16 | /* | ||
17 | * We only allow strings that start with 'Y', 'y', or '1'. | ||
18 | */ | ||
19 | static ssize_t | ||
20 | nlm_end_grace_write(struct file *file, const char __user *buf, size_t size, | ||
21 | loff_t *pos) | ||
22 | { | ||
23 | char *data; | ||
24 | struct lockd_net *ln = net_generic(current->nsproxy->net_ns, | ||
25 | lockd_net_id); | ||
26 | |||
27 | if (size < 1) | ||
28 | return -EINVAL; | ||
29 | |||
30 | data = simple_transaction_get(file, buf, size); | ||
31 | if (IS_ERR(data)) | ||
32 | return PTR_ERR(data); | ||
33 | |||
34 | switch (data[0]) { | ||
35 | case 'Y': | ||
36 | case 'y': | ||
37 | case '1': | ||
38 | locks_end_grace(&ln->lockd_manager); | ||
39 | break; | ||
40 | default: | ||
41 | return -EINVAL; | ||
42 | } | ||
43 | |||
44 | return size; | ||
45 | } | ||
46 | |||
47 | static ssize_t | ||
48 | nlm_end_grace_read(struct file *file, char __user *buf, size_t size, | ||
49 | loff_t *pos) | ||
50 | { | ||
51 | struct lockd_net *ln = net_generic(current->nsproxy->net_ns, | ||
52 | lockd_net_id); | ||
53 | char resp[3]; | ||
54 | |||
55 | resp[0] = list_empty(&ln->lockd_manager.list) ? 'Y' : 'N'; | ||
56 | resp[1] = '\n'; | ||
57 | resp[2] = '\0'; | ||
58 | |||
59 | return simple_read_from_buffer(buf, size, pos, resp, sizeof(resp)); | ||
60 | } | ||
61 | |||
62 | static const struct file_operations lockd_end_grace_operations = { | ||
63 | .write = nlm_end_grace_write, | ||
64 | .read = nlm_end_grace_read, | ||
65 | .llseek = default_llseek, | ||
66 | .release = simple_transaction_release, | ||
67 | .owner = THIS_MODULE, | ||
68 | }; | ||
69 | |||
70 | int __init | ||
71 | lockd_create_procfs(void) | ||
72 | { | ||
73 | struct proc_dir_entry *entry; | ||
74 | |||
75 | entry = proc_mkdir("fs/lockd", NULL); | ||
76 | if (!entry) | ||
77 | return -ENOMEM; | ||
78 | entry = proc_create("nlm_end_grace", S_IRUGO|S_IWUSR, entry, | ||
79 | &lockd_end_grace_operations); | ||
80 | if (!entry) { | ||
81 | remove_proc_entry("fs/lockd", NULL); | ||
82 | return -ENOMEM; | ||
83 | } | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | void __exit | ||
88 | lockd_remove_procfs(void) | ||
89 | { | ||
90 | remove_proc_entry("fs/lockd/nlm_end_grace", NULL); | ||
91 | remove_proc_entry("fs/lockd", NULL); | ||
92 | } | ||
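The new file surfaces as /proc/fs/lockd/nlm_end_grace: reading reports whether the grace period has ended, and writing 'Y', 'y', or '1' ends it early. A minimal user-space sketch (error handling trimmed):

/* Minimal user-space sketch: end lockd's grace period early. */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/proc/fs/lockd/nlm_end_grace", O_WRONLY);

	if (fd < 0)
		return 1;
	if (write(fd, "Y", 1) != 1) {	/* 'Y', 'y' or '1' are accepted */
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}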
diff --git a/fs/lockd/procfs.h b/fs/lockd/procfs.h new file mode 100644 index 000000000000..2257a1311027 --- /dev/null +++ b/fs/lockd/procfs.h | |||
@@ -0,0 +1,28 @@ | |||
1 | /* | ||
2 | * Procfs support for lockd | ||
3 | * | ||
4 | * Copyright (c) 2014 Jeff Layton <jlayton@primarydata.com> | ||
5 | */ | ||
6 | #ifndef _LOCKD_PROCFS_H | ||
7 | #define _LOCKD_PROCFS_H | ||
8 | |||
9 | #include <linux/kconfig.h> | ||
10 | |||
11 | #if IS_ENABLED(CONFIG_PROC_FS) | ||
12 | int lockd_create_procfs(void); | ||
13 | void lockd_remove_procfs(void); | ||
14 | #else | ||
15 | static inline int | ||
16 | lockd_create_procfs(void) | ||
17 | { | ||
18 | return 0; | ||
19 | } | ||
20 | |||
21 | static inline void | ||
22 | lockd_remove_procfs(void) | ||
23 | { | ||
24 | return; | ||
25 | } | ||
26 | #endif /* IS_ENABLED(CONFIG_PROC_FS) */ | ||
27 | |||
28 | #endif /* _LOCKD_PROCFS_H */ | ||
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index ec9e082f9ecd..d1bb7ecfd201 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/nfs.h> | 36 | #include <linux/nfs.h> |
37 | 37 | ||
38 | #include "netns.h" | 38 | #include "netns.h" |
39 | #include "procfs.h" | ||
39 | 40 | ||
40 | #define NLMDBG_FACILITY NLMDBG_SVC | 41 | #define NLMDBG_FACILITY NLMDBG_SVC |
41 | #define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) | 42 | #define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) |
@@ -304,13 +305,16 @@ static int lockd_start_svc(struct svc_serv *serv) | |||
304 | svc_sock_update_bufs(serv); | 305 | svc_sock_update_bufs(serv); |
305 | serv->sv_maxconn = nlm_max_connections; | 306 | serv->sv_maxconn = nlm_max_connections; |
306 | 307 | ||
307 | nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, "%s", serv->sv_name); | 308 | nlmsvc_task = kthread_create(lockd, nlmsvc_rqst, "%s", serv->sv_name); |
308 | if (IS_ERR(nlmsvc_task)) { | 309 | if (IS_ERR(nlmsvc_task)) { |
309 | error = PTR_ERR(nlmsvc_task); | 310 | error = PTR_ERR(nlmsvc_task); |
310 | printk(KERN_WARNING | 311 | printk(KERN_WARNING |
311 | "lockd_up: kthread_run failed, error=%d\n", error); | 312 | "lockd_up: kthread_run failed, error=%d\n", error); |
312 | goto out_task; | 313 | goto out_task; |
313 | } | 314 | } |
315 | nlmsvc_rqst->rq_task = nlmsvc_task; | ||
316 | wake_up_process(nlmsvc_task); | ||
317 | |||
314 | dprintk("lockd_up: service started\n"); | 318 | dprintk("lockd_up: service started\n"); |
315 | return 0; | 319 | return 0; |
316 | 320 | ||
@@ -581,7 +585,7 @@ static int lockd_init_net(struct net *net) | |||
581 | struct lockd_net *ln = net_generic(net, lockd_net_id); | 585 | struct lockd_net *ln = net_generic(net, lockd_net_id); |
582 | 586 | ||
583 | INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); | 587 | INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); |
584 | INIT_LIST_HEAD(&ln->grace_list); | 588 | INIT_LIST_HEAD(&ln->lockd_manager.list); |
585 | spin_lock_init(&ln->nsm_clnt_lock); | 589 | spin_lock_init(&ln->nsm_clnt_lock); |
586 | return 0; | 590 | return 0; |
587 | } | 591 | } |
@@ -615,8 +619,15 @@ static int __init init_nlm(void) | |||
615 | err = register_pernet_subsys(&lockd_net_ops); | 619 | err = register_pernet_subsys(&lockd_net_ops); |
616 | if (err) | 620 | if (err) |
617 | goto err_pernet; | 621 | goto err_pernet; |
622 | |||
623 | err = lockd_create_procfs(); | ||
624 | if (err) | ||
625 | goto err_procfs; | ||
626 | |||
618 | return 0; | 627 | return 0; |
619 | 628 | ||
629 | err_procfs: | ||
630 | unregister_pernet_subsys(&lockd_net_ops); | ||
620 | err_pernet: | 631 | err_pernet: |
621 | #ifdef CONFIG_SYSCTL | 632 | #ifdef CONFIG_SYSCTL |
622 | unregister_sysctl_table(nlm_sysctl_table); | 633 | unregister_sysctl_table(nlm_sysctl_table); |
@@ -629,6 +640,7 @@ static void __exit exit_nlm(void) | |||
629 | { | 640 | { |
630 | /* FIXME: delete all NLM clients */ | 641 | /* FIXME: delete all NLM clients */ |
631 | nlm_shutdown_hosts(); | 642 | nlm_shutdown_hosts(); |
643 | lockd_remove_procfs(); | ||
632 | unregister_pernet_subsys(&lockd_net_ops); | 644 | unregister_pernet_subsys(&lockd_net_ops); |
633 | #ifdef CONFIG_SYSCTL | 645 | #ifdef CONFIG_SYSCTL |
634 | unregister_sysctl_table(nlm_sysctl_table); | 646 | unregister_sysctl_table(nlm_sysctl_table); |
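kthread_run() is just kthread_create() followed by wake_up_process(); splitting the two, as the lockd_start_svc() hunk above does, lets rq_task be published before the thread can start running. The general idiom, with illustrative names:

/* Sketch of the create-publish-wake idiom; worker_fn/shared are illustrative. */
struct task_struct *t;

t = kthread_create(worker_fn, data, "worker");
if (IS_ERR(t))
	return PTR_ERR(t);
shared->task = t;	/* made visible before the thread executes */
wake_up_process(t);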
diff --git a/fs/nfs/blocklayout/Makefile b/fs/nfs/blocklayout/Makefile index d5815505c020..3ca14c36d08b 100644 --- a/fs/nfs/blocklayout/Makefile +++ b/fs/nfs/blocklayout/Makefile | |||
@@ -2,4 +2,5 @@ | |||
2 | # Makefile for the pNFS block layout driver kernel module | 2 | # Makefile for the pNFS block layout driver kernel module |
3 | # | 3 | # |
4 | obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o | 4 | obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o |
5 | blocklayoutdriver-objs := blocklayout.o extents.o blocklayoutdev.o blocklayoutdm.o | 5 | |
6 | blocklayoutdriver-y += blocklayout.o dev.o extent_tree.o rpc_pipefs.o | ||
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index cbb1797149d5..5228f201d3d5 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -35,7 +35,6 @@ | |||
35 | #include <linux/mount.h> | 35 | #include <linux/mount.h> |
36 | #include <linux/namei.h> | 36 | #include <linux/namei.h> |
37 | #include <linux/bio.h> /* struct bio */ | 37 | #include <linux/bio.h> /* struct bio */ |
38 | #include <linux/buffer_head.h> /* various write calls */ | ||
39 | #include <linux/prefetch.h> | 38 | #include <linux/prefetch.h> |
40 | #include <linux/pagevec.h> | 39 | #include <linux/pagevec.h> |
41 | 40 | ||
@@ -50,40 +49,16 @@ MODULE_LICENSE("GPL"); | |||
50 | MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>"); | 49 | MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>"); |
51 | MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); | 50 | MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); |
52 | 51 | ||
53 | static void print_page(struct page *page) | 52 | static bool is_hole(struct pnfs_block_extent *be) |
54 | { | 53 | { |
55 | dprintk("PRINTPAGE page %p\n", page); | 54 | switch (be->be_state) { |
56 | dprintk(" PagePrivate %d\n", PagePrivate(page)); | 55 | case PNFS_BLOCK_NONE_DATA: |
57 | dprintk(" PageUptodate %d\n", PageUptodate(page)); | 56 | return true; |
58 | dprintk(" PageError %d\n", PageError(page)); | 57 | case PNFS_BLOCK_INVALID_DATA: |
59 | dprintk(" PageDirty %d\n", PageDirty(page)); | 58 | return be->be_tag ? false : true; |
60 | dprintk(" PageReferenced %d\n", PageReferenced(page)); | 59 | default: |
61 | dprintk(" PageLocked %d\n", PageLocked(page)); | 60 | return false; |
62 | dprintk(" PageWriteback %d\n", PageWriteback(page)); | 61 | } |
63 | dprintk(" PageMappedToDisk %d\n", PageMappedToDisk(page)); | ||
64 | dprintk("\n"); | ||
65 | } | ||
66 | |||
67 | /* Given the be associated with isect, determine if page data needs to be | ||
68 | * initialized. | ||
69 | */ | ||
70 | static int is_hole(struct pnfs_block_extent *be, sector_t isect) | ||
71 | { | ||
72 | if (be->be_state == PNFS_BLOCK_NONE_DATA) | ||
73 | return 1; | ||
74 | else if (be->be_state != PNFS_BLOCK_INVALID_DATA) | ||
75 | return 0; | ||
76 | else | ||
77 | return !bl_is_sector_init(be->be_inval, isect); | ||
78 | } | ||
79 | |||
80 | /* Given the be associated with isect, determine if page data can be | ||
81 | * written to disk. | ||
82 | */ | ||
83 | static int is_writable(struct pnfs_block_extent *be, sector_t isect) | ||
84 | { | ||
85 | return (be->be_state == PNFS_BLOCK_READWRITE_DATA || | ||
86 | be->be_state == PNFS_BLOCK_INVALID_DATA); | ||
87 | } | 62 | } |
88 | 63 | ||
89 | /* The data we are handed might be spread across several bios. We need | 64 | /* The data we are handed might be spread across several bios. We need |
@@ -91,9 +66,8 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect) | |||
91 | */ | 66 | */ |
92 | struct parallel_io { | 67 | struct parallel_io { |
93 | struct kref refcnt; | 68 | struct kref refcnt; |
94 | void (*pnfs_callback) (void *data, int num_se); | 69 | void (*pnfs_callback) (void *data); |
95 | void *data; | 70 | void *data; |
96 | int bse_count; | ||
97 | }; | 71 | }; |
98 | 72 | ||
99 | static inline struct parallel_io *alloc_parallel(void *data) | 73 | static inline struct parallel_io *alloc_parallel(void *data) |
@@ -104,7 +78,6 @@ static inline struct parallel_io *alloc_parallel(void *data) | |||
104 | if (rv) { | 78 | if (rv) { |
105 | rv->data = data; | 79 | rv->data = data; |
106 | kref_init(&rv->refcnt); | 80 | kref_init(&rv->refcnt); |
107 | rv->bse_count = 0; | ||
108 | } | 81 | } |
109 | return rv; | 82 | return rv; |
110 | } | 83 | } |
@@ -119,7 +92,7 @@ static void destroy_parallel(struct kref *kref) | |||
119 | struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); | 92 | struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); |
120 | 93 | ||
121 | dprintk("%s enter\n", __func__); | 94 | dprintk("%s enter\n", __func__); |
122 | p->pnfs_callback(p->data, p->bse_count); | 95 | p->pnfs_callback(p->data); |
123 | kfree(p); | 96 | kfree(p); |
124 | } | 97 | } |
125 | 98 | ||
@@ -141,10 +114,9 @@ bl_submit_bio(int rw, struct bio *bio) | |||
141 | return NULL; | 114 | return NULL; |
142 | } | 115 | } |
143 | 116 | ||
144 | static struct bio *bl_alloc_init_bio(int npg, sector_t isect, | 117 | static struct bio * |
145 | struct pnfs_block_extent *be, | 118 | bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector, |
146 | void (*end_io)(struct bio *, int err), | 119 | void (*end_io)(struct bio *, int err), struct parallel_io *par) |
147 | struct parallel_io *par) | ||
148 | { | 120 | { |
149 | struct bio *bio; | 121 | struct bio *bio; |
150 | 122 | ||
@@ -156,58 +128,64 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, | |||
156 | } | 128 | } |
157 | 129 | ||
158 | if (bio) { | 130 | if (bio) { |
159 | bio->bi_iter.bi_sector = isect - be->be_f_offset + | 131 | bio->bi_iter.bi_sector = disk_sector; |
160 | be->be_v_offset; | 132 | bio->bi_bdev = bdev; |
161 | bio->bi_bdev = be->be_mdev; | ||
162 | bio->bi_end_io = end_io; | 133 | bio->bi_end_io = end_io; |
163 | bio->bi_private = par; | 134 | bio->bi_private = par; |
164 | } | 135 | } |
165 | return bio; | 136 | return bio; |
166 | } | 137 | } |
167 | 138 | ||
168 | static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw, | 139 | static struct bio * |
169 | sector_t isect, struct page *page, | 140 | do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, |
170 | struct pnfs_block_extent *be, | 141 | struct page *page, struct pnfs_block_dev_map *map, |
171 | void (*end_io)(struct bio *, int err), | 142 | struct pnfs_block_extent *be, |
172 | struct parallel_io *par, | 143 | void (*end_io)(struct bio *, int err), |
173 | unsigned int offset, int len) | 144 | struct parallel_io *par, unsigned int offset, int *len) |
174 | { | 145 | { |
175 | isect = isect + (offset >> SECTOR_SHIFT); | 146 | struct pnfs_block_dev *dev = |
147 | container_of(be->be_device, struct pnfs_block_dev, node); | ||
148 | u64 disk_addr, end; | ||
149 | |||
176 | dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, | 150 | dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, |
177 | npg, rw, (unsigned long long)isect, offset, len); | 151 | npg, rw, (unsigned long long)isect, offset, *len); |
152 | |||
153 | /* translate to device offset */ | ||
154 | isect += be->be_v_offset; | ||
155 | isect -= be->be_f_offset; | ||
156 | |||
157 | /* translate to physical disk offset */ | ||
158 | disk_addr = (u64)isect << SECTOR_SHIFT; | ||
159 | if (disk_addr < map->start || disk_addr >= map->start + map->len) { | ||
160 | if (!dev->map(dev, disk_addr, map)) | ||
161 | return ERR_PTR(-EIO); | ||
162 | bio = bl_submit_bio(rw, bio); | ||
163 | } | ||
164 | disk_addr += map->disk_offset; | ||
165 | disk_addr -= map->start; | ||
166 | |||
167 | /* limit length to what the device mapping allows */ | ||
168 | end = disk_addr + *len; | ||
169 | if (end >= map->start + map->len) | ||
170 | *len = map->start + map->len - disk_addr; | ||
171 | |||
178 | retry: | 172 | retry: |
179 | if (!bio) { | 173 | if (!bio) { |
180 | bio = bl_alloc_init_bio(npg, isect, be, end_io, par); | 174 | bio = bl_alloc_init_bio(npg, map->bdev, |
175 | disk_addr >> SECTOR_SHIFT, end_io, par); | ||
181 | if (!bio) | 176 | if (!bio) |
182 | return ERR_PTR(-ENOMEM); | 177 | return ERR_PTR(-ENOMEM); |
183 | } | 178 | } |
184 | if (bio_add_page(bio, page, len, offset) < len) { | 179 | if (bio_add_page(bio, page, *len, offset) < *len) { |
185 | bio = bl_submit_bio(rw, bio); | 180 | bio = bl_submit_bio(rw, bio); |
186 | goto retry; | 181 | goto retry; |
187 | } | 182 | } |
188 | return bio; | 183 | return bio; |
189 | } | 184 | } |
190 | 185 | ||
191 | static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, | ||
192 | sector_t isect, struct page *page, | ||
193 | struct pnfs_block_extent *be, | ||
194 | void (*end_io)(struct bio *, int err), | ||
195 | struct parallel_io *par) | ||
196 | { | ||
197 | return do_add_page_to_bio(bio, npg, rw, isect, page, be, | ||
198 | end_io, par, 0, PAGE_CACHE_SIZE); | ||
199 | } | ||
200 | |||
201 | /* This is basically copied from mpage_end_io_read */ | ||
202 | static void bl_end_io_read(struct bio *bio, int err) | 186 | static void bl_end_io_read(struct bio *bio, int err) |
203 | { | 187 | { |
204 | struct parallel_io *par = bio->bi_private; | 188 | struct parallel_io *par = bio->bi_private; |
205 | struct bio_vec *bvec; | ||
206 | int i; | ||
207 | |||
208 | if (!err) | ||
209 | bio_for_each_segment_all(bvec, bio, i) | ||
210 | SetPageUptodate(bvec->bv_page); | ||
211 | 189 | ||
212 | if (err) { | 190 | if (err) { |
213 | struct nfs_pgio_header *header = par->data; | 191 | struct nfs_pgio_header *header = par->data; |
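The new do_add_page_to_bio() above translates file sector to device sector to byte offset, then keeps the I/O inside the current device-map window. A worked pass with invented numbers:

/* Worked example (invented numbers): be_f_offset = 0, be_v_offset = 1024,
 * map window: start = 0, len = 1 MiB, disk_offset = 0.
 * For file sector isect = 8:
 *   isect     = 8 + 1024 - 0 = 1032	(device sector)
 *   disk_addr = 1032 << 9   = 528384	(byte offset, inside the window)
 * Were disk_addr outside [start, start + len), dev->map() would be asked
 * for a fresh window and any pending bio submitted first; *len is then
 * clamped so the bio never crosses the window's end. */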
@@ -216,6 +194,7 @@ static void bl_end_io_read(struct bio *bio, int err) | |||
216 | header->pnfs_error = -EIO; | 194 | header->pnfs_error = -EIO; |
217 | pnfs_set_lo_fail(header->lseg); | 195 | pnfs_set_lo_fail(header->lseg); |
218 | } | 196 | } |
197 | |||
219 | bio_put(bio); | 198 | bio_put(bio); |
220 | put_parallel(par); | 199 | put_parallel(par); |
221 | } | 200 | } |
@@ -231,7 +210,7 @@ static void bl_read_cleanup(struct work_struct *work) | |||
231 | } | 210 | } |
232 | 211 | ||
233 | static void | 212 | static void |
234 | bl_end_par_io_read(void *data, int unused) | 213 | bl_end_par_io_read(void *data) |
235 | { | 214 | { |
236 | struct nfs_pgio_header *hdr = data; | 215 | struct nfs_pgio_header *hdr = data; |
237 | 216 | ||
@@ -241,88 +220,78 @@ bl_end_par_io_read(void *data, int unused) | |||
241 | } | 220 | } |
242 | 221 | ||
243 | static enum pnfs_try_status | 222 | static enum pnfs_try_status |
244 | bl_read_pagelist(struct nfs_pgio_header *hdr) | 223 | bl_read_pagelist(struct nfs_pgio_header *header) |
245 | { | 224 | { |
246 | struct nfs_pgio_header *header = hdr; | 225 | struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); |
247 | int i, hole; | 226 | struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 }; |
248 | struct bio *bio = NULL; | 227 | struct bio *bio = NULL; |
249 | struct pnfs_block_extent *be = NULL, *cow_read = NULL; | 228 | struct pnfs_block_extent be; |
250 | sector_t isect, extent_length = 0; | 229 | sector_t isect, extent_length = 0; |
251 | struct parallel_io *par; | 230 | struct parallel_io *par; |
252 | loff_t f_offset = hdr->args.offset; | 231 | loff_t f_offset = header->args.offset; |
253 | size_t bytes_left = hdr->args.count; | 232 | size_t bytes_left = header->args.count; |
254 | unsigned int pg_offset, pg_len; | 233 | unsigned int pg_offset, pg_len; |
255 | struct page **pages = hdr->args.pages; | 234 | struct page **pages = header->args.pages; |
256 | int pg_index = hdr->args.pgbase >> PAGE_CACHE_SHIFT; | 235 | int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; |
257 | const bool is_dio = (header->dreq != NULL); | 236 | const bool is_dio = (header->dreq != NULL); |
237 | struct blk_plug plug; | ||
238 | int i; | ||
258 | 239 | ||
259 | dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, | 240 | dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, |
260 | hdr->page_array.npages, f_offset, | 241 | header->page_array.npages, f_offset, |
261 | (unsigned int)hdr->args.count); | 242 | (unsigned int)header->args.count); |
262 | 243 | ||
263 | par = alloc_parallel(hdr); | 244 | par = alloc_parallel(header); |
264 | if (!par) | 245 | if (!par) |
265 | goto use_mds; | 246 | return PNFS_NOT_ATTEMPTED; |
266 | par->pnfs_callback = bl_end_par_io_read; | 247 | par->pnfs_callback = bl_end_par_io_read; |
267 | /* At this point, we can no longer jump to use_mds */ | 248 | |
249 | blk_start_plug(&plug); | ||
268 | 250 | ||
269 | isect = (sector_t) (f_offset >> SECTOR_SHIFT); | 251 | isect = (sector_t) (f_offset >> SECTOR_SHIFT); |
270 | /* Code assumes extents are page-aligned */ | 252 | /* Code assumes extents are page-aligned */ |
271 | for (i = pg_index; i < hdr->page_array.npages; i++) { | 253 | for (i = pg_index; i < header->page_array.npages; i++) { |
272 | if (!extent_length) { | 254 | if (extent_length <= 0) { |
273 | /* We've used up the previous extent */ | 255 | /* We've used up the previous extent */ |
274 | bl_put_extent(be); | ||
275 | bl_put_extent(cow_read); | ||
276 | bio = bl_submit_bio(READ, bio); | 256 | bio = bl_submit_bio(READ, bio); |
257 | |||
277 | /* Get the next one */ | 258 | /* Get the next one */ |
278 | be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), | 259 | if (!ext_tree_lookup(bl, isect, &be, false)) { |
279 | isect, &cow_read); | ||
280 | if (!be) { | ||
281 | header->pnfs_error = -EIO; | 260 | header->pnfs_error = -EIO; |
282 | goto out; | 261 | goto out; |
283 | } | 262 | } |
284 | extent_length = be->be_length - | 263 | extent_length = be.be_length - (isect - be.be_f_offset); |
285 | (isect - be->be_f_offset); | ||
286 | if (cow_read) { | ||
287 | sector_t cow_length = cow_read->be_length - | ||
288 | (isect - cow_read->be_f_offset); | ||
289 | extent_length = min(extent_length, cow_length); | ||
290 | } | ||
291 | } | 264 | } |
292 | 265 | ||
266 | pg_offset = f_offset & ~PAGE_CACHE_MASK; | ||
293 | if (is_dio) { | 267 | if (is_dio) { |
294 | pg_offset = f_offset & ~PAGE_CACHE_MASK; | ||
295 | if (pg_offset + bytes_left > PAGE_CACHE_SIZE) | 268 | if (pg_offset + bytes_left > PAGE_CACHE_SIZE) |
296 | pg_len = PAGE_CACHE_SIZE - pg_offset; | 269 | pg_len = PAGE_CACHE_SIZE - pg_offset; |
297 | else | 270 | else |
298 | pg_len = bytes_left; | 271 | pg_len = bytes_left; |
299 | |||
300 | f_offset += pg_len; | ||
301 | bytes_left -= pg_len; | ||
302 | isect += (pg_offset >> SECTOR_SHIFT); | ||
303 | } else { | 272 | } else { |
304 | pg_offset = 0; | 273 | BUG_ON(pg_offset != 0); |
305 | pg_len = PAGE_CACHE_SIZE; | 274 | pg_len = PAGE_CACHE_SIZE; |
306 | } | 275 | } |
307 | 276 | ||
308 | hole = is_hole(be, isect); | 277 | isect += (pg_offset >> SECTOR_SHIFT); |
309 | if (hole && !cow_read) { | 278 | extent_length -= (pg_offset >> SECTOR_SHIFT); |
279 | |||
280 | if (is_hole(&be)) { | ||
310 | bio = bl_submit_bio(READ, bio); | 281 | bio = bl_submit_bio(READ, bio); |
311 | /* Fill hole w/ zeroes w/o accessing device */ | 282 | /* Fill hole w/ zeroes w/o accessing device */ |
312 | dprintk("%s Zeroing page for hole\n", __func__); | 283 | dprintk("%s Zeroing page for hole\n", __func__); |
313 | zero_user_segment(pages[i], pg_offset, pg_len); | 284 | zero_user_segment(pages[i], pg_offset, pg_len); |
314 | print_page(pages[i]); | ||
315 | SetPageUptodate(pages[i]); | ||
316 | } else { | ||
317 | struct pnfs_block_extent *be_read; | ||
318 | 285 | ||
319 | be_read = (hole && cow_read) ? cow_read : be; | 286 | /* invalidate map */ |
287 | map.start = NFS4_MAX_UINT64; | ||
288 | } else { | ||
320 | bio = do_add_page_to_bio(bio, | 289 | bio = do_add_page_to_bio(bio, |
321 | hdr->page_array.npages - i, | 290 | header->page_array.npages - i, |
322 | READ, | 291 | READ, |
323 | isect, pages[i], be_read, | 292 | isect, pages[i], &map, &be, |
324 | bl_end_io_read, par, | 293 | bl_end_io_read, par, |
325 | pg_offset, pg_len); | 294 | pg_offset, &pg_len); |
326 | if (IS_ERR(bio)) { | 295 | if (IS_ERR(bio)) { |
327 | header->pnfs_error = PTR_ERR(bio); | 296 | header->pnfs_error = PTR_ERR(bio); |
328 | bio = NULL; | 297 | bio = NULL; |
@@ -330,75 +299,21 @@ bl_read_pagelist(struct nfs_pgio_header *hdr) | |||
330 | } | 299 | } |
331 | } | 300 | } |
332 | isect += (pg_len >> SECTOR_SHIFT); | 301 | isect += (pg_len >> SECTOR_SHIFT); |
333 | extent_length -= PAGE_CACHE_SECTORS; | 302 | extent_length -= (pg_len >> SECTOR_SHIFT); |
303 | f_offset += pg_len; | ||
304 | bytes_left -= pg_len; | ||
334 | } | 305 | } |
335 | if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { | 306 | if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { |
336 | hdr->res.eof = 1; | 307 | header->res.eof = 1; |
337 | hdr->res.count = header->inode->i_size - hdr->args.offset; | 308 | header->res.count = header->inode->i_size - header->args.offset; |
338 | } else { | 309 | } else { |
339 | hdr->res.count = (isect << SECTOR_SHIFT) - hdr->args.offset; | 310 | header->res.count = (isect << SECTOR_SHIFT) - header->args.offset; |
340 | } | 311 | } |
341 | out: | 312 | out: |
342 | bl_put_extent(be); | ||
343 | bl_put_extent(cow_read); | ||
344 | bl_submit_bio(READ, bio); | 313 | bl_submit_bio(READ, bio); |
314 | blk_finish_plug(&plug); | ||
345 | put_parallel(par); | 315 | put_parallel(par); |
346 | return PNFS_ATTEMPTED; | 316 | return PNFS_ATTEMPTED; |
347 | |||
348 | use_mds: | ||
349 | dprintk("Giving up and using normal NFS\n"); | ||
350 | return PNFS_NOT_ATTEMPTED; | ||
351 | } | ||
352 | |||
353 | static void mark_extents_written(struct pnfs_block_layout *bl, | ||
354 | __u64 offset, __u32 count) | ||
355 | { | ||
356 | sector_t isect, end; | ||
357 | struct pnfs_block_extent *be; | ||
358 | struct pnfs_block_short_extent *se; | ||
359 | |||
360 | dprintk("%s(%llu, %u)\n", __func__, offset, count); | ||
361 | if (count == 0) | ||
362 | return; | ||
363 | isect = (offset & (long)(PAGE_CACHE_MASK)) >> SECTOR_SHIFT; | ||
364 | end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK); | ||
365 | end >>= SECTOR_SHIFT; | ||
366 | while (isect < end) { | ||
367 | sector_t len; | ||
368 | be = bl_find_get_extent(bl, isect, NULL); | ||
369 | BUG_ON(!be); /* FIXME */ | ||
370 | len = min(end, be->be_f_offset + be->be_length) - isect; | ||
371 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
372 | se = bl_pop_one_short_extent(be->be_inval); | ||
373 | BUG_ON(!se); | ||
374 | bl_mark_for_commit(be, isect, len, se); | ||
375 | } | ||
376 | isect += len; | ||
377 | bl_put_extent(be); | ||
378 | } | ||
379 | } | ||
380 | |||
381 | static void bl_end_io_write_zero(struct bio *bio, int err) | ||
382 | { | ||
383 | struct parallel_io *par = bio->bi_private; | ||
384 | struct bio_vec *bvec; | ||
385 | int i; | ||
386 | |||
387 | bio_for_each_segment_all(bvec, bio, i) { | ||
388 | /* This is the zeroing page we added */ | ||
389 | end_page_writeback(bvec->bv_page); | ||
390 | page_cache_release(bvec->bv_page); | ||
391 | } | ||
392 | |||
393 | if (unlikely(err)) { | ||
394 | struct nfs_pgio_header *header = par->data; | ||
395 | |||
396 | if (!header->pnfs_error) | ||
397 | header->pnfs_error = -EIO; | ||
398 | pnfs_set_lo_fail(header->lseg); | ||
399 | } | ||
400 | bio_put(bio); | ||
401 | put_parallel(par); | ||
402 | } | 317 | } |
403 | 318 | ||
404 | static void bl_end_io_write(struct bio *bio, int err) | 319 | static void bl_end_io_write(struct bio *bio, int err) |
@@ -421,533 +336,118 @@ static void bl_end_io_write(struct bio *bio, int err) | |||
421 | */ | 336 | */ |
422 | static void bl_write_cleanup(struct work_struct *work) | 337 | static void bl_write_cleanup(struct work_struct *work) |
423 | { | 338 | { |
424 | struct rpc_task *task; | 339 | struct rpc_task *task = container_of(work, struct rpc_task, u.tk_work); |
425 | struct nfs_pgio_header *hdr; | 340 | struct nfs_pgio_header *hdr = |
341 | container_of(task, struct nfs_pgio_header, task); | ||
342 | |||
426 | dprintk("%s enter\n", __func__); | 343 | dprintk("%s enter\n", __func__); |
427 | task = container_of(work, struct rpc_task, u.tk_work); | 344 | |
428 | hdr = container_of(task, struct nfs_pgio_header, task); | ||
429 | if (likely(!hdr->pnfs_error)) { | 345 | if (likely(!hdr->pnfs_error)) { |
430 | /* Marks for LAYOUTCOMMIT */ | 346 | struct pnfs_block_layout *bl = BLK_LSEG2EXT(hdr->lseg); |
431 | mark_extents_written(BLK_LSEG2EXT(hdr->lseg), | 347 | u64 start = hdr->args.offset & (loff_t)PAGE_CACHE_MASK; |
432 | hdr->args.offset, hdr->args.count); | 348 | u64 end = (hdr->args.offset + hdr->args.count + |
349 | PAGE_CACHE_SIZE - 1) & (loff_t)PAGE_CACHE_MASK; | ||
350 | |||
351 | ext_tree_mark_written(bl, start >> SECTOR_SHIFT, | ||
352 | (end - start) >> SECTOR_SHIFT); | ||
433 | } | 353 | } |
354 | |||
434 | pnfs_ld_write_done(hdr); | 355 | pnfs_ld_write_done(hdr); |
435 | } | 356 | } |
436 | 357 | ||
437 | /* Called when last of bios associated with a bl_write_pagelist call finishes */ | 358 | /* Called when last of bios associated with a bl_write_pagelist call finishes */ |
438 | static void bl_end_par_io_write(void *data, int num_se) | 359 | static void bl_end_par_io_write(void *data) |
439 | { | 360 | { |
440 | struct nfs_pgio_header *hdr = data; | 361 | struct nfs_pgio_header *hdr = data; |
441 | 362 | ||
442 | if (unlikely(hdr->pnfs_error)) { | ||
443 | bl_free_short_extents(&BLK_LSEG2EXT(hdr->lseg)->bl_inval, | ||
444 | num_se); | ||
445 | } | ||
446 | |||
447 | hdr->task.tk_status = hdr->pnfs_error; | 363 | hdr->task.tk_status = hdr->pnfs_error; |
448 | hdr->verf.committed = NFS_FILE_SYNC; | 364 | hdr->verf.committed = NFS_FILE_SYNC; |
449 | INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup); | 365 | INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup); |
450 | schedule_work(&hdr->task.u.tk_work); | 366 | schedule_work(&hdr->task.u.tk_work); |
451 | } | 367 | } |
452 | 368 | ||
453 | /* FIXME STUB - mark intersection of layout and page as bad, so is not | ||
454 | * used again. | ||
455 | */ | ||
456 | static void mark_bad_read(void) | ||
457 | { | ||
458 | return; | ||
459 | } | ||
460 | |||
461 | /* | ||
462 | * map_block: map a requested I/0 block (isect) into an offset in the LVM | ||
463 | * block_device | ||
464 | */ | ||
465 | static void | ||
466 | map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be) | ||
467 | { | ||
468 | dprintk("%s enter be=%p\n", __func__, be); | ||
469 | |||
470 | set_buffer_mapped(bh); | ||
471 | bh->b_bdev = be->be_mdev; | ||
472 | bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >> | ||
473 | (be->be_mdev->bd_inode->i_blkbits - SECTOR_SHIFT); | ||
474 | |||
475 | dprintk("%s isect %llu, bh->b_blocknr %ld, using bsize %Zd\n", | ||
476 | __func__, (unsigned long long)isect, (long)bh->b_blocknr, | ||
477 | bh->b_size); | ||
478 | return; | ||
479 | } | ||
480 | |||
481 | static void | ||
482 | bl_read_single_end_io(struct bio *bio, int error) | ||
483 | { | ||
484 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
485 | struct page *page = bvec->bv_page; | ||
486 | |||
487 | /* Only one page in bvec */ | ||
488 | unlock_page(page); | ||
489 | } | ||
490 | |||
491 | static int | ||
492 | bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be, | ||
493 | unsigned int offset, unsigned int len) | ||
494 | { | ||
495 | struct bio *bio; | ||
496 | struct page *shadow_page; | ||
497 | sector_t isect; | ||
498 | char *kaddr, *kshadow_addr; | ||
499 | int ret = 0; | ||
500 | |||
501 | dprintk("%s: offset %u len %u\n", __func__, offset, len); | ||
502 | |||
503 | shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
504 | if (shadow_page == NULL) | ||
505 | return -ENOMEM; | ||
506 | |||
507 | bio = bio_alloc(GFP_NOIO, 1); | ||
508 | if (bio == NULL) | ||
509 | return -ENOMEM; | ||
510 | |||
511 | isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) + | ||
512 | (offset / SECTOR_SIZE); | ||
513 | |||
514 | bio->bi_iter.bi_sector = isect - be->be_f_offset + be->be_v_offset; | ||
515 | bio->bi_bdev = be->be_mdev; | ||
516 | bio->bi_end_io = bl_read_single_end_io; | ||
517 | |||
518 | lock_page(shadow_page); | ||
519 | if (bio_add_page(bio, shadow_page, | ||
520 | SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) { | ||
521 | unlock_page(shadow_page); | ||
522 | bio_put(bio); | ||
523 | return -EIO; | ||
524 | } | ||
525 | |||
526 | submit_bio(READ, bio); | ||
527 | wait_on_page_locked(shadow_page); | ||
528 | if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) { | ||
529 | ret = -EIO; | ||
530 | } else { | ||
531 | kaddr = kmap_atomic(page); | ||
532 | kshadow_addr = kmap_atomic(shadow_page); | ||
533 | memcpy(kaddr + offset, kshadow_addr + offset, len); | ||
534 | kunmap_atomic(kshadow_addr); | ||
535 | kunmap_atomic(kaddr); | ||
536 | } | ||
537 | __free_page(shadow_page); | ||
538 | bio_put(bio); | ||
539 | |||
540 | return ret; | ||
541 | } | ||
542 | |||
543 | static int | ||
544 | bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be, | ||
545 | unsigned int dirty_offset, unsigned int dirty_len, | ||
546 | bool full_page) | ||
547 | { | ||
548 | int ret = 0; | ||
549 | unsigned int start, end; | ||
550 | |||
551 | if (full_page) { | ||
552 | start = 0; | ||
553 | end = PAGE_CACHE_SIZE; | ||
554 | } else { | ||
555 | start = round_down(dirty_offset, SECTOR_SIZE); | ||
556 | end = round_up(dirty_offset + dirty_len, SECTOR_SIZE); | ||
557 | } | ||
558 | |||
559 | dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len); | ||
560 | if (!be) { | ||
561 | zero_user_segments(page, start, dirty_offset, | ||
562 | dirty_offset + dirty_len, end); | ||
563 | if (start == 0 && end == PAGE_CACHE_SIZE && | ||
564 | trylock_page(page)) { | ||
565 | SetPageUptodate(page); | ||
566 | unlock_page(page); | ||
567 | } | ||
568 | return ret; | ||
569 | } | ||
570 | |||
571 | if (start != dirty_offset) | ||
572 | ret = bl_do_readpage_sync(page, be, start, dirty_offset - start); | ||
573 | |||
574 | if (!ret && (dirty_offset + dirty_len < end)) | ||
575 | ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len, | ||
576 | end - dirty_offset - dirty_len); | ||
577 | |||
578 | return ret; | ||
579 | } | ||
580 | |||
581 | /* Given an unmapped page, zero it or read in page for COW, page is locked | ||
582 | * by caller. | ||
583 | */ | ||
584 | static int | ||
585 | init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read) | ||
586 | { | ||
587 | struct buffer_head *bh = NULL; | ||
588 | int ret = 0; | ||
589 | sector_t isect; | ||
590 | |||
591 | dprintk("%s enter, %p\n", __func__, page); | ||
592 | BUG_ON(PageUptodate(page)); | ||
593 | if (!cow_read) { | ||
594 | zero_user_segment(page, 0, PAGE_SIZE); | ||
595 | SetPageUptodate(page); | ||
596 | goto cleanup; | ||
597 | } | ||
598 | |||
599 | bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0); | ||
600 | if (!bh) { | ||
601 | ret = -ENOMEM; | ||
602 | goto cleanup; | ||
603 | } | ||
604 | |||
605 | isect = (sector_t) page->index << PAGE_CACHE_SECTOR_SHIFT; | ||
606 | map_block(bh, isect, cow_read); | ||
607 | if (!bh_uptodate_or_lock(bh)) | ||
608 | ret = bh_submit_read(bh); | ||
609 | if (ret) | ||
610 | goto cleanup; | ||
611 | SetPageUptodate(page); | ||
612 | |||
613 | cleanup: | ||
614 | if (bh) | ||
615 | free_buffer_head(bh); | ||
616 | if (ret) { | ||
617 | /* Need to mark layout with bad read...should now | ||
618 | * just use nfs4 for reads and writes. | ||
619 | */ | ||
620 | mark_bad_read(); | ||
621 | } | ||
622 | return ret; | ||
623 | } | ||
624 | |||
625 | /* Find or create a zeroing page marked as being under writeback. | ||
626 | * Return ERR_PTR on error, NULL to indicate skip this page and page itself | ||
627 | * to indicate write out. | ||
628 | */ | ||
629 | static struct page * | ||
630 | bl_find_get_zeroing_page(struct inode *inode, pgoff_t index, | ||
631 | struct pnfs_block_extent *cow_read) | ||
632 | { | ||
633 | struct page *page; | ||
634 | int locked = 0; | ||
635 | page = find_get_page(inode->i_mapping, index); | ||
636 | if (page) | ||
637 | goto check_page; | ||
638 | |||
639 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | ||
640 | if (unlikely(!page)) { | ||
641 | dprintk("%s oom\n", __func__); | ||
642 | return ERR_PTR(-ENOMEM); | ||
643 | } | ||
644 | locked = 1; | ||
645 | |||
646 | check_page: | ||
647 | /* PageDirty: Other will write this out | ||
648 | * PageWriteback: Other is writing this out | ||
649 | * PageUptodate: It was read before | ||
650 | */ | ||
651 | if (PageDirty(page) || PageWriteback(page)) { | ||
652 | print_page(page); | ||
653 | if (locked) | ||
654 | unlock_page(page); | ||
655 | page_cache_release(page); | ||
656 | return NULL; | ||
657 | } | ||
658 | |||
659 | if (!locked) { | ||
660 | lock_page(page); | ||
661 | locked = 1; | ||
662 | goto check_page; | ||
663 | } | ||
664 | if (!PageUptodate(page)) { | ||
665 | /* New page, readin or zero it */ | ||
666 | init_page_for_write(page, cow_read); | ||
667 | } | ||
668 | set_page_writeback(page); | ||
669 | unlock_page(page); | ||
670 | |||
671 | return page; | ||
672 | } | ||
673 | |||
674 | static enum pnfs_try_status | 369 | static enum pnfs_try_status |
675 | bl_write_pagelist(struct nfs_pgio_header *header, int sync) | 370 | bl_write_pagelist(struct nfs_pgio_header *header, int sync) |
676 | { | 371 | { |
677 | int i, ret, npg_zero, pg_index, last = 0; | 372 | struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); |
373 | struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 }; | ||
678 | struct bio *bio = NULL; | 374 | struct bio *bio = NULL; |
679 | struct pnfs_block_extent *be = NULL, *cow_read = NULL; | 375 | struct pnfs_block_extent be; |
680 | sector_t isect, last_isect = 0, extent_length = 0; | 376 | sector_t isect, extent_length = 0; |
681 | struct parallel_io *par = NULL; | 377 | struct parallel_io *par = NULL; |
682 | loff_t offset = header->args.offset; | 378 | loff_t offset = header->args.offset; |
683 | size_t count = header->args.count; | 379 | size_t count = header->args.count; |
684 | unsigned int pg_offset, pg_len, saved_len; | ||
685 | struct page **pages = header->args.pages; | 380 | struct page **pages = header->args.pages; |
686 | struct page *page; | 381 | int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
687 | pgoff_t index; | 382 | unsigned int pg_len; |
688 | u64 temp; | 383 | struct blk_plug plug; |
689 | int npg_per_block = | 384 | int i; |
690 | NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; | ||
691 | 385 | ||
692 | dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); | 386 | dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); |
693 | 387 | ||
694 | if (header->dreq != NULL && | ||
695 | (!IS_ALIGNED(offset, NFS_SERVER(header->inode)->pnfs_blksize) || | ||
696 | !IS_ALIGNED(count, NFS_SERVER(header->inode)->pnfs_blksize))) { | ||
697 | dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); | ||
698 | goto out_mds; | ||
699 | } | ||
700 | /* At this point, header->page_array is a (sequential) list of nfs_pages. | 388 | /* At this point, header->page_array is a (sequential) list of nfs_pages.
701 | * We want to write each, and if there is an error set pnfs_error | 389 | * We want to write each, and if there is an error set pnfs_error |
702 | * to have it redone using nfs. | 390 | * to have it redone using nfs. |
703 | */ | 391 | */ |
704 | par = alloc_parallel(header); | 392 | par = alloc_parallel(header); |
705 | if (!par) | 393 | if (!par) |
706 | goto out_mds; | 394 | return PNFS_NOT_ATTEMPTED; |
707 | par->pnfs_callback = bl_end_par_io_write; | 395 | par->pnfs_callback = bl_end_par_io_write; |
708 | /* At this point, have to be more careful with error handling */ | ||
709 | 396 | ||
710 | isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); | 397 | blk_start_plug(&plug); |
711 | be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read); | ||
712 | if (!be || !is_writable(be, isect)) { | ||
713 | dprintk("%s no matching extents!\n", __func__); | ||
714 | goto out_mds; | ||
715 | } | ||
716 | 398 | ||
717 | /* First page inside INVALID extent */ | 399 | /* we always write out the whole page */ |
718 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | 400 | offset = offset & (loff_t)PAGE_CACHE_MASK; |
719 | if (likely(!bl_push_one_short_extent(be->be_inval))) | 401 | isect = offset >> SECTOR_SHIFT; |
720 | par->bse_count++; | ||
721 | else | ||
722 | goto out_mds; | ||
723 | temp = offset >> PAGE_CACHE_SHIFT; | ||
724 | npg_zero = do_div(temp, npg_per_block); | ||
725 | isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) & | ||
726 | (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); | ||
727 | extent_length = be->be_length - (isect - be->be_f_offset); | ||
728 | |||
729 | fill_invalid_ext: | ||
730 | dprintk("%s need to zero %d pages\n", __func__, npg_zero); | ||
731 | for (;npg_zero > 0; npg_zero--) { | ||
732 | if (bl_is_sector_init(be->be_inval, isect)) { | ||
733 | dprintk("isect %llu already init\n", | ||
734 | (unsigned long long)isect); | ||
735 | goto next_page; | ||
736 | } | ||
737 | /* page ref released in bl_end_io_write_zero */ | ||
738 | index = isect >> PAGE_CACHE_SECTOR_SHIFT; | ||
739 | dprintk("%s zero %dth page: index %lu isect %llu\n", | ||
740 | __func__, npg_zero, index, | ||
741 | (unsigned long long)isect); | ||
742 | page = bl_find_get_zeroing_page(header->inode, index, | ||
743 | cow_read); | ||
744 | if (unlikely(IS_ERR(page))) { | ||
745 | header->pnfs_error = PTR_ERR(page); | ||
746 | goto out; | ||
747 | } else if (page == NULL) | ||
748 | goto next_page; | ||
749 | |||
750 | ret = bl_mark_sectors_init(be->be_inval, isect, | ||
751 | PAGE_CACHE_SECTORS); | ||
752 | if (unlikely(ret)) { | ||
753 | dprintk("%s bl_mark_sectors_init fail %d\n", | ||
754 | __func__, ret); | ||
755 | end_page_writeback(page); | ||
756 | page_cache_release(page); | ||
757 | header->pnfs_error = ret; | ||
758 | goto out; | ||
759 | } | ||
760 | if (likely(!bl_push_one_short_extent(be->be_inval))) | ||
761 | par->bse_count++; | ||
762 | else { | ||
763 | end_page_writeback(page); | ||
764 | page_cache_release(page); | ||
765 | header->pnfs_error = -ENOMEM; | ||
766 | goto out; | ||
767 | } | ||
768 | /* FIXME: This should be done in bi_end_io */ | ||
769 | mark_extents_written(BLK_LSEG2EXT(header->lseg), | ||
770 | page->index << PAGE_CACHE_SHIFT, | ||
771 | PAGE_CACHE_SIZE); | ||
772 | |||
773 | bio = bl_add_page_to_bio(bio, npg_zero, WRITE, | ||
774 | isect, page, be, | ||
775 | bl_end_io_write_zero, par); | ||
776 | if (IS_ERR(bio)) { | ||
777 | header->pnfs_error = PTR_ERR(bio); | ||
778 | bio = NULL; | ||
779 | goto out; | ||
780 | } | ||
781 | next_page: | ||
782 | isect += PAGE_CACHE_SECTORS; | ||
783 | extent_length -= PAGE_CACHE_SECTORS; | ||
784 | } | ||
785 | if (last) | ||
786 | goto write_done; | ||
787 | } | ||
788 | bio = bl_submit_bio(WRITE, bio); | ||
789 | 402 | ||
790 | /* Middle pages */ | ||
791 | pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; | ||
792 | for (i = pg_index; i < header->page_array.npages; i++) { | 403 | for (i = pg_index; i < header->page_array.npages; i++) { |
793 | if (!extent_length) { | 404 | if (extent_length <= 0) { |
794 | /* We've used up the previous extent */ | 405 | /* We've used up the previous extent */ |
795 | bl_put_extent(be); | ||
796 | bl_put_extent(cow_read); | ||
797 | bio = bl_submit_bio(WRITE, bio); | 406 | bio = bl_submit_bio(WRITE, bio); |
798 | /* Get the next one */ | 407 | /* Get the next one */ |
799 | be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), | 408 | if (!ext_tree_lookup(bl, isect, &be, true)) { |
800 | isect, &cow_read); | ||
801 | if (!be || !is_writable(be, isect)) { | ||
802 | header->pnfs_error = -EINVAL; | 409 | header->pnfs_error = -EINVAL; |
803 | goto out; | 410 | goto out; |
804 | } | 411 | } |
805 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
806 | if (likely(!bl_push_one_short_extent( | ||
807 | be->be_inval))) | ||
808 | par->bse_count++; | ||
809 | else { | ||
810 | header->pnfs_error = -ENOMEM; | ||
811 | goto out; | ||
812 | } | ||
813 | } | ||
814 | extent_length = be->be_length - | ||
815 | (isect - be->be_f_offset); | ||
816 | } | ||
817 | |||
818 | dprintk("%s offset %lld count %Zu\n", __func__, offset, count); | ||
819 | pg_offset = offset & ~PAGE_CACHE_MASK; | ||
820 | if (pg_offset + count > PAGE_CACHE_SIZE) | ||
821 | pg_len = PAGE_CACHE_SIZE - pg_offset; | ||
822 | else | ||
823 | pg_len = count; | ||
824 | |||
825 | saved_len = pg_len; | ||
826 | if (be->be_state == PNFS_BLOCK_INVALID_DATA && | ||
827 | !bl_is_sector_init(be->be_inval, isect)) { | ||
828 | ret = bl_read_partial_page_sync(pages[i], cow_read, | ||
829 | pg_offset, pg_len, true); | ||
830 | if (ret) { | ||
831 | dprintk("%s bl_read_partial_page_sync fail %d\n", | ||
832 | __func__, ret); | ||
833 | header->pnfs_error = ret; | ||
834 | goto out; | ||
835 | } | ||
836 | |||
837 | ret = bl_mark_sectors_init(be->be_inval, isect, | ||
838 | PAGE_CACHE_SECTORS); | ||
839 | if (unlikely(ret)) { | ||
840 | dprintk("%s bl_mark_sectors_init fail %d\n", | ||
841 | __func__, ret); | ||
842 | header->pnfs_error = ret; | ||
843 | goto out; | ||
844 | } | ||
845 | 412 | ||
846 | /* Expand to full page write */ | 413 | extent_length = be.be_length - (isect - be.be_f_offset); |
847 | pg_offset = 0; | ||
848 | pg_len = PAGE_CACHE_SIZE; | ||
849 | } else if ((pg_offset & (SECTOR_SIZE - 1)) || | ||
850 | (pg_len & (SECTOR_SIZE - 1))){ | ||
851 | /* ahh, nasty case. We have to do sync full sector | ||
852 | * read-modify-write cycles. | ||
853 | */ | ||
854 | unsigned int saved_offset = pg_offset; | ||
855 | ret = bl_read_partial_page_sync(pages[i], be, pg_offset, | ||
856 | pg_len, false); | ||
857 | pg_offset = round_down(pg_offset, SECTOR_SIZE); | ||
858 | pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE) | ||
859 | - pg_offset; | ||
860 | } | 414 | } |
861 | 415 | ||
862 | 416 | pg_len = PAGE_CACHE_SIZE; | |
863 | bio = do_add_page_to_bio(bio, header->page_array.npages - i, | 417 | bio = do_add_page_to_bio(bio, header->page_array.npages - i, |
864 | WRITE, | 418 | WRITE, isect, pages[i], &map, &be, |
865 | isect, pages[i], be, | ||
866 | bl_end_io_write, par, | 419 | bl_end_io_write, par, |
867 | pg_offset, pg_len); | 420 | 0, &pg_len); |
868 | if (IS_ERR(bio)) { | 421 | if (IS_ERR(bio)) { |
869 | header->pnfs_error = PTR_ERR(bio); | 422 | header->pnfs_error = PTR_ERR(bio); |
870 | bio = NULL; | 423 | bio = NULL; |
871 | goto out; | 424 | goto out; |
872 | } | 425 | } |
873 | offset += saved_len; | ||
874 | count -= saved_len; | ||
875 | isect += PAGE_CACHE_SECTORS; | ||
876 | last_isect = isect; | ||
877 | extent_length -= PAGE_CACHE_SECTORS; | ||
878 | } | ||
879 | 426 | ||
880 | /* Last page inside INVALID extent */ | 427 | offset += pg_len; |
881 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | 428 | count -= pg_len; |
882 | bio = bl_submit_bio(WRITE, bio); | 429 | isect += (pg_len >> SECTOR_SHIFT); |
883 | temp = last_isect >> PAGE_CACHE_SECTOR_SHIFT; | 430 | extent_length -= (pg_len >> SECTOR_SHIFT); |
884 | npg_zero = npg_per_block - do_div(temp, npg_per_block); | ||
885 | if (npg_zero < npg_per_block) { | ||
886 | last = 1; | ||
887 | goto fill_invalid_ext; | ||
888 | } | ||
889 | } | 431 | } |
890 | 432 | ||
891 | write_done: | ||
892 | header->res.count = header->args.count; | 433 | header->res.count = header->args.count; |
893 | out: | 434 | out: |
894 | bl_put_extent(be); | ||
895 | bl_put_extent(cow_read); | ||
896 | bl_submit_bio(WRITE, bio); | 435 | bl_submit_bio(WRITE, bio); |
436 | blk_finish_plug(&plug); | ||
897 | put_parallel(par); | 437 | put_parallel(par); |
898 | return PNFS_ATTEMPTED; | 438 | return PNFS_ATTEMPTED; |
899 | out_mds: | ||
900 | bl_put_extent(be); | ||
901 | bl_put_extent(cow_read); | ||
902 | kfree(par); | ||
903 | return PNFS_NOT_ATTEMPTED; | ||
904 | } | ||
905 | |||
906 | /* FIXME - range ignored */ | ||
907 | static void | ||
908 | release_extents(struct pnfs_block_layout *bl, struct pnfs_layout_range *range) | ||
909 | { | ||
910 | int i; | ||
911 | struct pnfs_block_extent *be; | ||
912 | |||
913 | spin_lock(&bl->bl_ext_lock); | ||
914 | for (i = 0; i < EXTENT_LISTS; i++) { | ||
915 | while (!list_empty(&bl->bl_extents[i])) { | ||
916 | be = list_first_entry(&bl->bl_extents[i], | ||
917 | struct pnfs_block_extent, | ||
918 | be_node); | ||
919 | list_del(&be->be_node); | ||
920 | bl_put_extent(be); | ||
921 | } | ||
922 | } | ||
923 | spin_unlock(&bl->bl_ext_lock); | ||
924 | } | ||
925 | |||
926 | static void | ||
927 | release_inval_marks(struct pnfs_inval_markings *marks) | ||
928 | { | ||
929 | struct pnfs_inval_tracking *pos, *temp; | ||
930 | struct pnfs_block_short_extent *se, *stemp; | ||
931 | |||
932 | list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) { | ||
933 | list_del(&pos->it_link); | ||
934 | kfree(pos); | ||
935 | } | ||
936 | |||
937 | list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) { | ||
938 | list_del(&se->bse_node); | ||
939 | kfree(se); | ||
940 | } | ||
941 | return; | ||
942 | } | 439 | } |
943 | 440 | ||
944 | static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo) | 441 | static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo) |
945 | { | 442 | { |
946 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); | 443 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); |
444 | int err; | ||
947 | 445 | ||
948 | dprintk("%s enter\n", __func__); | 446 | dprintk("%s enter\n", __func__); |
949 | release_extents(bl, NULL); | 447 | |
950 | release_inval_marks(&bl->bl_inval); | 448 | err = ext_tree_remove(bl, true, 0, LLONG_MAX); |
449 | WARN_ON(err); | ||
450 | |||
951 | kfree(bl); | 451 | kfree(bl); |
952 | } | 452 | } |
953 | 453 | ||
@@ -960,14 +460,11 @@ static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode, | |||
960 | bl = kzalloc(sizeof(*bl), gfp_flags); | 460 | bl = kzalloc(sizeof(*bl), gfp_flags); |
961 | if (!bl) | 461 | if (!bl) |
962 | return NULL; | 462 | return NULL; |
463 | |||
464 | bl->bl_ext_rw = RB_ROOT; | ||
465 | bl->bl_ext_ro = RB_ROOT; | ||
963 | spin_lock_init(&bl->bl_ext_lock); | 466 | spin_lock_init(&bl->bl_ext_lock); |
964 | INIT_LIST_HEAD(&bl->bl_extents[0]); | 467 | |
965 | INIT_LIST_HEAD(&bl->bl_extents[1]); | ||
966 | INIT_LIST_HEAD(&bl->bl_commit); | ||
967 | INIT_LIST_HEAD(&bl->bl_committing); | ||
968 | bl->bl_count = 0; | ||
969 | bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> SECTOR_SHIFT; | ||
970 | BL_INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize); | ||
971 | return &bl->bl_layout; | 468 | return &bl->bl_layout; |
972 | } | 469 | } |
973 | 470 | ||
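bl_alloc_layout_hdr now seeds two red-black trees in place of the old extent lists: bl_ext_rw for writable (READWRITE/INVALID) extents and bl_ext_ro for read-only (READ/NONE) ones. A plausible sketch of how a lookup picks its root from the rw flag that ext_tree_lookup takes (hypothetical helper, assuming this split; the real tree code lives in extent_tree.c, not shown in this hunk):

    static struct rb_root *ext_tree_root(struct pnfs_block_layout *bl, bool rw)
    {
        /* writable and read-only extents live in separate trees */
        return rw ? &bl->bl_ext_rw : &bl->bl_ext_ro;
    }
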
@@ -977,215 +474,318 @@ static void bl_free_lseg(struct pnfs_layout_segment *lseg) | |||
977 | kfree(lseg); | 474 | kfree(lseg); |
978 | } | 475 | } |
979 | 476 | ||
980 | /* We pretty much ignore lseg, and store all data layout wide, so we | 477 | /* Tracks info needed to ensure extents in layout obey constraints of spec */ |
981 | * can correctly merge. | 478 | struct layout_verification { |
982 | */ | 479 | u32 mode; /* R or RW */ |
983 | static struct pnfs_layout_segment *bl_alloc_lseg(struct pnfs_layout_hdr *lo, | 480 | u64 start; /* Expected start of next non-COW extent */ |
984 | struct nfs4_layoutget_res *lgr, | 481 | u64 inval; /* Start of INVAL coverage */ |
985 | gfp_t gfp_flags) | 482 | u64 cowread; /* End of COW read coverage */ |
986 | { | 483 | }; |
987 | struct pnfs_layout_segment *lseg; | ||
988 | int status; | ||
989 | 484 | ||
990 | dprintk("%s enter\n", __func__); | 485 | /* Verify the extent meets the layout requirements of the pnfs-block draft, |
991 | lseg = kzalloc(sizeof(*lseg), gfp_flags); | 486 | * section 2.3.1. |
992 | if (!lseg) | 487 | */ |
993 | return ERR_PTR(-ENOMEM); | 488 | static int verify_extent(struct pnfs_block_extent *be, |
994 | status = nfs4_blk_process_layoutget(lo, lgr, gfp_flags); | 489 | struct layout_verification *lv) |
995 | if (status) { | 490 | { |
996 | /* We don't want to call the full-blown bl_free_lseg, | 491 | if (lv->mode == IOMODE_READ) { |
997 | * since on error extents were not touched. | 492 | if (be->be_state == PNFS_BLOCK_READWRITE_DATA || |
998 | */ | 493 | be->be_state == PNFS_BLOCK_INVALID_DATA) |
999 | kfree(lseg); | 494 | return -EIO; |
1000 | return ERR_PTR(status); | 495 | if (be->be_f_offset != lv->start) |
496 | return -EIO; | ||
497 | lv->start += be->be_length; | ||
498 | return 0; | ||
1001 | } | 499 | } |
1002 | return lseg; | 500 | /* lv->mode == IOMODE_RW */ |
501 | if (be->be_state == PNFS_BLOCK_READWRITE_DATA) { | ||
502 | if (be->be_f_offset != lv->start) | ||
503 | return -EIO; | ||
504 | if (lv->cowread > lv->start) | ||
505 | return -EIO; | ||
506 | lv->start += be->be_length; | ||
507 | lv->inval = lv->start; | ||
508 | return 0; | ||
509 | } else if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
510 | if (be->be_f_offset != lv->start) | ||
511 | return -EIO; | ||
512 | lv->start += be->be_length; | ||
513 | return 0; | ||
514 | } else if (be->be_state == PNFS_BLOCK_READ_DATA) { | ||
515 | if (be->be_f_offset > lv->start) | ||
516 | return -EIO; | ||
517 | if (be->be_f_offset < lv->inval) | ||
518 | return -EIO; | ||
519 | if (be->be_f_offset < lv->cowread) | ||
520 | return -EIO; | ||
521 | /* It looks like you might want to min this with lv->start, | ||
522 | * but you really don't. | ||
523 | */ | ||
524 | lv->inval = lv->inval + be->be_length; | ||
525 | lv->cowread = be->be_f_offset + be->be_length; | ||
526 | return 0; | ||
527 | } else | ||
528 | return -EIO; | ||
1003 | } | 529 | } |
1004 | 530 | ||
1005 | static void | 531 | static int decode_sector_number(__be32 **rp, sector_t *sp) |
1006 | bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, | ||
1007 | const struct nfs4_layoutcommit_args *arg) | ||
1008 | { | 532 | { |
1009 | dprintk("%s enter\n", __func__); | 533 | uint64_t s; |
1010 | encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg); | 534 | |
535 | *rp = xdr_decode_hyper(*rp, &s); | ||
536 | if (s & 0x1ff) { | ||
537 | printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__); | ||
538 | return -1; | ||
539 | } | ||
540 | *sp = s >> SECTOR_SHIFT; | ||
541 | return 0; | ||
1011 | } | 542 | } |
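decode_sector_number accepts only 512-byte-aligned byte values: 0x1ff masks the low nine bits, and SECTOR_SHIFT is 9 (SECTOR_SIZE == 1 << SECTOR_SHIFT == 512 per blocklayout.h). Illustrative values only:

    /* s = 1536 (3 * 512): (s & 0x1ff) == 0, so *sp becomes 1536 >> 9 == 3 */
    /* s = 1500: 1500 & 0x1ff == 476 != 0, so the decode fails with -1    */
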
1012 | 543 | ||
1013 | static void | 544 | static int |
1014 | bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) | 545 | bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo, |
546 | struct layout_verification *lv, struct list_head *extents, | ||
547 | gfp_t gfp_mask) | ||
1015 | { | 548 | { |
1016 | struct pnfs_layout_hdr *lo = NFS_I(lcdata->args.inode)->layout; | 549 | struct pnfs_block_extent *be; |
550 | struct nfs4_deviceid id; | ||
551 | int error; | ||
552 | __be32 *p; | ||
1017 | 553 | ||
1018 | dprintk("%s enter\n", __func__); | 554 | p = xdr_inline_decode(xdr, 28 + NFS4_DEVICEID4_SIZE); |
1019 | clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), &lcdata->args, lcdata->res.status); | 555 | if (!p) |
1020 | } | 556 | return -EIO; |
1021 | 557 | ||
1022 | static void free_blk_mountid(struct block_mount_id *mid) | 558 | be = kzalloc(sizeof(*be), GFP_NOFS); |
1023 | { | 559 | if (!be) |
1024 | if (mid) { | 560 | return -ENOMEM; |
1025 | struct pnfs_block_dev *dev, *tmp; | ||
1026 | 561 | ||
1027 | /* No need to take bm_lock as we are last user freeing bm_devlist */ | 562 | memcpy(&id, p, NFS4_DEVICEID4_SIZE); |
1028 | list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) { | 563 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); |
1029 | list_del(&dev->bm_node); | 564 | |
1030 | bl_free_block_dev(dev); | 565 | error = -EIO; |
1031 | } | 566 | be->be_device = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id, |
1032 | kfree(mid); | 567 | lo->plh_lc_cred, gfp_mask); |
568 | if (!be->be_device) | ||
569 | goto out_free_be; | ||
570 | |||
571 | /* | ||
572 | * The next three values are read in as bytes, but stored in the | ||
573 | * extent structure in 512-byte granularity. | ||
574 | */ | ||
575 | if (decode_sector_number(&p, &be->be_f_offset) < 0) | ||
576 | goto out_put_deviceid; | ||
577 | if (decode_sector_number(&p, &be->be_length) < 0) | ||
578 | goto out_put_deviceid; | ||
579 | if (decode_sector_number(&p, &be->be_v_offset) < 0) | ||
580 | goto out_put_deviceid; | ||
581 | be->be_state = be32_to_cpup(p++); | ||
582 | |||
583 | error = verify_extent(be, lv); | ||
584 | if (error) { | ||
585 | dprintk("%s: extent verification failed\n", __func__); | ||
586 | goto out_put_deviceid; | ||
1033 | } | 587 | } |
588 | |||
589 | list_add_tail(&be->be_list, extents); | ||
590 | return 0; | ||
591 | |||
592 | out_put_deviceid: | ||
593 | nfs4_put_deviceid_node(be->be_device); | ||
594 | out_free_be: | ||
595 | kfree(be); | ||
596 | return error; | ||
1034 | } | 597 | } |
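The 28 + NFS4_DEVICEID4_SIZE bytes pulled by xdr_inline_decode above cover the fixed-size on-the-wire extent encoding, assuming NFS4_DEVICEID4_SIZE is 16 as defined by NFSv4.1:

    /*
     *   deviceid                16 bytes
     *   be_f_offset (hyper)      8
     *   be_length   (hyper)      8
     *   be_v_offset (hyper)      8
     *   be_state    (uint32)     4
     *   ------------------------------
     *   total                   44 == 28 + NFS4_DEVICEID4_SIZE
     *
     * which also matches BL_EXTENT_SIZE in blocklayout.h:
     * 7 * sizeof(__be32) + NFS4_DEVICEID4_SIZE == 28 + 16.
     */
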
1035 | 598 | ||
1036 | /* This is mostly copied from the filelayout_get_device_info function. | 599 | static struct pnfs_layout_segment * |
1037 | * It seems much of this should be at the generic pnfs level. | 600 | bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr, |
1038 | */ | 601 | gfp_t gfp_mask) |
1039 | static struct pnfs_block_dev * | ||
1040 | nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, | ||
1041 | struct nfs4_deviceid *d_id) | ||
1042 | { | 602 | { |
1043 | struct pnfs_device *dev; | 603 | struct layout_verification lv = { |
1044 | struct pnfs_block_dev *rv; | 604 | .mode = lgr->range.iomode, |
1045 | u32 max_resp_sz; | 605 | .start = lgr->range.offset >> SECTOR_SHIFT, |
1046 | int max_pages; | 606 | .inval = lgr->range.offset >> SECTOR_SHIFT, |
1047 | struct page **pages = NULL; | 607 | .cowread = lgr->range.offset >> SECTOR_SHIFT, |
1048 | int i, rc; | 608 | }; |
609 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); | ||
610 | struct pnfs_layout_segment *lseg; | ||
611 | struct xdr_buf buf; | ||
612 | struct xdr_stream xdr; | ||
613 | struct page *scratch; | ||
614 | int status, i; | ||
615 | uint32_t count; | ||
616 | __be32 *p; | ||
617 | LIST_HEAD(extents); | ||
618 | |||
619 | dprintk("---> %s\n", __func__); | ||
620 | |||
621 | lseg = kzalloc(sizeof(*lseg), gfp_mask); | ||
622 | if (!lseg) | ||
623 | return ERR_PTR(-ENOMEM); | ||
624 | |||
625 | status = -ENOMEM; | ||
626 | scratch = alloc_page(gfp_mask); | ||
627 | if (!scratch) | ||
628 | goto out; | ||
629 | |||
630 | xdr_init_decode_pages(&xdr, &buf, | ||
631 | lgr->layoutp->pages, lgr->layoutp->len); | ||
632 | xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); | ||
633 | |||
634 | status = -EIO; | ||
635 | p = xdr_inline_decode(&xdr, 4); | ||
636 | if (unlikely(!p)) | ||
637 | goto out_free_scratch; | ||
638 | |||
639 | count = be32_to_cpup(p++); | ||
640 | dprintk("%s: number of extents %d\n", __func__, count); | ||
1049 | 641 | ||
1050 | /* | 642 | /* |
1051 | * Use the session max response size as the basis for setting | 643 | * Decode individual extents, putting them in temporary staging area |
1052 | * GETDEVICEINFO's maxcount | 644 | * until whole layout is decoded to make error recovery easier. |
1053 | */ | 645 | */ |
1054 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | 646 | for (i = 0; i < count; i++) { |
1055 | max_pages = nfs_page_array_len(0, max_resp_sz); | 647 | status = bl_alloc_extent(&xdr, lo, &lv, &extents, gfp_mask); |
1056 | dprintk("%s max_resp_sz %u max_pages %d\n", | 648 | if (status) |
1057 | __func__, max_resp_sz, max_pages); | 649 | goto process_extents; |
1058 | |||
1059 | dev = kmalloc(sizeof(*dev), GFP_NOFS); | ||
1060 | if (!dev) { | ||
1061 | dprintk("%s kmalloc failed\n", __func__); | ||
1062 | return ERR_PTR(-ENOMEM); | ||
1063 | } | 650 | } |
1064 | 651 | ||
1065 | pages = kcalloc(max_pages, sizeof(struct page *), GFP_NOFS); | 652 | if (lgr->range.offset + lgr->range.length != |
1066 | if (pages == NULL) { | 653 | lv.start << SECTOR_SHIFT) { |
1067 | kfree(dev); | 654 | dprintk("%s Final length mismatch\n", __func__); |
1068 | return ERR_PTR(-ENOMEM); | 655 | status = -EIO; |
656 | goto process_extents; | ||
1069 | } | 657 | } |
1070 | for (i = 0; i < max_pages; i++) { | 658 | |
1071 | pages[i] = alloc_page(GFP_NOFS); | 659 | if (lv.start < lv.cowread) { |
1072 | if (!pages[i]) { | 660 | dprintk("%s Final uncovered COW extent\n", __func__); |
1073 | rv = ERR_PTR(-ENOMEM); | 661 | status = -EIO; |
1074 | goto out_free; | ||
1075 | } | ||
1076 | } | 662 | } |
1077 | 663 | ||
1078 | memcpy(&dev->dev_id, d_id, sizeof(*d_id)); | 664 | process_extents: |
1079 | dev->layout_type = LAYOUT_BLOCK_VOLUME; | 665 | while (!list_empty(&extents)) { |
1080 | dev->pages = pages; | 666 | struct pnfs_block_extent *be = |
1081 | dev->pgbase = 0; | 667 | list_first_entry(&extents, struct pnfs_block_extent, |
1082 | dev->pglen = PAGE_SIZE * max_pages; | 668 | be_list); |
1083 | dev->mincount = 0; | 669 | list_del(&be->be_list); |
1084 | dev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; | 670 | |
1085 | 671 | if (!status) | |
1086 | dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); | 672 | status = ext_tree_insert(bl, be); |
1087 | rc = nfs4_proc_getdeviceinfo(server, dev, NULL); | 673 | |
1088 | dprintk("%s getdevice info returns %d\n", __func__, rc); | 674 | if (status) { |
1089 | if (rc) { | 675 | nfs4_put_deviceid_node(be->be_device); |
1090 | rv = ERR_PTR(rc); | 676 | kfree(be); |
1091 | goto out_free; | 677 | } |
1092 | } | 678 | } |
1093 | 679 | ||
1094 | rv = nfs4_blk_decode_device(server, dev); | 680 | out_free_scratch: |
1095 | out_free: | 681 | __free_page(scratch); |
1096 | for (i = 0; i < max_pages; i++) | 682 | out: |
1097 | __free_page(pages[i]); | 683 | dprintk("%s returns %d\n", __func__, status); |
1098 | kfree(pages); | 684 | if (status) { |
1099 | kfree(dev); | 685 | kfree(lseg); |
1100 | return rv; | 686 | return ERR_PTR(status); |
687 | } | ||
688 | return lseg; | ||
1101 | } | 689 | } |
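bl_alloc_lseg decodes every extent into a private staging list before touching the layout's trees, so one bad extent unwinds cleanly; note the process_extents loop runs even on error. A condensed sketch of the pattern (error handling trimmed, using the names from the hunk above):

    struct pnfs_block_extent *be;
    LIST_HEAD(staging);
    int status = 0, i;

    for (i = 0; i < count && !status; i++)
        status = bl_alloc_extent(&xdr, lo, &lv, &staging, gfp_mask);

    while (!list_empty(&staging)) {
        be = list_first_entry(&staging, struct pnfs_block_extent, be_list);
        list_del(&be->be_list);
        if (!status)
            status = ext_tree_insert(bl, be);   /* commit to the tree */
        if (status) {
            nfs4_put_deviceid_node(be->be_device);
            kfree(be);                          /* unwind on any failure */
        }
    }
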
1102 | 690 | ||
1103 | static int | 691 | static void |
1104 | bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) | 692 | bl_return_range(struct pnfs_layout_hdr *lo, |
693 | struct pnfs_layout_range *range) | ||
1105 | { | 694 | { |
1106 | struct block_mount_id *b_mt_id = NULL; | 695 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); |
1107 | struct pnfs_devicelist *dlist = NULL; | 696 | sector_t offset = range->offset >> SECTOR_SHIFT, end; |
1108 | struct pnfs_block_dev *bdev; | ||
1109 | LIST_HEAD(block_disklist); | ||
1110 | int status, i; | ||
1111 | |||
1112 | dprintk("%s enter\n", __func__); | ||
1113 | 697 | ||
1114 | if (server->pnfs_blksize == 0) { | 698 | if (range->offset % 8) { |
1115 | dprintk("%s Server did not return blksize\n", __func__); | 699 | dprintk("%s: offset %lld not block size aligned\n", |
1116 | return -EINVAL; | 700 | __func__, range->offset); |
1117 | } | 701 | return; |
1118 | b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_NOFS); | ||
1119 | if (!b_mt_id) { | ||
1120 | status = -ENOMEM; | ||
1121 | goto out_error; | ||
1122 | } | ||
1123 | /* Initialize nfs4 block layout mount id */ | ||
1124 | spin_lock_init(&b_mt_id->bm_lock); | ||
1125 | INIT_LIST_HEAD(&b_mt_id->bm_devlist); | ||
1126 | |||
1127 | dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_NOFS); | ||
1128 | if (!dlist) { | ||
1129 | status = -ENOMEM; | ||
1130 | goto out_error; | ||
1131 | } | 702 | } |
1132 | dlist->eof = 0; | 703 | |
1133 | while (!dlist->eof) { | 704 | if (range->length != NFS4_MAX_UINT64) { |
1134 | status = nfs4_proc_getdevicelist(server, fh, dlist); | 705 | if (range->length % 8) { |
1135 | if (status) | 706 | dprintk("%s: length %lld not block size aligned\n", |
1136 | goto out_error; | 707 | __func__, range->length); |
1137 | dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n", | 708 | return; |
1138 | __func__, dlist->num_devs, dlist->eof); | ||
1139 | for (i = 0; i < dlist->num_devs; i++) { | ||
1140 | bdev = nfs4_blk_get_deviceinfo(server, fh, | ||
1141 | &dlist->dev_id[i]); | ||
1142 | if (IS_ERR(bdev)) { | ||
1143 | status = PTR_ERR(bdev); | ||
1144 | goto out_error; | ||
1145 | } | ||
1146 | spin_lock(&b_mt_id->bm_lock); | ||
1147 | list_add(&bdev->bm_node, &b_mt_id->bm_devlist); | ||
1148 | spin_unlock(&b_mt_id->bm_lock); | ||
1149 | } | 709 | } |
1150 | } | ||
1151 | dprintk("%s SUCCESS\n", __func__); | ||
1152 | server->pnfs_ld_data = b_mt_id; | ||
1153 | 710 | ||
1154 | out_return: | 711 | end = offset + (range->length >> SECTOR_SHIFT); |
1155 | kfree(dlist); | 712 | } else { |
1156 | return status; | 713 | end = round_down(NFS4_MAX_UINT64, PAGE_SIZE); |
714 | } | ||
1157 | 715 | ||
1158 | out_error: | 716 | ext_tree_remove(bl, range->iomode & IOMODE_RW, offset, end); |
1159 | free_blk_mountid(b_mt_id); | ||
1160 | goto out_return; | ||
1161 | } | 717 | } |
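For a whole-file return the client passes length == NFS4_MAX_UINT64, so bl_return_range cannot compute offset + length without overflowing; instead end is rounded down from the maximum. Worked numbers (assuming 4096-byte pages and SECTOR_SHIFT == 9):

    /* offset == 0, length == NFS4_MAX_UINT64 (2^64 - 1):
     *   end = round_down(2^64 - 1, 4096) == 2^64 - 4096
     * offset == 8192, length == 4096:
     *   offset sector == 8192 >> 9 == 16, end == 16 + (4096 >> 9) == 24
     */
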
1162 | 718 | ||
1163 | static int | 719 | static int |
1164 | bl_clear_layoutdriver(struct nfs_server *server) | 720 | bl_prepare_layoutcommit(struct nfs4_layoutcommit_args *arg) |
721 | { | ||
722 | return ext_tree_prepare_commit(arg); | ||
723 | } | ||
724 | |||
725 | static void | ||
726 | bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) | ||
1165 | { | 727 | { |
1166 | struct block_mount_id *b_mt_id = server->pnfs_ld_data; | 728 | ext_tree_mark_committed(&lcdata->args, lcdata->res.status); |
729 | } | ||
1167 | 730 | ||
731 | static int | ||
732 | bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) | ||
733 | { | ||
1168 | dprintk("%s enter\n", __func__); | 734 | dprintk("%s enter\n", __func__); |
1169 | free_blk_mountid(b_mt_id); | 735 | |
1170 | dprintk("%s RETURNS\n", __func__); | 736 | if (server->pnfs_blksize == 0) { |
737 | dprintk("%s Server did not return blksize\n", __func__); | ||
738 | return -EINVAL; | ||
739 | } | ||
740 | if (server->pnfs_blksize > PAGE_SIZE) { | ||
741 | printk(KERN_ERR "%s: pNFS blksize %d not supported.\n", | ||
742 | __func__, server->pnfs_blksize); | ||
743 | return -EINVAL; | ||
744 | } | ||
745 | |||
1171 | return 0; | 746 | return 0; |
1172 | } | 747 | } |
1173 | 748 | ||
1174 | static bool | 749 | static bool |
1175 | is_aligned_req(struct nfs_page *req, unsigned int alignment) | 750 | is_aligned_req(struct nfs_pageio_descriptor *pgio, |
751 | struct nfs_page *req, unsigned int alignment) | ||
1176 | { | 752 | { |
1177 | return IS_ALIGNED(req->wb_offset, alignment) && | 753 | /* |
1178 | IS_ALIGNED(req->wb_bytes, alignment); | 754 | * Always accept buffered writes, higher layers take care of the |
755 | * right alignment. | ||
756 | */ | ||
757 | if (pgio->pg_dreq == NULL) | ||
758 | return true; | ||
759 | |||
760 | if (!IS_ALIGNED(req->wb_offset, alignment)) | ||
761 | return false; | ||
762 | |||
763 | if (IS_ALIGNED(req->wb_bytes, alignment)) | ||
764 | return true; | ||
765 | |||
766 | if (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode)) { | ||
767 | /* | ||
768 | * If the write goes up to the inode size, just write | ||
769 | * the full page. Data past the inode size is | ||
770 | * guaranteed to be zeroed by the higher level client | ||
771 | * code, and this behaviour is mandated by RFC 5663 | ||
772 | * section 2.3.2. | ||
773 | */ | ||
774 | return true; | ||
775 | } | ||
776 | |||
777 | return false; | ||
1179 | } | 778 | } |
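A worked example for the i_size check in is_aligned_req above (illustrative numbers; assuming PAGE_SIZE == 4096): with i_size == 10000, a direct write of 1808 bytes at offset 8192 ends exactly at i_size (8192 + 1808 == 10000). wb_offset is page aligned, wb_bytes is not, but the request is still accepted and the whole last page is written; bytes 10000..12287 are zeroes the client guarantees, as RFC 5663 section 2.3.2 requires.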
1180 | 779 | ||
1181 | static void | 780 | static void |
1182 | bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 781 | bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
1183 | { | 782 | { |
1184 | if (pgio->pg_dreq != NULL && | 783 | if (!is_aligned_req(pgio, req, SECTOR_SIZE)) { |
1185 | !is_aligned_req(req, SECTOR_SIZE)) | ||
1186 | nfs_pageio_reset_read_mds(pgio); | 784 | nfs_pageio_reset_read_mds(pgio); |
1187 | else | 785 | return; |
1188 | pnfs_generic_pg_init_read(pgio, req); | 786 | } |
787 | |||
788 | pnfs_generic_pg_init_read(pgio, req); | ||
1189 | } | 789 | } |
1190 | 790 | ||
1191 | /* | 791 | /* |
@@ -1196,10 +796,8 @@ static size_t | |||
1196 | bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 796 | bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
1197 | struct nfs_page *req) | 797 | struct nfs_page *req) |
1198 | { | 798 | { |
1199 | if (pgio->pg_dreq != NULL && | 799 | if (!is_aligned_req(pgio, req, SECTOR_SIZE)) |
1200 | !is_aligned_req(req, SECTOR_SIZE)) | ||
1201 | return 0; | 800 | return 0; |
1202 | |||
1203 | return pnfs_generic_pg_test(pgio, prev, req); | 801 | return pnfs_generic_pg_test(pgio, prev, req); |
1204 | } | 802 | } |
1205 | 803 | ||
@@ -1229,19 +827,20 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) | |||
1229 | static void | 827 | static void |
1230 | bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 828 | bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
1231 | { | 829 | { |
1232 | if (pgio->pg_dreq != NULL && | 830 | u64 wb_size; |
1233 | !is_aligned_req(req, PAGE_CACHE_SIZE)) { | 831 | |
832 | if (!is_aligned_req(pgio, req, PAGE_SIZE)) { | ||
1234 | nfs_pageio_reset_write_mds(pgio); | 833 | nfs_pageio_reset_write_mds(pgio); |
1235 | } else { | 834 | return; |
1236 | u64 wb_size; | ||
1237 | if (pgio->pg_dreq == NULL) | ||
1238 | wb_size = pnfs_num_cont_bytes(pgio->pg_inode, | ||
1239 | req->wb_index); | ||
1240 | else | ||
1241 | wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); | ||
1242 | |||
1243 | pnfs_generic_pg_init_write(pgio, req, wb_size); | ||
1244 | } | 835 | } |
836 | |||
837 | if (pgio->pg_dreq == NULL) | ||
838 | wb_size = pnfs_num_cont_bytes(pgio->pg_inode, | ||
839 | req->wb_index); | ||
840 | else | ||
841 | wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); | ||
842 | |||
843 | pnfs_generic_pg_init_write(pgio, req, wb_size); | ||
1245 | } | 844 | } |
1246 | 845 | ||
1247 | /* | 846 | /* |
@@ -1252,10 +851,8 @@ static size_t | |||
1252 | bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 851 | bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
1253 | struct nfs_page *req) | 852 | struct nfs_page *req) |
1254 | { | 853 | { |
1255 | if (pgio->pg_dreq != NULL && | 854 | if (!is_aligned_req(pgio, req, PAGE_SIZE)) |
1256 | !is_aligned_req(req, PAGE_CACHE_SIZE)) | ||
1257 | return 0; | 855 | return 0; |
1258 | |||
1259 | return pnfs_generic_pg_test(pgio, prev, req); | 856 | return pnfs_generic_pg_test(pgio, prev, req); |
1260 | } | 857 | } |
1261 | 858 | ||
@@ -1275,146 +872,24 @@ static struct pnfs_layoutdriver_type blocklayout_type = { | |||
1275 | .id = LAYOUT_BLOCK_VOLUME, | 872 | .id = LAYOUT_BLOCK_VOLUME, |
1276 | .name = "LAYOUT_BLOCK_VOLUME", | 873 | .name = "LAYOUT_BLOCK_VOLUME", |
1277 | .owner = THIS_MODULE, | 874 | .owner = THIS_MODULE, |
875 | .flags = PNFS_LAYOUTRET_ON_SETATTR | | ||
876 | PNFS_READ_WHOLE_PAGE, | ||
1278 | .read_pagelist = bl_read_pagelist, | 877 | .read_pagelist = bl_read_pagelist, |
1279 | .write_pagelist = bl_write_pagelist, | 878 | .write_pagelist = bl_write_pagelist, |
1280 | .alloc_layout_hdr = bl_alloc_layout_hdr, | 879 | .alloc_layout_hdr = bl_alloc_layout_hdr, |
1281 | .free_layout_hdr = bl_free_layout_hdr, | 880 | .free_layout_hdr = bl_free_layout_hdr, |
1282 | .alloc_lseg = bl_alloc_lseg, | 881 | .alloc_lseg = bl_alloc_lseg, |
1283 | .free_lseg = bl_free_lseg, | 882 | .free_lseg = bl_free_lseg, |
1284 | .encode_layoutcommit = bl_encode_layoutcommit, | 883 | .return_range = bl_return_range, |
884 | .prepare_layoutcommit = bl_prepare_layoutcommit, | ||
1285 | .cleanup_layoutcommit = bl_cleanup_layoutcommit, | 885 | .cleanup_layoutcommit = bl_cleanup_layoutcommit, |
1286 | .set_layoutdriver = bl_set_layoutdriver, | 886 | .set_layoutdriver = bl_set_layoutdriver, |
1287 | .clear_layoutdriver = bl_clear_layoutdriver, | 887 | .alloc_deviceid_node = bl_alloc_deviceid_node, |
888 | .free_deviceid_node = bl_free_deviceid_node, | ||
1288 | .pg_read_ops = &bl_pg_read_ops, | 889 | .pg_read_ops = &bl_pg_read_ops, |
1289 | .pg_write_ops = &bl_pg_write_ops, | 890 | .pg_write_ops = &bl_pg_write_ops, |
1290 | }; | 891 | }; |
1291 | 892 | ||
1292 | static const struct rpc_pipe_ops bl_upcall_ops = { | ||
1293 | .upcall = rpc_pipe_generic_upcall, | ||
1294 | .downcall = bl_pipe_downcall, | ||
1295 | .destroy_msg = bl_pipe_destroy_msg, | ||
1296 | }; | ||
1297 | |||
1298 | static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb, | ||
1299 | struct rpc_pipe *pipe) | ||
1300 | { | ||
1301 | struct dentry *dir, *dentry; | ||
1302 | |||
1303 | dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME); | ||
1304 | if (dir == NULL) | ||
1305 | return ERR_PTR(-ENOENT); | ||
1306 | dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe); | ||
1307 | dput(dir); | ||
1308 | return dentry; | ||
1309 | } | ||
1310 | |||
1311 | static void nfs4blocklayout_unregister_sb(struct super_block *sb, | ||
1312 | struct rpc_pipe *pipe) | ||
1313 | { | ||
1314 | if (pipe->dentry) | ||
1315 | rpc_unlink(pipe->dentry); | ||
1316 | } | ||
1317 | |||
1318 | static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, | ||
1319 | void *ptr) | ||
1320 | { | ||
1321 | struct super_block *sb = ptr; | ||
1322 | struct net *net = sb->s_fs_info; | ||
1323 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
1324 | struct dentry *dentry; | ||
1325 | int ret = 0; | ||
1326 | |||
1327 | if (!try_module_get(THIS_MODULE)) | ||
1328 | return 0; | ||
1329 | |||
1330 | if (nn->bl_device_pipe == NULL) { | ||
1331 | module_put(THIS_MODULE); | ||
1332 | return 0; | ||
1333 | } | ||
1334 | |||
1335 | switch (event) { | ||
1336 | case RPC_PIPEFS_MOUNT: | ||
1337 | dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe); | ||
1338 | if (IS_ERR(dentry)) { | ||
1339 | ret = PTR_ERR(dentry); | ||
1340 | break; | ||
1341 | } | ||
1342 | nn->bl_device_pipe->dentry = dentry; | ||
1343 | break; | ||
1344 | case RPC_PIPEFS_UMOUNT: | ||
1345 | if (nn->bl_device_pipe->dentry) | ||
1346 | nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe); | ||
1347 | break; | ||
1348 | default: | ||
1349 | ret = -ENOTSUPP; | ||
1350 | break; | ||
1351 | } | ||
1352 | module_put(THIS_MODULE); | ||
1353 | return ret; | ||
1354 | } | ||
1355 | |||
1356 | static struct notifier_block nfs4blocklayout_block = { | ||
1357 | .notifier_call = rpc_pipefs_event, | ||
1358 | }; | ||
1359 | |||
1360 | static struct dentry *nfs4blocklayout_register_net(struct net *net, | ||
1361 | struct rpc_pipe *pipe) | ||
1362 | { | ||
1363 | struct super_block *pipefs_sb; | ||
1364 | struct dentry *dentry; | ||
1365 | |||
1366 | pipefs_sb = rpc_get_sb_net(net); | ||
1367 | if (!pipefs_sb) | ||
1368 | return NULL; | ||
1369 | dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe); | ||
1370 | rpc_put_sb_net(net); | ||
1371 | return dentry; | ||
1372 | } | ||
1373 | |||
1374 | static void nfs4blocklayout_unregister_net(struct net *net, | ||
1375 | struct rpc_pipe *pipe) | ||
1376 | { | ||
1377 | struct super_block *pipefs_sb; | ||
1378 | |||
1379 | pipefs_sb = rpc_get_sb_net(net); | ||
1380 | if (pipefs_sb) { | ||
1381 | nfs4blocklayout_unregister_sb(pipefs_sb, pipe); | ||
1382 | rpc_put_sb_net(net); | ||
1383 | } | ||
1384 | } | ||
1385 | |||
1386 | static int nfs4blocklayout_net_init(struct net *net) | ||
1387 | { | ||
1388 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
1389 | struct dentry *dentry; | ||
1390 | |||
1391 | init_waitqueue_head(&nn->bl_wq); | ||
1392 | nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); | ||
1393 | if (IS_ERR(nn->bl_device_pipe)) | ||
1394 | return PTR_ERR(nn->bl_device_pipe); | ||
1395 | dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe); | ||
1396 | if (IS_ERR(dentry)) { | ||
1397 | rpc_destroy_pipe_data(nn->bl_device_pipe); | ||
1398 | return PTR_ERR(dentry); | ||
1399 | } | ||
1400 | nn->bl_device_pipe->dentry = dentry; | ||
1401 | return 0; | ||
1402 | } | ||
1403 | |||
1404 | static void nfs4blocklayout_net_exit(struct net *net) | ||
1405 | { | ||
1406 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
1407 | |||
1408 | nfs4blocklayout_unregister_net(net, nn->bl_device_pipe); | ||
1409 | rpc_destroy_pipe_data(nn->bl_device_pipe); | ||
1410 | nn->bl_device_pipe = NULL; | ||
1411 | } | ||
1412 | |||
1413 | static struct pernet_operations nfs4blocklayout_net_ops = { | ||
1414 | .init = nfs4blocklayout_net_init, | ||
1415 | .exit = nfs4blocklayout_net_exit, | ||
1416 | }; | ||
1417 | |||
1418 | static int __init nfs4blocklayout_init(void) | 893 | static int __init nfs4blocklayout_init(void) |
1419 | { | 894 | { |
1420 | int ret; | 895 | int ret; |
@@ -1424,20 +899,14 @@ static int __init nfs4blocklayout_init(void) | |||
1424 | ret = pnfs_register_layoutdriver(&blocklayout_type); | 899 | ret = pnfs_register_layoutdriver(&blocklayout_type); |
1425 | if (ret) | 900 | if (ret) |
1426 | goto out; | 901 | goto out; |
1427 | 902 | ret = bl_init_pipefs(); | |
1428 | ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block); | ||
1429 | if (ret) | 903 | if (ret) |
1430 | goto out_remove; | 904 | goto out_unregister; |
1431 | ret = register_pernet_subsys(&nfs4blocklayout_net_ops); | 905 | return 0; |
1432 | if (ret) | ||
1433 | goto out_notifier; | ||
1434 | out: | ||
1435 | return ret; | ||
1436 | 906 | ||
1437 | out_notifier: | 907 | out_unregister: |
1438 | rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); | ||
1439 | out_remove: | ||
1440 | pnfs_unregister_layoutdriver(&blocklayout_type); | 908 | pnfs_unregister_layoutdriver(&blocklayout_type); |
909 | out: | ||
1441 | return ret; | 910 | return ret; |
1442 | } | 911 | } |
1443 | 912 | ||
@@ -1446,8 +915,7 @@ static void __exit nfs4blocklayout_exit(void) | |||
1446 | dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", | 915 | dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", |
1447 | __func__); | 916 | __func__); |
1448 | 917 | ||
1449 | rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); | 918 | bl_cleanup_pipefs(); |
1450 | unregister_pernet_subsys(&nfs4blocklayout_net_ops); | ||
1451 | pnfs_unregister_layoutdriver(&blocklayout_type); | 919 | pnfs_unregister_layoutdriver(&blocklayout_type); |
1452 | } | 920 | } |
1453 | 921 | ||
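After this patch the module init path has exactly two steps, and the exit path (in the hunk above) tears them down in reverse; a sketch of the ordering:

    /*
     * init:  pnfs_register_layoutdriver()  ->  bl_init_pipefs()
     *        (a bl_init_pipefs failure unregisters the driver again)
     * exit:  bl_cleanup_pipefs()  ->  pnfs_unregister_layoutdriver()
     */
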
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 9838fb020473..92dca9e90d8d 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h | |||
@@ -44,105 +44,112 @@ | |||
44 | #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) | 44 | #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) |
45 | #define SECTOR_SIZE (1 << SECTOR_SHIFT) | 45 | #define SECTOR_SIZE (1 << SECTOR_SHIFT) |
46 | 46 | ||
47 | struct block_mount_id { | 47 | struct pnfs_block_dev; |
48 | spinlock_t bm_lock; /* protects list */ | ||
49 | struct list_head bm_devlist; /* holds pnfs_block_dev */ | ||
50 | }; | ||
51 | 48 | ||
52 | struct pnfs_block_dev { | 49 | enum pnfs_block_volume_type { |
53 | struct list_head bm_node; | 50 | PNFS_BLOCK_VOLUME_SIMPLE = 0, |
54 | struct nfs4_deviceid bm_mdevid; /* associated devid */ | 51 | PNFS_BLOCK_VOLUME_SLICE = 1, |
55 | struct block_device *bm_mdev; /* meta device itself */ | 52 | PNFS_BLOCK_VOLUME_CONCAT = 2, |
56 | struct net *net; | 53 | PNFS_BLOCK_VOLUME_STRIPE = 3, |
57 | }; | 54 | }; |
58 | 55 | ||
59 | enum exstate4 { | 56 | #define PNFS_BLOCK_MAX_UUIDS 4 |
60 | PNFS_BLOCK_READWRITE_DATA = 0, | 57 | #define PNFS_BLOCK_MAX_DEVICES 64 |
61 | PNFS_BLOCK_READ_DATA = 1, | 58 | |
62 | PNFS_BLOCK_INVALID_DATA = 2, /* mapped, but data is invalid */ | 59 | /* |
63 | PNFS_BLOCK_NONE_DATA = 3 /* unmapped, it's a hole */ | 60 | * Random upper cap for the uuid length to avoid unbounded allocation. |
61 | * Not actually limited by the protocol. | ||
62 | */ | ||
63 | #define PNFS_BLOCK_UUID_LEN 128 | ||
64 | |||
65 | |||
66 | struct pnfs_block_volume { | ||
67 | enum pnfs_block_volume_type type; | ||
68 | union { | ||
69 | struct { | ||
70 | int len; | ||
71 | int nr_sigs; | ||
72 | struct { | ||
73 | u64 offset; | ||
74 | u32 sig_len; | ||
75 | u8 sig[PNFS_BLOCK_UUID_LEN]; | ||
76 | } sigs[PNFS_BLOCK_MAX_UUIDS]; | ||
77 | } simple; | ||
78 | struct { | ||
79 | u64 start; | ||
80 | u64 len; | ||
81 | u32 volume; | ||
82 | } slice; | ||
83 | struct { | ||
84 | u32 volumes_count; | ||
85 | u32 volumes[PNFS_BLOCK_MAX_DEVICES]; | ||
86 | } concat; | ||
87 | struct { | ||
88 | u64 chunk_size; | ||
89 | u32 volumes_count; | ||
90 | u32 volumes[PNFS_BLOCK_MAX_DEVICES]; | ||
91 | } stripe; | ||
92 | }; | ||
64 | }; | 93 | }; |
65 | 94 | ||
66 | #define MY_MAX_TAGS (15) /* tag bitnums used must be less than this */ | 95 | struct pnfs_block_dev_map { |
96 | sector_t start; | ||
97 | sector_t len; | ||
67 | 98 | ||
68 | struct my_tree { | 99 | sector_t disk_offset; |
69 | sector_t mtt_step_size; /* Internal sector alignment */ | 100 | struct block_device *bdev; |
70 | struct list_head mtt_stub; /* Should be a radix tree */ | ||
71 | }; | 101 | }; |
72 | 102 | ||
73 | struct pnfs_inval_markings { | 103 | struct pnfs_block_dev { |
74 | spinlock_t im_lock; | 104 | struct nfs4_deviceid_node node; |
75 | struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */ | 105 | |
76 | sector_t im_block_size; /* Server blocksize in sectors */ | 106 | u64 start; |
77 | struct list_head im_extents; /* Short extents for INVAL->RW conversion */ | 107 | u64 len; |
108 | |||
109 | u32 nr_children; | ||
110 | struct pnfs_block_dev *children; | ||
111 | u64 chunk_size; | ||
112 | |||
113 | struct block_device *bdev; | ||
114 | u64 disk_offset; | ||
115 | |||
116 | bool (*map)(struct pnfs_block_dev *dev, u64 offset, | ||
117 | struct pnfs_block_dev_map *map); | ||
78 | }; | 118 | }; |
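The map callback added to struct pnfs_block_dev turns a file-relative offset into a block device plus on-disk offset. The patch's implementations live in dev.c (not shown in this diff); purely as an illustration, a stripe mapping would typically look something like the sketch below (hypothetical body, field units simplified, 64-bit division written plainly rather than via do_div):

    static bool bl_map_stripe_sketch(struct pnfs_block_dev *dev, u64 offset,
                                     struct pnfs_block_dev_map *map)
    {
        u64 chunk = offset / dev->chunk_size;       /* which stripe chunk */
        u32 vol = chunk % dev->nr_children;         /* which child volume */
        u64 disk_chunk = chunk / dev->nr_children;  /* chunk index on it */
        struct pnfs_block_dev *child = &dev->children[vol];

        map->start = chunk * dev->chunk_size;       /* file-relative start */
        map->len = dev->chunk_size;
        map->bdev = child->bdev;
        map->disk_offset = child->disk_offset + disk_chunk * dev->chunk_size;
        return true;
    }
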
79 | 119 | ||
80 | struct pnfs_inval_tracking { | 120 | enum exstate4 { |
81 | struct list_head it_link; | 121 | PNFS_BLOCK_READWRITE_DATA = 0, |
82 | int it_sector; | 122 | PNFS_BLOCK_READ_DATA = 1, |
83 | int it_tags; | 123 | PNFS_BLOCK_INVALID_DATA = 2, /* mapped, but data is invalid */ |
124 | PNFS_BLOCK_NONE_DATA = 3 /* unmapped, it's a hole */ | ||
84 | }; | 125 | }; |
85 | 126 | ||
86 | /* sector_t fields are all in 512-byte sectors */ | 127 | /* sector_t fields are all in 512-byte sectors */ |
87 | struct pnfs_block_extent { | 128 | struct pnfs_block_extent { |
88 | struct kref be_refcnt; | 129 | union { |
89 | struct list_head be_node; /* link into lseg list */ | 130 | struct rb_node be_node; |
90 | struct nfs4_deviceid be_devid; /* FIXME: could use device cache instead */ | 131 | struct list_head be_list; |
91 | struct block_device *be_mdev; | 132 | }; |
133 | struct nfs4_deviceid_node *be_device; | ||
92 | sector_t be_f_offset; /* the starting offset in the file */ | 134 | sector_t be_f_offset; /* the starting offset in the file */ |
93 | sector_t be_length; /* the size of the extent */ | 135 | sector_t be_length; /* the size of the extent */ |
94 | sector_t be_v_offset; /* the starting offset in the volume */ | 136 | sector_t be_v_offset; /* the starting offset in the volume */ |
95 | enum exstate4 be_state; /* the state of this extent */ | 137 | enum exstate4 be_state; /* the state of this extent */ |
96 | struct pnfs_inval_markings *be_inval; /* tracks INVAL->RW transition */ | 138 | #define EXTENT_WRITTEN 1 |
139 | #define EXTENT_COMMITTING 2 | ||
140 | unsigned int be_tag; | ||
97 | }; | 141 | }; |
98 | 142 | ||
99 | /* Shortened extent used by LAYOUTCOMMIT */ | 143 | /* on the wire size of the extent */ |
100 | struct pnfs_block_short_extent { | 144 | #define BL_EXTENT_SIZE (7 * sizeof(__be32) + NFS4_DEVICEID4_SIZE) |
101 | struct list_head bse_node; | ||
102 | struct nfs4_deviceid bse_devid; | ||
103 | struct block_device *bse_mdev; | ||
104 | sector_t bse_f_offset; /* the starting offset in the file */ | ||
105 | sector_t bse_length; /* the size of the extent */ | ||
106 | }; | ||
107 | |||
108 | static inline void | ||
109 | BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize) | ||
110 | { | ||
111 | spin_lock_init(&marks->im_lock); | ||
112 | INIT_LIST_HEAD(&marks->im_tree.mtt_stub); | ||
113 | INIT_LIST_HEAD(&marks->im_extents); | ||
114 | marks->im_block_size = blocksize; | ||
115 | marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS, | ||
116 | blocksize); | ||
117 | } | ||
118 | |||
119 | enum extentclass4 { | ||
120 | RW_EXTENT = 0, /* READWRITE and INVAL */ | ||
121 | RO_EXTENT = 1, /* READ and NONE */ | ||
122 | EXTENT_LISTS = 2, | ||
123 | }; | ||
124 | |||
125 | static inline int bl_choose_list(enum exstate4 state) | ||
126 | { | ||
127 | if (state == PNFS_BLOCK_READ_DATA || state == PNFS_BLOCK_NONE_DATA) | ||
128 | return RO_EXTENT; | ||
129 | else | ||
130 | return RW_EXTENT; | ||
131 | } | ||
132 | 145 | ||
133 | struct pnfs_block_layout { | 146 | struct pnfs_block_layout { |
134 | struct pnfs_layout_hdr bl_layout; | 147 | struct pnfs_layout_hdr bl_layout; |
135 | struct pnfs_inval_markings bl_inval; /* tracks INVAL->RW transition */ | 148 | struct rb_root bl_ext_rw; |
149 | struct rb_root bl_ext_ro; | ||
136 | spinlock_t bl_ext_lock; /* Protects list manipulation */ | 150 | spinlock_t bl_ext_lock; /* Protects list manipulation */ |
137 | struct list_head bl_extents[EXTENT_LISTS]; /* R and RW extents */ | ||
138 | struct list_head bl_commit; /* Needs layout commit */ | ||
139 | struct list_head bl_committing; /* Layout committing */ | ||
140 | unsigned int bl_count; /* entries in bl_commit */ | ||
141 | sector_t bl_blocksize; /* Server blocksize in sectors */ | ||
142 | }; | 151 | }; |
143 | 152 | ||
144 | #define BLK_ID(lo) ((struct block_mount_id *)(NFS_SERVER(lo->plh_inode)->pnfs_ld_data)) | ||
145 | |||
146 | static inline struct pnfs_block_layout * | 153 | static inline struct pnfs_block_layout * |
147 | BLK_LO2EXT(struct pnfs_layout_hdr *lo) | 154 | BLK_LO2EXT(struct pnfs_layout_hdr *lo) |
148 | { | 155 | { |
@@ -171,41 +178,27 @@ struct bl_msg_hdr { | |||
171 | #define BL_DEVICE_REQUEST_PROC 0x1 /* User level process succeeds */ | 178 | #define BL_DEVICE_REQUEST_PROC 0x1 /* User level process succeeds */ |
172 | #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ | 179 | #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ |
173 | 180 | ||
174 | /* blocklayoutdev.c */ | 181 | /* dev.c */ |
175 | ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); | 182 | struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server, |
176 | void bl_pipe_destroy_msg(struct rpc_pipe_msg *); | 183 | struct pnfs_device *pdev, gfp_t gfp_mask); |
177 | void nfs4_blkdev_put(struct block_device *bdev); | 184 | void bl_free_deviceid_node(struct nfs4_deviceid_node *d); |
178 | struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server, | 185 | |
179 | struct pnfs_device *dev); | 186 | /* extent_tree.c */ |
180 | int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, | 187 | int ext_tree_insert(struct pnfs_block_layout *bl, |
181 | struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); | 188 | struct pnfs_block_extent *new); |
182 | 189 | int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, sector_t start, | |
183 | /* blocklayoutdm.c */ | 190 | sector_t end); |
184 | void bl_free_block_dev(struct pnfs_block_dev *bdev); | 191 | int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, |
185 | 192 | sector_t len); | |
186 | /* extents.c */ | 193 | bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect, |
187 | struct pnfs_block_extent * | 194 | struct pnfs_block_extent *ret, bool rw); |
188 | bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, | 195 | int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg); |
189 | struct pnfs_block_extent **cow_read); | 196 | void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status); |
190 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, | 197 | |
191 | sector_t offset, sector_t length); | 198 | /* rpc_pipefs.c */ |
192 | void bl_put_extent(struct pnfs_block_extent *be); | 199 | dev_t bl_resolve_deviceid(struct nfs_server *server, |
193 | struct pnfs_block_extent *bl_alloc_extent(void); | 200 | struct pnfs_block_volume *b, gfp_t gfp_mask); |
194 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect); | 201 | int __init bl_init_pipefs(void); |
195 | int encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | 202 | void __exit bl_cleanup_pipefs(void); |
196 | struct xdr_stream *xdr, | ||
197 | const struct nfs4_layoutcommit_args *arg); | ||
198 | void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | ||
199 | const struct nfs4_layoutcommit_args *arg, | ||
200 | int status); | ||
201 | int bl_add_merge_extent(struct pnfs_block_layout *bl, | ||
202 | struct pnfs_block_extent *new); | ||
203 | int bl_mark_for_commit(struct pnfs_block_extent *be, | ||
204 | sector_t offset, sector_t length, | ||
205 | struct pnfs_block_short_extent *new); | ||
206 | int bl_push_one_short_extent(struct pnfs_inval_markings *marks); | ||
207 | struct pnfs_block_short_extent * | ||
208 | bl_pop_one_short_extent(struct pnfs_inval_markings *marks); | ||
209 | void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free); | ||
210 | 203 | ||
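Taken together, the new prototypes replace roughly a dozen list-management entry points with six tree operations. A hedged sketch of the intended call order across a write and LAYOUTCOMMIT cycle; the wrapper function and its control flow are illustrative, not taken from this diff:

```c
/* Illustrative only: how blocklayout.c is expected to drive the new
 * extent-tree API over one write/commit cycle. */
static void bl_commit_cycle_sketch(struct pnfs_block_layout *bl,
				   struct nfs4_layoutcommit_args *arg,
				   sector_t isect, sector_t len)
{
	struct pnfs_block_extent be;

	if (ext_tree_lookup(bl, isect, &be, true))	/* find RW mapping */
		ext_tree_mark_written(bl, isect, len);	/* INVAL -> written */

	if (!ext_tree_prepare_commit(arg))		/* encode written ranges */
		ext_tree_mark_committed(arg, 0);	/* ...after the RPC ends */
}
```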
211 | #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ | 204 | #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ |
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c deleted file mode 100644 index 04303b5c9361..000000000000 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ /dev/null | |||
@@ -1,384 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayoutdev.c | ||
3 | * | ||
4 | * Device operations for the pnfs nfs4 file layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2006 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@citi.umich.edu> | ||
10 | * Fred Isaman <iisaman@umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/buffer_head.h> /* __bread */ | ||
34 | |||
35 | #include <linux/genhd.h> | ||
36 | #include <linux/blkdev.h> | ||
37 | #include <linux/hash.h> | ||
38 | |||
39 | #include "blocklayout.h" | ||
40 | |||
41 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
42 | |||
43 | static int decode_sector_number(__be32 **rp, sector_t *sp) | ||
44 | { | ||
45 | uint64_t s; | ||
46 | |||
47 | *rp = xdr_decode_hyper(*rp, &s); | ||
48 | if (s & 0x1ff) { | ||
49 | printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__); | ||
50 | return -1; | ||
51 | } | ||
52 | *sp = s >> SECTOR_SHIFT; | ||
53 | return 0; | ||
54 | } | ||
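decode_sector_number() converts an XDR byte quantity into 512-byte sectors and rejects anything that is not sector aligned; `s & 0x1ff` tests the low nine bits since SECTOR_SHIFT is 9. A standalone userspace illustration of the check, with arbitrary values:

```c
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9

int main(void)
{
	uint64_t aligned = 4096, unaligned = 4096 + 17;

	printf("%llu -> sector %llu\n", (unsigned long long)aligned,
	       (unsigned long long)(aligned >> SECTOR_SHIFT));	/* 8 */
	printf("%llu & 0x1ff = %llu -> rejected\n",
	       (unsigned long long)unaligned,
	       (unsigned long long)(unaligned & 0x1ff));	/* 17 */
	return 0;
}
```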
55 | |||
56 | /* | ||
57 | * Release the block device | ||
58 | */ | ||
59 | void nfs4_blkdev_put(struct block_device *bdev) | ||
60 | { | ||
61 | dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev), | ||
62 | MINOR(bdev->bd_dev)); | ||
63 | blkdev_put(bdev, FMODE_READ); | ||
64 | } | ||
65 | |||
66 | ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, | ||
67 | size_t mlen) | ||
68 | { | ||
69 | struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, | ||
70 | nfs_net_id); | ||
71 | |||
72 | if (mlen != sizeof (struct bl_dev_msg)) | ||
73 | return -EINVAL; | ||
74 | |||
75 | if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0) | ||
76 | return -EFAULT; | ||
77 | |||
78 | wake_up(&nn->bl_wq); | ||
79 | |||
80 | return mlen; | ||
81 | } | ||
82 | |||
83 | void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) | ||
84 | { | ||
85 | struct bl_pipe_msg *bl_pipe_msg = container_of(msg, struct bl_pipe_msg, msg); | ||
86 | |||
87 | if (msg->errno >= 0) | ||
88 | return; | ||
89 | wake_up(bl_pipe_msg->bl_wq); | ||
90 | } | ||
91 | |||
92 | /* | ||
93 | * Decodes pnfs_block_deviceaddr4 which is XDR encoded in dev->dev_addr_buf. | ||
94 | */ | ||
95 | struct pnfs_block_dev * | ||
96 | nfs4_blk_decode_device(struct nfs_server *server, | ||
97 | struct pnfs_device *dev) | ||
98 | { | ||
99 | struct pnfs_block_dev *rv; | ||
100 | struct block_device *bd = NULL; | ||
101 | struct bl_pipe_msg bl_pipe_msg; | ||
102 | struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; | ||
103 | struct bl_msg_hdr bl_msg = { | ||
104 | .type = BL_DEVICE_MOUNT, | ||
105 | .totallen = dev->mincount, | ||
106 | }; | ||
107 | uint8_t *dataptr; | ||
108 | DECLARE_WAITQUEUE(wq, current); | ||
109 | int offset, len, i, rc; | ||
110 | struct net *net = server->nfs_client->cl_net; | ||
111 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
112 | struct bl_dev_msg *reply = &nn->bl_mount_reply; | ||
113 | |||
114 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); | ||
115 | dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, | ||
116 | dev->mincount); | ||
117 | |||
118 | bl_pipe_msg.bl_wq = &nn->bl_wq; | ||
119 | memset(msg, 0, sizeof(*msg)); | ||
120 | msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); | ||
121 | if (!msg->data) { | ||
122 | rv = ERR_PTR(-ENOMEM); | ||
123 | goto out; | ||
124 | } | ||
125 | |||
126 | memcpy(msg->data, &bl_msg, sizeof(bl_msg)); | ||
127 | dataptr = (uint8_t *) msg->data; | ||
128 | len = dev->mincount; | ||
129 | offset = sizeof(bl_msg); | ||
130 | for (i = 0; len > 0; i++) { | ||
131 | memcpy(&dataptr[offset], page_address(dev->pages[i]), | ||
132 | len < PAGE_CACHE_SIZE ? len : PAGE_CACHE_SIZE); | ||
133 | len -= PAGE_CACHE_SIZE; | ||
134 | offset += PAGE_CACHE_SIZE; | ||
135 | } | ||
136 | msg->len = sizeof(bl_msg) + dev->mincount; | ||
137 | |||
138 | dprintk("%s CALLING USERSPACE DAEMON\n", __func__); | ||
139 | add_wait_queue(&nn->bl_wq, &wq); | ||
140 | rc = rpc_queue_upcall(nn->bl_device_pipe, msg); | ||
141 | if (rc < 0) { | ||
142 | remove_wait_queue(&nn->bl_wq, &wq); | ||
143 | rv = ERR_PTR(rc); | ||
144 | goto out; | ||
145 | } | ||
146 | |||
147 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
148 | schedule(); | ||
149 | __set_current_state(TASK_RUNNING); | ||
150 | remove_wait_queue(&nn->bl_wq, &wq); | ||
151 | |||
152 | if (reply->status != BL_DEVICE_REQUEST_PROC) { | ||
153 | dprintk("%s failed to open device: %d\n", | ||
154 | __func__, reply->status); | ||
155 | rv = ERR_PTR(-EINVAL); | ||
156 | goto out; | ||
157 | } | ||
158 | |||
159 | bd = blkdev_get_by_dev(MKDEV(reply->major, reply->minor), | ||
160 | FMODE_READ, NULL); | ||
161 | if (IS_ERR(bd)) { | ||
162 | dprintk("%s failed to open device : %ld\n", __func__, | ||
163 | PTR_ERR(bd)); | ||
164 | rv = ERR_CAST(bd); | ||
165 | goto out; | ||
166 | } | ||
167 | |||
168 | rv = kzalloc(sizeof(*rv), GFP_NOFS); | ||
169 | if (!rv) { | ||
170 | rv = ERR_PTR(-ENOMEM); | ||
171 | goto out; | ||
172 | } | ||
173 | |||
174 | rv->bm_mdev = bd; | ||
175 | memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid)); | ||
176 | rv->net = net; | ||
177 | dprintk("%s Created device %s with bd_block_size %u\n", | ||
178 | __func__, | ||
179 | bd->bd_disk->disk_name, | ||
180 | bd->bd_block_size); | ||
181 | |||
182 | out: | ||
183 | kfree(msg->data); | ||
184 | return rv; | ||
185 | } | ||
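The deleted path resolved deviceids by round-tripping through the blkmapd daemon over rpc_pipefs: a fixed bl_msg_hdr followed by the opaque device XDR goes up, and bl_pipe_downcall() above copies the reply back before waking the waiter. A userspace sketch of the message layout; the header shape mirrors bl_msg_hdr, while the type value and payload size are assumptions for illustration:

```c
#include <stdint.h>
#include <stdio.h>

/* mirrors struct bl_msg_hdr from blocklayout.h */
struct bl_msg_hdr { uint8_t type; uint16_t totallen; };

int main(void)
{
	struct bl_msg_hdr hdr = {
		.type = 0x1,		/* assumed: BL_DEVICE_MOUNT */
		.totallen = 128,	/* dev->mincount for this request */
	};

	printf("upcall message: %zu header + %u XDR = %zu bytes\n",
	       sizeof(hdr), (unsigned)hdr.totallen,
	       sizeof(hdr) + hdr.totallen);
	return 0;
}
```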
186 | |||
187 | /* Map deviceid returned by the server to constructed block_device */ | ||
188 | static struct block_device *translate_devid(struct pnfs_layout_hdr *lo, | ||
189 | struct nfs4_deviceid *id) | ||
190 | { | ||
191 | struct block_device *rv = NULL; | ||
192 | struct block_mount_id *mid; | ||
193 | struct pnfs_block_dev *dev; | ||
194 | |||
195 | dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id); | ||
196 | mid = BLK_ID(lo); | ||
197 | spin_lock(&mid->bm_lock); | ||
198 | list_for_each_entry(dev, &mid->bm_devlist, bm_node) { | ||
199 | if (memcmp(id->data, dev->bm_mdevid.data, | ||
200 | NFS4_DEVICEID4_SIZE) == 0) { | ||
201 | rv = dev->bm_mdev; | ||
202 | goto out; | ||
203 | } | ||
204 | } | ||
205 | out: | ||
206 | spin_unlock(&mid->bm_lock); | ||
207 | dprintk("%s returning %p\n", __func__, rv); | ||
208 | return rv; | ||
209 | } | ||
210 | |||
211 | /* Tracks info needed to ensure extents in layout obey constraints of spec */ | ||
212 | struct layout_verification { | ||
213 | u32 mode; /* R or RW */ | ||
214 | u64 start; /* Expected start of next non-COW extent */ | ||
215 | u64 inval; /* Start of INVAL coverage */ | ||
216 | u64 cowread; /* End of COW read coverage */ | ||
217 | }; | ||
218 | |||
219 | /* Verify the extent meets the layout requirements of the pnfs-block draft, | ||
220 | * section 2.3.1. | ||
221 | */ | ||
222 | static int verify_extent(struct pnfs_block_extent *be, | ||
223 | struct layout_verification *lv) | ||
224 | { | ||
225 | if (lv->mode == IOMODE_READ) { | ||
226 | if (be->be_state == PNFS_BLOCK_READWRITE_DATA || | ||
227 | be->be_state == PNFS_BLOCK_INVALID_DATA) | ||
228 | return -EIO; | ||
229 | if (be->be_f_offset != lv->start) | ||
230 | return -EIO; | ||
231 | lv->start += be->be_length; | ||
232 | return 0; | ||
233 | } | ||
234 | /* lv->mode == IOMODE_RW */ | ||
235 | if (be->be_state == PNFS_BLOCK_READWRITE_DATA) { | ||
236 | if (be->be_f_offset != lv->start) | ||
237 | return -EIO; | ||
238 | if (lv->cowread > lv->start) | ||
239 | return -EIO; | ||
240 | lv->start += be->be_length; | ||
241 | lv->inval = lv->start; | ||
242 | return 0; | ||
243 | } else if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
244 | if (be->be_f_offset != lv->start) | ||
245 | return -EIO; | ||
246 | lv->start += be->be_length; | ||
247 | return 0; | ||
248 | } else if (be->be_state == PNFS_BLOCK_READ_DATA) { | ||
249 | if (be->be_f_offset > lv->start) | ||
250 | return -EIO; | ||
251 | if (be->be_f_offset < lv->inval) | ||
252 | return -EIO; | ||
253 | if (be->be_f_offset < lv->cowread) | ||
254 | return -EIO; | ||
255 | /* It looks like you might want to min this with lv->start, | ||
256 | * but you really don't. | ||
257 | */ | ||
258 | lv->inval = lv->inval + be->be_length; | ||
259 | lv->cowread = be->be_f_offset + be->be_length; | ||
260 | return 0; | ||
261 | } else | ||
262 | return -EIO; | ||
263 | } | ||
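verify_extent() enforces the draft's tiling rules: in an RW layout, READWRITE/INVALID extents must cover the range back-to-back starting at the layout offset, while READ extents may only shadow already-seen INVALID ranges (COW sources). A toy userspace check of just the tiling rule, with made-up numbers:

```c
#include <stdio.h>

int main(void)
{
	unsigned long long next = 0;		/* lv.start, in sectors */
	unsigned long long ext[][2] = {		/* { be_f_offset, be_length } */
		{ 0, 64 }, { 64, 32 }, { 100, 8 },	/* gap at 96..100 */
	};

	for (int i = 0; i < 3; i++) {
		if (ext[i][0] != next) {
			printf("extent %d: -EIO (expected %llu, got %llu)\n",
			       i, next, ext[i][0]);
			return 1;
		}
		next += ext[i][1];
	}
	printf("layout tiles [0, %llu) cleanly\n", next);
	return 0;
}
```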
264 | |||
265 | /* XDR decode pnfs_block_layout4 structure */ | ||
266 | int | ||
267 | nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, | ||
268 | struct nfs4_layoutget_res *lgr, gfp_t gfp_flags) | ||
269 | { | ||
270 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); | ||
271 | int i, status = -EIO; | ||
272 | uint32_t count; | ||
273 | struct pnfs_block_extent *be = NULL, *save; | ||
274 | struct xdr_stream stream; | ||
275 | struct xdr_buf buf; | ||
276 | struct page *scratch; | ||
277 | __be32 *p; | ||
278 | struct layout_verification lv = { | ||
279 | .mode = lgr->range.iomode, | ||
280 | .start = lgr->range.offset >> SECTOR_SHIFT, | ||
281 | .inval = lgr->range.offset >> SECTOR_SHIFT, | ||
282 | .cowread = lgr->range.offset >> SECTOR_SHIFT, | ||
283 | }; | ||
284 | LIST_HEAD(extents); | ||
285 | |||
286 | dprintk("---> %s\n", __func__); | ||
287 | |||
288 | scratch = alloc_page(gfp_flags); | ||
289 | if (!scratch) | ||
290 | return -ENOMEM; | ||
291 | |||
292 | xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); | ||
293 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
294 | |||
295 | p = xdr_inline_decode(&stream, 4); | ||
296 | if (unlikely(!p)) | ||
297 | goto out_err; | ||
298 | |||
299 | count = be32_to_cpup(p++); | ||
300 | |||
301 | dprintk("%s enter, number of extents %i\n", __func__, count); | ||
302 | p = xdr_inline_decode(&stream, (28 + NFS4_DEVICEID4_SIZE) * count); | ||
303 | if (unlikely(!p)) | ||
304 | goto out_err; | ||
305 | |||
306 | /* Decode individual extents, putting them in temporary | ||
307 | * staging area until whole layout is decoded to make error | ||
308 | * recovery easier. | ||
309 | */ | ||
310 | for (i = 0; i < count; i++) { | ||
311 | be = bl_alloc_extent(); | ||
312 | if (!be) { | ||
313 | status = -ENOMEM; | ||
314 | goto out_err; | ||
315 | } | ||
316 | memcpy(&be->be_devid, p, NFS4_DEVICEID4_SIZE); | ||
317 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); | ||
318 | be->be_mdev = translate_devid(lo, &be->be_devid); | ||
319 | if (!be->be_mdev) | ||
320 | goto out_err; | ||
321 | |||
322 | /* The next three values are read in as bytes, | ||
323 | * but stored as 512-byte sector lengths | ||
324 | */ | ||
325 | if (decode_sector_number(&p, &be->be_f_offset) < 0) | ||
326 | goto out_err; | ||
327 | if (decode_sector_number(&p, &be->be_length) < 0) | ||
328 | goto out_err; | ||
329 | if (decode_sector_number(&p, &be->be_v_offset) < 0) | ||
330 | goto out_err; | ||
331 | be->be_state = be32_to_cpup(p++); | ||
332 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) | ||
333 | be->be_inval = &bl->bl_inval; | ||
334 | if (verify_extent(be, &lv)) { | ||
335 | dprintk("%s verify failed\n", __func__); | ||
336 | goto out_err; | ||
337 | } | ||
338 | list_add_tail(&be->be_node, &extents); | ||
339 | } | ||
340 | if (lgr->range.offset + lgr->range.length != | ||
341 | lv.start << SECTOR_SHIFT) { | ||
342 | dprintk("%s Final length mismatch\n", __func__); | ||
343 | be = NULL; | ||
344 | goto out_err; | ||
345 | } | ||
346 | if (lv.start < lv.cowread) { | ||
347 | dprintk("%s Final uncovered COW extent\n", __func__); | ||
348 | be = NULL; | ||
349 | goto out_err; | ||
350 | } | ||
351 | /* Extents decoded properly, now try to merge them in to | ||
352 | * existing layout extents. | ||
353 | */ | ||
354 | spin_lock(&bl->bl_ext_lock); | ||
355 | list_for_each_entry_safe(be, save, &extents, be_node) { | ||
356 | list_del(&be->be_node); | ||
357 | status = bl_add_merge_extent(bl, be); | ||
358 | if (status) { | ||
359 | spin_unlock(&bl->bl_ext_lock); | ||
360 | /* This is a fairly catastrophic error, as the | ||
361 | * entire layout extent lists are now corrupted. | ||
362 | * We should have some way to distinguish this. | ||
363 | */ | ||
364 | be = NULL; | ||
365 | goto out_err; | ||
366 | } | ||
367 | } | ||
368 | spin_unlock(&bl->bl_ext_lock); | ||
369 | status = 0; | ||
370 | out: | ||
371 | __free_page(scratch); | ||
372 | dprintk("%s returns %i\n", __func__, status); | ||
373 | return status; | ||
374 | |||
375 | out_err: | ||
376 | bl_put_extent(be); | ||
377 | while (!list_empty(&extents)) { | ||
378 | be = list_first_entry(&extents, struct pnfs_block_extent, | ||
379 | be_node); | ||
380 | list_del(&be->be_node); | ||
381 | bl_put_extent(be); | ||
382 | } | ||
383 | goto out; | ||
384 | } | ||
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c deleted file mode 100644 index 8999cfddd866..000000000000 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ /dev/null | |||
@@ -1,108 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayoutdm.c | ||
3 | * | ||
4 | * Module for the NFSv4.1 pNFS block layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2007 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Fred Isaman <iisaman@umich.edu> | ||
10 | * Andy Adamson <andros@citi.umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | |||
33 | #include <linux/genhd.h> /* gendisk - used in a dprintk*/ | ||
34 | #include <linux/sched.h> | ||
35 | #include <linux/hash.h> | ||
36 | |||
37 | #include "blocklayout.h" | ||
38 | |||
39 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
40 | |||
41 | static void dev_remove(struct net *net, dev_t dev) | ||
42 | { | ||
43 | struct bl_pipe_msg bl_pipe_msg; | ||
44 | struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; | ||
45 | struct bl_dev_msg bl_umount_request; | ||
46 | struct bl_msg_hdr bl_msg = { | ||
47 | .type = BL_DEVICE_UMOUNT, | ||
48 | .totallen = sizeof(bl_umount_request), | ||
49 | }; | ||
50 | uint8_t *dataptr; | ||
51 | DECLARE_WAITQUEUE(wq, current); | ||
52 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
53 | |||
54 | dprintk("Entering %s\n", __func__); | ||
55 | |||
56 | bl_pipe_msg.bl_wq = &nn->bl_wq; | ||
57 | memset(msg, 0, sizeof(*msg)); | ||
58 | msg->len = sizeof(bl_msg) + bl_msg.totallen; | ||
59 | msg->data = kzalloc(msg->len, GFP_NOFS); | ||
60 | if (!msg->data) | ||
61 | goto out; | ||
62 | |||
63 | memset(&bl_umount_request, 0, sizeof(bl_umount_request)); | ||
64 | bl_umount_request.major = MAJOR(dev); | ||
65 | bl_umount_request.minor = MINOR(dev); | ||
66 | |||
67 | memcpy(msg->data, &bl_msg, sizeof(bl_msg)); | ||
68 | dataptr = (uint8_t *) msg->data; | ||
69 | memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); | ||
70 | |||
71 | add_wait_queue(&nn->bl_wq, &wq); | ||
72 | if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { | ||
73 | remove_wait_queue(&nn->bl_wq, &wq); | ||
74 | goto out; | ||
75 | } | ||
76 | |||
77 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
78 | schedule(); | ||
79 | __set_current_state(TASK_RUNNING); | ||
80 | remove_wait_queue(&nn->bl_wq, &wq); | ||
81 | |||
82 | out: | ||
83 | kfree(msg->data); | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * Release meta device | ||
88 | */ | ||
89 | static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev) | ||
90 | { | ||
91 | dprintk("%s Releasing\n", __func__); | ||
92 | nfs4_blkdev_put(bdev->bm_mdev); | ||
93 | dev_remove(bdev->net, bdev->bm_mdev->bd_dev); | ||
94 | } | ||
95 | |||
96 | void bl_free_block_dev(struct pnfs_block_dev *bdev) | ||
97 | { | ||
98 | if (bdev) { | ||
99 | if (bdev->bm_mdev) { | ||
100 | dprintk("%s Removing DM device: %d:%d\n", | ||
101 | __func__, | ||
102 | MAJOR(bdev->bm_mdev->bd_dev), | ||
103 | MINOR(bdev->bm_mdev->bd_dev)); | ||
104 | nfs4_blk_metadev_release(bdev); | ||
105 | } | ||
106 | kfree(bdev); | ||
107 | } | ||
108 | } | ||
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c new file mode 100644 index 000000000000..5aed4f98df41 --- /dev/null +++ b/fs/nfs/blocklayout/dev.c | |||
@@ -0,0 +1,363 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014 Christoph Hellwig. | ||
3 | */ | ||
4 | #include <linux/sunrpc/svc.h> | ||
5 | #include <linux/blkdev.h> | ||
6 | #include <linux/nfs4.h> | ||
7 | #include <linux/nfs_fs.h> | ||
8 | #include <linux/nfs_xdr.h> | ||
9 | |||
10 | #include "blocklayout.h" | ||
11 | |||
12 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
13 | |||
14 | static void | ||
15 | bl_free_device(struct pnfs_block_dev *dev) | ||
16 | { | ||
17 | if (dev->nr_children) { | ||
18 | int i; | ||
19 | |||
20 | for (i = 0; i < dev->nr_children; i++) | ||
21 | bl_free_device(&dev->children[i]); | ||
22 | kfree(dev->children); | ||
23 | } else { | ||
24 | if (dev->bdev) | ||
25 | blkdev_put(dev->bdev, FMODE_READ); | ||
26 | } | ||
27 | } | ||
28 | |||
29 | void | ||
30 | bl_free_deviceid_node(struct nfs4_deviceid_node *d) | ||
31 | { | ||
32 | struct pnfs_block_dev *dev = | ||
33 | container_of(d, struct pnfs_block_dev, node); | ||
34 | |||
35 | bl_free_device(dev); | ||
36 | kfree(dev); | ||
37 | } | ||
38 | |||
39 | static int | ||
40 | nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) | ||
41 | { | ||
42 | __be32 *p; | ||
43 | int i; | ||
44 | |||
45 | p = xdr_inline_decode(xdr, 4); | ||
46 | if (!p) | ||
47 | return -EIO; | ||
48 | b->type = be32_to_cpup(p++); | ||
49 | |||
50 | switch (b->type) { | ||
51 | case PNFS_BLOCK_VOLUME_SIMPLE: | ||
52 | p = xdr_inline_decode(xdr, 4); | ||
53 | if (!p) | ||
54 | return -EIO; | ||
55 | b->simple.nr_sigs = be32_to_cpup(p++); | ||
56 | if (!b->simple.nr_sigs) { | ||
57 | dprintk("no signature\n"); | ||
58 | return -EIO; | ||
59 | } | ||
60 | |||
61 | b->simple.len = 4 + 4; | ||
62 | for (i = 0; i < b->simple.nr_sigs; i++) { | ||
63 | p = xdr_inline_decode(xdr, 8 + 4); | ||
64 | if (!p) | ||
65 | return -EIO; | ||
66 | p = xdr_decode_hyper(p, &b->simple.sigs[i].offset); | ||
67 | b->simple.sigs[i].sig_len = be32_to_cpup(p++); | ||
68 | |||
69 | p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len); | ||
70 | if (!p) | ||
71 | return -EIO; | ||
72 | memcpy(&b->simple.sigs[i].sig, p, | ||
73 | b->simple.sigs[i].sig_len); | ||
74 | |||
75 | b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len; | ||
76 | } | ||
77 | break; | ||
78 | case PNFS_BLOCK_VOLUME_SLICE: | ||
79 | p = xdr_inline_decode(xdr, 8 + 8 + 4); | ||
80 | if (!p) | ||
81 | return -EIO; | ||
82 | p = xdr_decode_hyper(p, &b->slice.start); | ||
83 | p = xdr_decode_hyper(p, &b->slice.len); | ||
84 | b->slice.volume = be32_to_cpup(p++); | ||
85 | break; | ||
86 | case PNFS_BLOCK_VOLUME_CONCAT: | ||
87 | p = xdr_inline_decode(xdr, 4); | ||
88 | if (!p) | ||
89 | return -EIO; | ||
90 | b->concat.volumes_count = be32_to_cpup(p++); | ||
91 | |||
92 | p = xdr_inline_decode(xdr, b->concat.volumes_count * 4); | ||
93 | if (!p) | ||
94 | return -EIO; | ||
95 | for (i = 0; i < b->concat.volumes_count; i++) | ||
96 | b->concat.volumes[i] = be32_to_cpup(p++); | ||
97 | break; | ||
98 | case PNFS_BLOCK_VOLUME_STRIPE: | ||
99 | p = xdr_inline_decode(xdr, 8 + 4); | ||
100 | if (!p) | ||
101 | return -EIO; | ||
102 | p = xdr_decode_hyper(p, &b->stripe.chunk_size); | ||
103 | b->stripe.volumes_count = be32_to_cpup(p++); | ||
104 | |||
105 | p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4); | ||
106 | if (!p) | ||
107 | return -EIO; | ||
108 | for (i = 0; i < b->stripe.volumes_count; i++) | ||
109 | b->stripe.volumes[i] = be32_to_cpup(p++); | ||
110 | break; | ||
111 | default: | ||
112 | dprintk("unknown volume type!\n"); | ||
113 | return -EIO; | ||
114 | } | ||
115 | |||
116 | return 0; | ||
117 | } | ||
118 | |||
119 | static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset, | ||
120 | struct pnfs_block_dev_map *map) | ||
121 | { | ||
122 | map->start = dev->start; | ||
123 | map->len = dev->len; | ||
124 | map->disk_offset = dev->disk_offset; | ||
125 | map->bdev = dev->bdev; | ||
126 | return true; | ||
127 | } | ||
128 | |||
129 | static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset, | ||
130 | struct pnfs_block_dev_map *map) | ||
131 | { | ||
132 | int i; | ||
133 | |||
134 | for (i = 0; i < dev->nr_children; i++) { | ||
135 | struct pnfs_block_dev *child = &dev->children[i]; | ||
136 | |||
137 | if (child->start > offset || | ||
138 | child->start + child->len <= offset) | ||
139 | continue; | ||
140 | |||
141 | child->map(child, offset - child->start, map); | ||
142 | return true; | ||
143 | } | ||
144 | |||
145 | dprintk("%s: ran off loop!\n", __func__); | ||
146 | return false; | ||
147 | } | ||
148 | |||
149 | static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, | ||
150 | struct pnfs_block_dev_map *map) | ||
151 | { | ||
152 | struct pnfs_block_dev *child; | ||
153 | u64 chunk; | ||
154 | u32 chunk_idx; | ||
155 | u64 disk_offset; | ||
156 | |||
157 | chunk = div_u64(offset, dev->chunk_size); | ||
158 | div_u64_rem(chunk, dev->nr_children, &chunk_idx); | ||
159 | |||
160 | if (chunk_idx >= dev->nr_children) { | ||
161 | dprintk("%s: invalid chunk idx %d (%lld/%lld)\n", | ||
162 | __func__, chunk_idx, offset, dev->chunk_size); | ||
163 | /* error, should not happen */ | ||
164 | return false; | ||
165 | } | ||
166 | |||
167 | /* truncate offset to the beginning of the stripe */ | ||
168 | offset = chunk * dev->chunk_size; | ||
169 | |||
170 | /* disk offset of the stripe */ | ||
171 | disk_offset = div_u64(offset, dev->nr_children); | ||
172 | |||
173 | child = &dev->children[chunk_idx]; | ||
174 | child->map(child, disk_offset, map); | ||
175 | |||
176 | map->start += offset; | ||
177 | map->disk_offset += disk_offset; | ||
178 | map->len = dev->chunk_size; | ||
179 | return true; | ||
180 | } | ||
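bl_map_stripe() first decomposes the file offset: the chunk number is offset / chunk_size and the owning child is chunk % nr_children. A worked example with arbitrary numbers; this shows only the decomposition, not the on-disk offset calculation:

```c
#include <stdio.h>

int main(void)
{
	unsigned long long offset = 300, chunk_size = 64;
	unsigned int nr_children = 3;

	unsigned long long chunk = offset / chunk_size;		/* 4 */
	unsigned int chunk_idx = chunk % nr_children;		/* 4 % 3 = 1 */
	unsigned long long chunk_start = chunk * chunk_size;	/* 256 */

	printf("offset %llu: chunk %llu on child %u, chunk starts at %llu\n",
	       offset, chunk, chunk_idx, chunk_start);
	return 0;
}
```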
181 | |||
182 | static int | ||
183 | bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, | ||
184 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask); | ||
185 | |||
186 | |||
187 | static int | ||
188 | bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d, | ||
189 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
190 | { | ||
191 | struct pnfs_block_volume *v = &volumes[idx]; | ||
192 | dev_t dev; | ||
193 | |||
194 | dev = bl_resolve_deviceid(server, v, gfp_mask); | ||
195 | if (!dev) | ||
196 | return -EIO; | ||
197 | |||
198 | d->bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL); | ||
199 | if (IS_ERR(d->bdev)) { | ||
200 | printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n", | ||
201 | MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev)); | ||
202 | return PTR_ERR(d->bdev); | ||
203 | } | ||
204 | |||
205 | |||
206 | d->len = i_size_read(d->bdev->bd_inode); | ||
207 | d->map = bl_map_simple; | ||
208 | |||
209 | printk(KERN_INFO "pNFS: using block device %s\n", | ||
210 | d->bdev->bd_disk->disk_name); | ||
211 | return 0; | ||
212 | } | ||
213 | |||
214 | static int | ||
215 | bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d, | ||
216 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
217 | { | ||
218 | struct pnfs_block_volume *v = &volumes[idx]; | ||
219 | int ret; | ||
220 | |||
221 | ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask); | ||
222 | if (ret) | ||
223 | return ret; | ||
224 | |||
225 | d->disk_offset = v->slice.start; | ||
226 | d->len = v->slice.len; | ||
227 | return 0; | ||
228 | } | ||
229 | |||
230 | static int | ||
231 | bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d, | ||
232 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
233 | { | ||
234 | struct pnfs_block_volume *v = &volumes[idx]; | ||
235 | u64 len = 0; | ||
236 | int ret, i; | ||
237 | |||
238 | d->children = kcalloc(v->concat.volumes_count, | ||
239 | sizeof(struct pnfs_block_dev), GFP_KERNEL); | ||
240 | if (!d->children) | ||
241 | return -ENOMEM; | ||
242 | |||
243 | for (i = 0; i < v->concat.volumes_count; i++) { | ||
244 | ret = bl_parse_deviceid(server, &d->children[i], | ||
245 | volumes, v->concat.volumes[i], gfp_mask); | ||
246 | if (ret) | ||
247 | return ret; | ||
248 | |||
249 | d->nr_children++; | ||
250 | d->children[i].start += len; | ||
251 | len += d->children[i].len; | ||
252 | } | ||
253 | |||
254 | d->len = len; | ||
255 | d->map = bl_map_concat; | ||
256 | return 0; | ||
257 | } | ||
258 | |||
259 | static int | ||
260 | bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d, | ||
261 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
262 | { | ||
263 | struct pnfs_block_volume *v = &volumes[idx]; | ||
264 | u64 len = 0; | ||
265 | int ret, i; | ||
266 | |||
267 | d->children = kcalloc(v->stripe.volumes_count, | ||
268 | sizeof(struct pnfs_block_dev), GFP_KERNEL); | ||
269 | if (!d->children) | ||
270 | return -ENOMEM; | ||
271 | |||
272 | for (i = 0; i < v->stripe.volumes_count; i++) { | ||
273 | ret = bl_parse_deviceid(server, &d->children[i], | ||
274 | volumes, v->stripe.volumes[i], gfp_mask); | ||
275 | if (ret) | ||
276 | return ret; | ||
277 | |||
278 | d->nr_children++; | ||
279 | len += d->children[i].len; | ||
280 | } | ||
281 | |||
282 | d->len = len; | ||
283 | d->chunk_size = v->stripe.chunk_size; | ||
284 | d->map = bl_map_stripe; | ||
285 | return 0; | ||
286 | } | ||
287 | |||
288 | static int | ||
289 | bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, | ||
290 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
291 | { | ||
292 | switch (volumes[idx].type) { | ||
293 | case PNFS_BLOCK_VOLUME_SIMPLE: | ||
294 | return bl_parse_simple(server, d, volumes, idx, gfp_mask); | ||
295 | case PNFS_BLOCK_VOLUME_SLICE: | ||
296 | return bl_parse_slice(server, d, volumes, idx, gfp_mask); | ||
297 | case PNFS_BLOCK_VOLUME_CONCAT: | ||
298 | return bl_parse_concat(server, d, volumes, idx, gfp_mask); | ||
299 | case PNFS_BLOCK_VOLUME_STRIPE: | ||
300 | return bl_parse_stripe(server, d, volumes, idx, gfp_mask); | ||
301 | default: | ||
302 | dprintk("unsupported volume type: %d\n", volumes[idx].type); | ||
303 | return -EIO; | ||
304 | } | ||
305 | } | ||
306 | |||
307 | struct nfs4_deviceid_node * | ||
308 | bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | ||
309 | gfp_t gfp_mask) | ||
310 | { | ||
311 | struct nfs4_deviceid_node *node = NULL; | ||
312 | struct pnfs_block_volume *volumes; | ||
313 | struct pnfs_block_dev *top; | ||
314 | struct xdr_stream xdr; | ||
315 | struct xdr_buf buf; | ||
316 | struct page *scratch; | ||
317 | int nr_volumes, ret, i; | ||
318 | __be32 *p; | ||
319 | |||
320 | scratch = alloc_page(gfp_mask); | ||
321 | if (!scratch) | ||
322 | goto out; | ||
323 | |||
324 | xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen); | ||
325 | xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); | ||
326 | |||
327 | p = xdr_inline_decode(&xdr, sizeof(__be32)); | ||
328 | if (!p) | ||
329 | goto out_free_scratch; | ||
330 | nr_volumes = be32_to_cpup(p++); | ||
331 | |||
332 | volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume), | ||
333 | gfp_mask); | ||
334 | if (!volumes) | ||
335 | goto out_free_scratch; | ||
336 | |||
337 | for (i = 0; i < nr_volumes; i++) { | ||
338 | ret = nfs4_block_decode_volume(&xdr, &volumes[i]); | ||
339 | if (ret < 0) | ||
340 | goto out_free_volumes; | ||
341 | } | ||
342 | |||
343 | top = kzalloc(sizeof(*top), gfp_mask); | ||
344 | if (!top) | ||
345 | goto out_free_volumes; | ||
346 | |||
347 | ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask); | ||
348 | if (ret) { | ||
349 | bl_free_device(top); | ||
350 | kfree(top); | ||
351 | goto out_free_volumes; | ||
352 | } | ||
353 | |||
354 | node = &top->node; | ||
355 | nfs4_init_deviceid_node(node, server, &pdev->dev_id); | ||
356 | |||
357 | out_free_volumes: | ||
358 | kfree(volumes); | ||
359 | out_free_scratch: | ||
360 | __free_page(scratch); | ||
361 | out: | ||
362 | return node; | ||
363 | } | ||
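The device XDR describes a small DAG: each compound volume (slice, concat, stripe) refers to earlier volumes by index, and the root is always the last entry, which is why bl_parse_deviceid() starts at nr_volumes - 1. An illustration with a made-up topology:

```c
#include <stdio.h>

int main(void)
{
	/* indices 0..4; compound volumes only reference lower indices */
	const char *vol[] = {
		"SIMPLE(disk A)", "SIMPLE(disk B)",
		"SLICE(of 0)", "SLICE(of 1)", "STRIPE(over 2, 3)",
	};
	int nr_volumes = sizeof(vol) / sizeof(vol[0]);

	printf("parse starts at the root: vol[%d] = %s\n",
	       nr_volumes - 1, vol[nr_volumes - 1]);
	return 0;
}
```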
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c new file mode 100644 index 000000000000..31d0b5e53dfd --- /dev/null +++ b/fs/nfs/blocklayout/extent_tree.c | |||
@@ -0,0 +1,602 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014 Christoph Hellwig. | ||
3 | */ | ||
4 | |||
5 | #include <linux/vmalloc.h> | ||
6 | |||
7 | #include "blocklayout.h" | ||
8 | |||
9 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
10 | |||
11 | static inline struct pnfs_block_extent * | ||
12 | ext_node(struct rb_node *node) | ||
13 | { | ||
14 | return rb_entry(node, struct pnfs_block_extent, be_node); | ||
15 | } | ||
16 | |||
17 | static struct pnfs_block_extent * | ||
18 | ext_tree_first(struct rb_root *root) | ||
19 | { | ||
20 | struct rb_node *node = rb_first(root); | ||
21 | return node ? ext_node(node) : NULL; | ||
22 | } | ||
23 | |||
24 | static struct pnfs_block_extent * | ||
25 | ext_tree_prev(struct pnfs_block_extent *be) | ||
26 | { | ||
27 | struct rb_node *node = rb_prev(&be->be_node); | ||
28 | return node ? ext_node(node) : NULL; | ||
29 | } | ||
30 | |||
31 | static struct pnfs_block_extent * | ||
32 | ext_tree_next(struct pnfs_block_extent *be) | ||
33 | { | ||
34 | struct rb_node *node = rb_next(&be->be_node); | ||
35 | return node ? ext_node(node) : NULL; | ||
36 | } | ||
37 | |||
38 | static inline sector_t | ||
39 | ext_f_end(struct pnfs_block_extent *be) | ||
40 | { | ||
41 | return be->be_f_offset + be->be_length; | ||
42 | } | ||
43 | |||
44 | static struct pnfs_block_extent * | ||
45 | __ext_tree_search(struct rb_root *root, sector_t start) | ||
46 | { | ||
47 | struct rb_node *node = root->rb_node; | ||
48 | struct pnfs_block_extent *be = NULL; | ||
49 | |||
50 | while (node) { | ||
51 | be = ext_node(node); | ||
52 | if (start < be->be_f_offset) | ||
53 | node = node->rb_left; | ||
54 | else if (start >= ext_f_end(be)) | ||
55 | node = node->rb_right; | ||
56 | else | ||
57 | return be; | ||
58 | } | ||
59 | |||
60 | if (be) { | ||
61 | if (start < be->be_f_offset) | ||
62 | return be; | ||
63 | |||
64 | if (start >= ext_f_end(be)) | ||
65 | return ext_tree_next(be); | ||
66 | } | ||
67 | |||
68 | return NULL; | ||
69 | } | ||
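__ext_tree_search() returns the extent containing `start` when one exists; otherwise it returns the first extent beginning after `start`, or NULL if `start` lies beyond the last extent (the trailing if-block handles the case where the descent fell off the tree). The same contract expressed on a sorted array, for illustration only:

```c
#include <stdio.h>

struct ext { unsigned long long off, len; };

/* first extent whose end is past 'start': containing or next */
static const struct ext *search(const struct ext *e, int n,
				unsigned long long start)
{
	for (int i = 0; i < n; i++)
		if (start < e[i].off + e[i].len)
			return &e[i];
	return NULL;
}

int main(void)
{
	struct ext tree[] = { { 0, 10 }, { 20, 10 } };

	printf("5  -> [%llu,+%llu)\n", search(tree, 2, 5)->off,
	       search(tree, 2, 5)->len);	/* containing: [0,+10) */
	printf("12 -> [%llu,+%llu)\n", search(tree, 2, 12)->off,
	       search(tree, 2, 12)->len);	/* next: [20,+10) */
	printf("35 -> %s\n", search(tree, 2, 35) ? "found" : "NULL");
	return 0;
}
```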
70 | |||
71 | static bool | ||
72 | ext_can_merge(struct pnfs_block_extent *be1, struct pnfs_block_extent *be2) | ||
73 | { | ||
74 | if (be1->be_state != be2->be_state) | ||
75 | return false; | ||
76 | if (be1->be_device != be2->be_device) | ||
77 | return false; | ||
78 | |||
79 | if (be1->be_f_offset + be1->be_length != be2->be_f_offset) | ||
80 | return false; | ||
81 | |||
82 | if (be1->be_state != PNFS_BLOCK_NONE_DATA && | ||
83 | (be1->be_v_offset + be1->be_length != be2->be_v_offset)) | ||
84 | return false; | ||
85 | |||
86 | if (be1->be_state == PNFS_BLOCK_INVALID_DATA && | ||
87 | be1->be_tag != be2->be_tag) | ||
88 | return false; | ||
89 | |||
90 | return true; | ||
91 | } | ||
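Two extents merge only when they share state and device, are contiguous in file-offset space, are contiguous on the volume (skipped for NONE_DATA, which has no volume mapping), and carry the same tag if INVALID. A userspace illustration of the two contiguity checks; field names mirror struct pnfs_block_extent and the values are invented:

```c
#include <stdbool.h>
#include <stdio.h>

struct ext { unsigned long long f_off, v_off, len; int state; };

static bool contiguous(const struct ext *a, const struct ext *b)
{
	return a->state == b->state &&
	       a->f_off + a->len == b->f_off &&	/* adjacent in the file */
	       a->v_off + a->len == b->v_off;	/* adjacent on the volume */
}

int main(void)
{
	struct ext a = { 0, 1000, 8, 1 }, b = { 8, 1008, 4, 1 };

	printf("mergeable: %d\n", contiguous(&a, &b));	/* 1 */
	b.v_off = 2000;		/* file-adjacent but not volume-adjacent */
	printf("mergeable: %d\n", contiguous(&a, &b));	/* 0 */
	return 0;
}
```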
92 | |||
93 | static struct pnfs_block_extent * | ||
94 | ext_try_to_merge_left(struct rb_root *root, struct pnfs_block_extent *be) | ||
95 | { | ||
96 | struct pnfs_block_extent *left = ext_tree_prev(be); | ||
97 | |||
98 | if (left && ext_can_merge(left, be)) { | ||
99 | left->be_length += be->be_length; | ||
100 | rb_erase(&be->be_node, root); | ||
101 | nfs4_put_deviceid_node(be->be_device); | ||
102 | kfree(be); | ||
103 | return left; | ||
104 | } | ||
105 | |||
106 | return be; | ||
107 | } | ||
108 | |||
109 | static struct pnfs_block_extent * | ||
110 | ext_try_to_merge_right(struct rb_root *root, struct pnfs_block_extent *be) | ||
111 | { | ||
112 | struct pnfs_block_extent *right = ext_tree_next(be); | ||
113 | |||
114 | if (right && ext_can_merge(be, right)) { | ||
115 | be->be_length += right->be_length; | ||
116 | rb_erase(&right->be_node, root); | ||
117 | nfs4_put_deviceid_node(right->be_device); | ||
118 | kfree(right); | ||
119 | } | ||
120 | |||
121 | return be; | ||
122 | } | ||
123 | |||
124 | static void | ||
125 | __ext_tree_insert(struct rb_root *root, | ||
126 | struct pnfs_block_extent *new, bool merge_ok) | ||
127 | { | ||
128 | struct rb_node **p = &root->rb_node, *parent = NULL; | ||
129 | struct pnfs_block_extent *be; | ||
130 | |||
131 | while (*p) { | ||
132 | parent = *p; | ||
133 | be = ext_node(parent); | ||
134 | |||
135 | if (new->be_f_offset < be->be_f_offset) { | ||
136 | if (merge_ok && ext_can_merge(new, be)) { | ||
137 | be->be_f_offset = new->be_f_offset; | ||
138 | if (be->be_state != PNFS_BLOCK_NONE_DATA) | ||
139 | be->be_v_offset = new->be_v_offset; | ||
140 | be->be_length += new->be_length; | ||
141 | be = ext_try_to_merge_left(root, be); | ||
142 | goto free_new; | ||
143 | } | ||
144 | p = &(*p)->rb_left; | ||
145 | } else if (new->be_f_offset >= ext_f_end(be)) { | ||
146 | if (merge_ok && ext_can_merge(be, new)) { | ||
147 | be->be_length += new->be_length; | ||
148 | be = ext_try_to_merge_right(root, be); | ||
149 | goto free_new; | ||
150 | } | ||
151 | p = &(*p)->rb_right; | ||
152 | } else { | ||
153 | BUG(); | ||
154 | } | ||
155 | } | ||
156 | |||
157 | rb_link_node(&new->be_node, parent, p); | ||
158 | rb_insert_color(&new->be_node, root); | ||
159 | return; | ||
160 | free_new: | ||
161 | nfs4_put_deviceid_node(new->be_device); | ||
162 | kfree(new); | ||
163 | } | ||
164 | |||
165 | static int | ||
166 | __ext_tree_remove(struct rb_root *root, sector_t start, sector_t end) | ||
167 | { | ||
168 | struct pnfs_block_extent *be; | ||
169 | sector_t len1 = 0, len2 = 0; | ||
170 | sector_t orig_v_offset; | ||
171 | sector_t orig_len; | ||
172 | |||
173 | be = __ext_tree_search(root, start); | ||
174 | if (!be) | ||
175 | return 0; | ||
176 | if (be->be_f_offset >= end) | ||
177 | return 0; | ||
178 | |||
179 | orig_v_offset = be->be_v_offset; | ||
180 | orig_len = be->be_length; | ||
181 | |||
182 | if (start > be->be_f_offset) | ||
183 | len1 = start - be->be_f_offset; | ||
184 | if (ext_f_end(be) > end) | ||
185 | len2 = ext_f_end(be) - end; | ||
186 | |||
187 | if (len2 > 0) { | ||
188 | if (len1 > 0) { | ||
189 | struct pnfs_block_extent *new; | ||
190 | |||
191 | new = kzalloc(sizeof(*new), GFP_ATOMIC); | ||
192 | if (!new) | ||
193 | return -ENOMEM; | ||
194 | |||
195 | be->be_length = len1; | ||
196 | |||
197 | new->be_f_offset = end; | ||
198 | if (be->be_state != PNFS_BLOCK_NONE_DATA) { | ||
199 | new->be_v_offset = | ||
200 | orig_v_offset + orig_len - len2; | ||
201 | } | ||
202 | new->be_length = len2; | ||
203 | new->be_state = be->be_state; | ||
204 | new->be_tag = be->be_tag; | ||
205 | new->be_device = nfs4_get_deviceid(be->be_device); | ||
206 | |||
207 | __ext_tree_insert(root, new, true); | ||
208 | } else { | ||
209 | be->be_f_offset = end; | ||
210 | if (be->be_state != PNFS_BLOCK_NONE_DATA) { | ||
211 | be->be_v_offset = | ||
212 | orig_v_offset + orig_len - len2; | ||
213 | } | ||
214 | be->be_length = len2; | ||
215 | } | ||
216 | } else { | ||
217 | if (len1 > 0) { | ||
218 | be->be_length = len1; | ||
219 | be = ext_tree_next(be); | ||
220 | } | ||
221 | |||
222 | while (be && ext_f_end(be) <= end) { | ||
223 | struct pnfs_block_extent *next = ext_tree_next(be); | ||
224 | |||
225 | rb_erase(&be->be_node, root); | ||
226 | nfs4_put_deviceid_node(be->be_device); | ||
227 | kfree(be); | ||
228 | be = next; | ||
229 | } | ||
230 | |||
231 | if (be && be->be_f_offset < end) { | ||
232 | len1 = ext_f_end(be) - end; | ||
233 | be->be_f_offset = end; | ||
234 | if (be->be_state != PNFS_BLOCK_NONE_DATA) | ||
235 | be->be_v_offset += be->be_length - len1; | ||
236 | be->be_length = len1; | ||
237 | } | ||
238 | } | ||
239 | |||
240 | return 0; | ||
241 | } | ||
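When the removal range punches a hole in the middle of one extent (len1 and len2 both nonzero), the front keeps len1 sectors and a new extent carries the remaining len2, with its volume offset advanced accordingly. Worked numbers:

```c
#include <stdio.h>

int main(void)
{
	unsigned long long f_off = 100, len = 50;	/* extent [100,150) */
	unsigned long long start = 110, end = 130;	/* range to remove  */

	unsigned long long len1 = start - f_off;	/* 10 kept in front */
	unsigned long long len2 = f_off + len - end;	/* 20 kept behind   */

	printf("front: [%llu,%llu), back: [%llu,%llu)\n",
	       f_off, f_off + len1, end, end + len2);
	/* back piece's be_v_offset = orig_v_offset + orig_len - len2 */
	return 0;
}
```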
242 | |||
243 | int | ||
244 | ext_tree_insert(struct pnfs_block_layout *bl, struct pnfs_block_extent *new) | ||
245 | { | ||
246 | struct pnfs_block_extent *be; | ||
247 | struct rb_root *root; | ||
248 | int err = 0; | ||
249 | |||
250 | switch (new->be_state) { | ||
251 | case PNFS_BLOCK_READWRITE_DATA: | ||
252 | case PNFS_BLOCK_INVALID_DATA: | ||
253 | root = &bl->bl_ext_rw; | ||
254 | break; | ||
255 | case PNFS_BLOCK_READ_DATA: | ||
256 | case PNFS_BLOCK_NONE_DATA: | ||
257 | root = &bl->bl_ext_ro; | ||
258 | break; | ||
259 | default: | ||
260 | dprintk("invalid extent type\n"); | ||
261 | return -EINVAL; | ||
262 | } | ||
263 | |||
264 | spin_lock(&bl->bl_ext_lock); | ||
265 | retry: | ||
266 | be = __ext_tree_search(root, new->be_f_offset); | ||
267 | if (!be || be->be_f_offset >= ext_f_end(new)) { | ||
268 | __ext_tree_insert(root, new, true); | ||
269 | } else if (new->be_f_offset >= be->be_f_offset) { | ||
270 | if (ext_f_end(new) <= ext_f_end(be)) { | ||
271 | nfs4_put_deviceid_node(new->be_device); | ||
272 | kfree(new); | ||
273 | } else { | ||
274 | sector_t new_len = ext_f_end(new) - ext_f_end(be); | ||
275 | sector_t diff = new->be_length - new_len; | ||
276 | |||
277 | new->be_f_offset += diff; | ||
278 | new->be_v_offset += diff; | ||
279 | new->be_length = new_len; | ||
280 | goto retry; | ||
281 | } | ||
282 | } else if (ext_f_end(new) <= ext_f_end(be)) { | ||
283 | new->be_length = be->be_f_offset - new->be_f_offset; | ||
284 | __ext_tree_insert(root, new, true); | ||
285 | } else { | ||
286 | struct pnfs_block_extent *split; | ||
287 | sector_t new_len = ext_f_end(new) - ext_f_end(be); | ||
288 | sector_t diff = new->be_length - new_len; | ||
289 | |||
290 | split = kmemdup(new, sizeof(*new), GFP_ATOMIC); | ||
291 | if (!split) { | ||
292 | err = -ENOMEM; | ||
293 | goto out; | ||
294 | } | ||
295 | |||
296 | split->be_length = be->be_f_offset - split->be_f_offset; | ||
297 | split->be_device = nfs4_get_deviceid(new->be_device); | ||
298 | __ext_tree_insert(root, split, true); | ||
299 | |||
300 | new->be_f_offset += diff; | ||
301 | new->be_v_offset += diff; | ||
302 | new->be_length = new_len; | ||
303 | goto retry; | ||
304 | } | ||
305 | out: | ||
306 | spin_unlock(&bl->bl_ext_lock); | ||
307 | return err; | ||
308 | } | ||
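When the incoming extent overhangs an existing one on both sides, ext_tree_insert() splits off the non-overlapping front, then retries with just the tail past the existing extent. Tracing the arithmetic for a new extent [0,100) over an existing [40,60):

```c
#include <stdio.h>

int main(void)
{
	unsigned long long new_off = 0, new_len = 100;	/* incoming */
	unsigned long long be_off = 40, be_len = 20;	/* in the tree */

	unsigned long long tail = (new_off + new_len) - (be_off + be_len); /* 40 */
	unsigned long long diff = new_len - tail;			    /* 60 */

	printf("front split: [%llu,%llu)\n", new_off, be_off);	/* [0,40)   */
	printf("retried tail: [%llu,%llu)\n", new_off + diff,
	       new_off + diff + tail);				/* [60,100) */
	return 0;
}
```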
309 | |||
310 | static bool | ||
311 | __ext_tree_lookup(struct rb_root *root, sector_t isect, | ||
312 | struct pnfs_block_extent *ret) | ||
313 | { | ||
314 | struct rb_node *node; | ||
315 | struct pnfs_block_extent *be; | ||
316 | |||
317 | node = root->rb_node; | ||
318 | while (node) { | ||
319 | be = ext_node(node); | ||
320 | if (isect < be->be_f_offset) | ||
321 | node = node->rb_left; | ||
322 | else if (isect >= ext_f_end(be)) | ||
323 | node = node->rb_right; | ||
324 | else { | ||
325 | *ret = *be; | ||
326 | return true; | ||
327 | } | ||
328 | } | ||
329 | |||
330 | return false; | ||
331 | } | ||
332 | |||
333 | bool | ||
334 | ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect, | ||
335 | struct pnfs_block_extent *ret, bool rw) | ||
336 | { | ||
337 | bool found = false; | ||
338 | |||
339 | spin_lock(&bl->bl_ext_lock); | ||
340 | if (!rw) | ||
341 | found = __ext_tree_lookup(&bl->bl_ext_ro, isect, ret); | ||
342 | if (!found) | ||
343 | found = __ext_tree_lookup(&bl->bl_ext_rw, isect, ret); | ||
344 | spin_unlock(&bl->bl_ext_lock); | ||
345 | |||
346 | return found; | ||
347 | } | ||
348 | |||
349 | int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, | ||
350 | sector_t start, sector_t end) | ||
351 | { | ||
352 | int err, err2; | ||
353 | |||
354 | spin_lock(&bl->bl_ext_lock); | ||
355 | err = __ext_tree_remove(&bl->bl_ext_ro, start, end); | ||
356 | if (rw) { | ||
357 | err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end); | ||
358 | if (!err) | ||
359 | err = err2; | ||
360 | } | ||
361 | spin_unlock(&bl->bl_ext_lock); | ||
362 | |||
363 | return err; | ||
364 | } | ||
365 | |||
366 | static int | ||
367 | ext_tree_split(struct rb_root *root, struct pnfs_block_extent *be, | ||
368 | sector_t split) | ||
369 | { | ||
370 | struct pnfs_block_extent *new; | ||
371 | sector_t orig_len = be->be_length; | ||
372 | |||
373 | new = kzalloc(sizeof(*new), GFP_ATOMIC); | ||
374 | if (!new) | ||
375 | return -ENOMEM; | ||
376 | |||
377 | be->be_length = split - be->be_f_offset; | ||
378 | |||
379 | new->be_f_offset = split; | ||
380 | if (be->be_state != PNFS_BLOCK_NONE_DATA) | ||
381 | new->be_v_offset = be->be_v_offset + be->be_length; | ||
382 | new->be_length = orig_len - be->be_length; | ||
383 | new->be_state = be->be_state; | ||
384 | new->be_tag = be->be_tag; | ||
385 | new->be_device = nfs4_get_deviceid(be->be_device); | ||
386 | |||
387 | __ext_tree_insert(root, new, false); | ||
388 | return 0; | ||
389 | } | ||
390 | |||
391 | int | ||
392 | ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, | ||
393 | sector_t len) | ||
394 | { | ||
395 | struct rb_root *root = &bl->bl_ext_rw; | ||
396 | sector_t end = start + len; | ||
397 | struct pnfs_block_extent *be; | ||
398 | int err = 0; | ||
399 | |||
400 | spin_lock(&bl->bl_ext_lock); | ||
401 | /* | ||
402 | * First remove all COW extents or holes from the written-to range. | ||
403 | */ | ||
404 | err = __ext_tree_remove(&bl->bl_ext_ro, start, end); | ||
405 | if (err) | ||
406 | goto out; | ||
407 | |||
408 | /* | ||
409 | * Then mark all invalid extents in the range as written to. | ||
410 | */ | ||
411 | for (be = __ext_tree_search(root, start); be; be = ext_tree_next(be)) { | ||
412 | if (be->be_f_offset >= end) | ||
413 | break; | ||
414 | |||
415 | if (be->be_state != PNFS_BLOCK_INVALID_DATA || be->be_tag) | ||
416 | continue; | ||
417 | |||
418 | if (be->be_f_offset < start) { | ||
419 | struct pnfs_block_extent *left = ext_tree_prev(be); | ||
420 | |||
421 | if (left && ext_can_merge(left, be)) { | ||
422 | sector_t diff = start - be->be_f_offset; | ||
423 | |||
424 | left->be_length += diff; | ||
425 | |||
426 | be->be_f_offset += diff; | ||
427 | be->be_v_offset += diff; | ||
428 | be->be_length -= diff; | ||
429 | } else { | ||
430 | err = ext_tree_split(root, be, start); | ||
431 | if (err) | ||
432 | goto out; | ||
433 | } | ||
434 | } | ||
435 | |||
436 | if (ext_f_end(be) > end) { | ||
437 | struct pnfs_block_extent *right = ext_tree_next(be); | ||
438 | |||
439 | if (right && ext_can_merge(be, right)) { | ||
440 | sector_t diff = end - be->be_f_offset; | ||
441 | |||
442 | be->be_length -= diff; | ||
443 | |||
444 | right->be_f_offset -= diff; | ||
445 | right->be_v_offset -= diff; | ||
446 | right->be_length += diff; | ||
447 | } else { | ||
448 | err = ext_tree_split(root, be, end); | ||
449 | if (err) | ||
450 | goto out; | ||
451 | } | ||
452 | } | ||
453 | |||
454 | if (be->be_f_offset >= start && ext_f_end(be) <= end) { | ||
455 | be->be_tag = EXTENT_WRITTEN; | ||
456 | be = ext_try_to_merge_left(root, be); | ||
457 | be = ext_try_to_merge_right(root, be); | ||
458 | } | ||
459 | } | ||
460 | out: | ||
461 | spin_unlock(&bl->bl_ext_lock); | ||
462 | return err; | ||
463 | } | ||
464 | |||
465 | static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg, | ||
466 | size_t buffer_size) | ||
467 | { | ||
468 | if (arg->layoutupdate_pages != &arg->layoutupdate_page) { | ||
469 | int nr_pages = DIV_ROUND_UP(buffer_size, PAGE_SIZE), i; | ||
470 | |||
471 | for (i = 0; i < nr_pages; i++) | ||
472 | put_page(arg->layoutupdate_pages[i]); | ||
473 | kfree(arg->layoutupdate_pages); | ||
474 | } else { | ||
475 | put_page(arg->layoutupdate_page); | ||
476 | } | ||
477 | } | ||
478 | |||
479 | static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, | ||
480 | size_t buffer_size, size_t *count) | ||
481 | { | ||
482 | struct pnfs_block_extent *be; | ||
483 | int ret = 0; | ||
484 | |||
485 | spin_lock(&bl->bl_ext_lock); | ||
486 | for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) { | ||
487 | if (be->be_state != PNFS_BLOCK_INVALID_DATA || | ||
488 | be->be_tag != EXTENT_WRITTEN) | ||
489 | continue; | ||
490 | |||
491 | (*count)++; | ||
492 | if (*count * BL_EXTENT_SIZE > buffer_size) { | ||
493 | /* keep counting.. */ | ||
494 | ret = -ENOSPC; | ||
495 | continue; | ||
496 | } | ||
497 | |||
498 | p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data, | ||
499 | NFS4_DEVICEID4_SIZE); | ||
500 | p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT); | ||
501 | p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT); | ||
502 | p = xdr_encode_hyper(p, 0LL); | ||
503 | *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); | ||
504 | |||
505 | be->be_tag = EXTENT_COMMITTING; | ||
506 | } | ||
507 | spin_unlock(&bl->bl_ext_lock); | ||
508 | |||
509 | return ret; | ||
510 | } | ||
511 | |||
512 | int | ||
513 | ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) | ||
514 | { | ||
515 | struct pnfs_block_layout *bl = BLK_LO2EXT(NFS_I(arg->inode)->layout); | ||
516 | size_t count = 0, buffer_size = PAGE_SIZE; | ||
517 | __be32 *start_p; | ||
518 | int ret; | ||
519 | |||
520 | dprintk("%s enter\n", __func__); | ||
521 | |||
522 | arg->layoutupdate_page = alloc_page(GFP_NOFS); | ||
523 | if (!arg->layoutupdate_page) | ||
524 | return -ENOMEM; | ||
525 | start_p = page_address(arg->layoutupdate_page); | ||
526 | arg->layoutupdate_pages = &arg->layoutupdate_page; | ||
527 | |||
528 | retry: | ||
529 | ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count); | ||
530 | if (unlikely(ret)) { | ||
531 | ext_tree_free_commitdata(arg, buffer_size); | ||
532 | |||
533 | buffer_size = sizeof(__be32) + BL_EXTENT_SIZE * count; | ||
534 | count = 0; | ||
535 | |||
536 | arg->layoutupdate_pages = | ||
537 | kcalloc(DIV_ROUND_UP(buffer_size, PAGE_SIZE), | ||
538 | sizeof(struct page *), GFP_NOFS); | ||
539 | if (!arg->layoutupdate_pages) | ||
540 | return -ENOMEM; | ||
541 | |||
542 | start_p = __vmalloc(buffer_size, GFP_NOFS, PAGE_KERNEL); | ||
543 | if (!start_p) { | ||
544 | kfree(arg->layoutupdate_pages); | ||
545 | return -ENOMEM; | ||
546 | } | ||
547 | |||
548 | goto retry; | ||
549 | } | ||
550 | |||
551 | *start_p = cpu_to_be32(count); | ||
552 | arg->layoutupdate_len = sizeof(__be32) + BL_EXTENT_SIZE * count; | ||
553 | |||
554 | if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) { | ||
555 | void *p = start_p; | ||
556 | void *end = (void *)start_p + arg->layoutupdate_len; | ||
557 | int i = 0; | ||
558 | | ||
559 | /* step in bytes so each buffer page is mapped exactly once */ | ||
560 | for (; p < end; p += PAGE_SIZE) { | ||
561 | arg->layoutupdate_pages[i++] = vmalloc_to_page(p); | ||
562 | } | ||
563 | } | ||
564 | |||
565 | dprintk("%s found %zu ranges\n", __func__, count); | ||
566 | return 0; | ||
567 | } | ||
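The commit encoder is deliberately two-pass: the first attempt uses a single page, and on -ENOSPC the counted extents size an exact vmalloc buffer for the retry. Back-of-envelope numbers, assuming BL_EXTENT_SIZE is 44 bytes (a 16-byte deviceid, three 64-bit hypers, and a 32-bit state word; the macro itself is not shown in this hunk):

```c
#include <stdio.h>

int main(void)
{
	const unsigned page_size = 4096, ext_size = 44, hdr = 4;
	unsigned count = 500;	/* extents found by the failed first pass */

	printf("extents fitting one page: %u\n",
	       (page_size - hdr) / ext_size);			/* 93 */
	printf("retry buffer: %u bytes (%u pages)\n",
	       hdr + ext_size * count,				/* 22004 */
	       (hdr + ext_size * count + page_size - 1) / page_size);
	return 0;
}
```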
568 | |||
569 | void | ||
570 | ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status) | ||
571 | { | ||
572 | struct pnfs_block_layout *bl = BLK_LO2EXT(NFS_I(arg->inode)->layout); | ||
573 | struct rb_root *root = &bl->bl_ext_rw; | ||
574 | struct pnfs_block_extent *be; | ||
575 | |||
576 | dprintk("%s status %d\n", __func__, status); | ||
577 | |||
578 | ext_tree_free_commitdata(arg, arg->layoutupdate_len); | ||
579 | |||
580 | spin_lock(&bl->bl_ext_lock); | ||
581 | for (be = ext_tree_first(root); be; be = ext_tree_next(be)) { | ||
582 | if (be->be_state != PNFS_BLOCK_INVALID_DATA || | ||
583 | be->be_tag != EXTENT_COMMITTING) | ||
584 | continue; | ||
585 | |||
586 | if (status) { | ||
587 | /* | ||
588 | * Mark as written and try again. | ||
589 | * | ||
590 | * XXX: some real error handling here wouldn't hurt.. | ||
591 | */ | ||
592 | be->be_tag = EXTENT_WRITTEN; | ||
593 | } else { | ||
594 | be->be_state = PNFS_BLOCK_READWRITE_DATA; | ||
595 | be->be_tag = 0; | ||
596 | } | ||
597 | |||
598 | be = ext_try_to_merge_left(root, be); | ||
599 | be = ext_try_to_merge_right(root, be); | ||
600 | } | ||
601 | spin_unlock(&bl->bl_ext_lock); | ||
602 | } | ||
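Across a LAYOUTCOMMIT round trip an INVALID_DATA extent's tag moves 0 -> EXTENT_WRITTEN -> EXTENT_COMMITTING, and mark_committed() either promotes the extent to READWRITE_DATA or drops it back to EXTENT_WRITTEN for the next attempt. A userspace sketch of that transition; the names mirror this diff, and the real code does all of it under bl_ext_lock:

```c
#include <stdio.h>

enum { TAG_NONE, EXTENT_WRITTEN, EXTENT_COMMITTING };

static int mark_committed(int tag, int status)
{
	if (tag != EXTENT_COMMITTING)
		return tag;		/* not covered by this commit */
	return status ? EXTENT_WRITTEN	/* failed: retry next time */
		      : TAG_NONE;	/* now plain READWRITE_DATA */
}

int main(void)
{
	int tag = EXTENT_WRITTEN;	/* after ext_tree_mark_written() */

	tag = EXTENT_COMMITTING;	/* after ext_tree_encode_commit() */
	printf("failed commit -> tag %d\n", mark_committed(tag, -5)); /* 1 */
	printf("good commit   -> tag %d\n", mark_committed(tag, 0));  /* 0 */
	return 0;
}
```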
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c deleted file mode 100644 index 4d0161442565..000000000000 --- a/fs/nfs/blocklayout/extents.c +++ /dev/null | |||
@@ -1,908 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayout.h | ||
3 | * | ||
4 | * Module for the NFSv4.1 pNFS block layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2006 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@citi.umich.edu> | ||
10 | * Fred Isaman <iisaman@umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | |||
33 | #include "blocklayout.h" | ||
34 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
35 | |||
36 | /* Bit numbers */ | ||
37 | #define EXTENT_INITIALIZED 0 | ||
38 | #define EXTENT_WRITTEN 1 | ||
39 | #define EXTENT_IN_COMMIT 2 | ||
40 | #define INTERNAL_EXISTS MY_MAX_TAGS | ||
41 | #define INTERNAL_MASK ((1 << INTERNAL_EXISTS) - 1) | ||
42 | |||
43 | /* Returns largest t<=s s.t. t%base==0 */ | ||
44 | static inline sector_t normalize(sector_t s, int base) | ||
45 | { | ||
46 | sector_t tmp = s; /* Since do_div modifies its argument */ | ||
47 | return s - sector_div(tmp, base); | ||
48 | } | ||
49 | |||
50 | static inline sector_t normalize_up(sector_t s, int base) | ||
51 | { | ||
52 | return normalize(s + base - 1, base); | ||
53 | } | ||
54 | |||
55 | /* Complete stub using a list while determining the wanted API */ | ||
56 | |||
57 | /* Returns tags, or negative */ | ||
58 | static int32_t _find_entry(struct my_tree *tree, u64 s) | ||
59 | { | ||
60 | struct pnfs_inval_tracking *pos; | ||
61 | |||
62 | dprintk("%s(%llu) enter\n", __func__, s); | ||
63 | list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | ||
64 | if (pos->it_sector > s) | ||
65 | continue; | ||
66 | else if (pos->it_sector == s) | ||
67 | return pos->it_tags & INTERNAL_MASK; | ||
68 | else | ||
69 | break; | ||
70 | } | ||
71 | return -ENOENT; | ||
72 | } | ||
73 | |||
74 | static inline | ||
75 | int _has_tag(struct my_tree *tree, u64 s, int32_t tag) | ||
76 | { | ||
77 | int32_t tags; | ||
78 | |||
79 | dprintk("%s(%llu, %i) enter\n", __func__, s, tag); | ||
80 | s = normalize(s, tree->mtt_step_size); | ||
81 | tags = _find_entry(tree, s); | ||
82 | if ((tags < 0) || !(tags & (1 << tag))) | ||
83 | return 0; | ||
84 | else | ||
85 | return 1; | ||
86 | } | ||
87 | |||
88 | /* Creates entry with tag, or if entry already exists, unions tag to it. | ||
89 | * If storage is not NULL, newly created entry will use it. | ||
90 | * Returns number of entries added, or negative on error. | ||
91 | */ | ||
92 | static int _add_entry(struct my_tree *tree, u64 s, int32_t tag, | ||
93 | struct pnfs_inval_tracking *storage) | ||
94 | { | ||
95 | int found = 0; | ||
96 | struct pnfs_inval_tracking *pos; | ||
97 | |||
98 | dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage); | ||
99 | list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | ||
100 | if (pos->it_sector > s) | ||
101 | continue; | ||
102 | else if (pos->it_sector == s) { | ||
103 | found = 1; | ||
104 | break; | ||
105 | } else | ||
106 | break; | ||
107 | } | ||
108 | if (found) { | ||
109 | pos->it_tags |= (1 << tag); | ||
110 | return 0; | ||
111 | } else { | ||
112 | struct pnfs_inval_tracking *new; | ||
113 | new = storage; | ||
114 | new->it_sector = s; | ||
115 | new->it_tags = (1 << tag); | ||
116 | list_add(&new->it_link, &pos->it_link); | ||
117 | return 1; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | /* XXXX Really want option to not create */ | ||
122 | /* Over range, unions tag with existing entries, else creates entry with tag */ | ||
123 | static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length) | ||
124 | { | ||
125 | u64 i; | ||
126 | |||
127 | dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length); | ||
128 | for (i = normalize(s, tree->mtt_step_size); i < s + length; | ||
129 | i += tree->mtt_step_size) | ||
130 | if (_add_entry(tree, i, tag, NULL)) | ||
131 | return -ENOMEM; | ||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | /* Ensure that future operations on given range of tree will not malloc */ | ||
136 | static int _preload_range(struct pnfs_inval_markings *marks, | ||
137 | u64 offset, u64 length) | ||
138 | { | ||
139 | u64 start, end, s; | ||
140 | int count, i, used = 0, status = -ENOMEM; | ||
141 | struct pnfs_inval_tracking **storage; | ||
142 | struct my_tree *tree = &marks->im_tree; | ||
143 | |||
144 | dprintk("%s(%llu, %llu) enter\n", __func__, offset, length); | ||
145 | start = normalize(offset, tree->mtt_step_size); | ||
146 | end = normalize_up(offset + length, tree->mtt_step_size); | ||
147 | count = (int)(end - start) / (int)tree->mtt_step_size; | ||
148 | |||
149 | /* Pre-malloc what memory we might need */ | ||
150 | storage = kcalloc(count, sizeof(*storage), GFP_NOFS); | ||
151 | if (!storage) | ||
152 | return -ENOMEM; | ||
153 | for (i = 0; i < count; i++) { | ||
154 | storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking), | ||
155 | GFP_NOFS); | ||
156 | if (!storage[i]) | ||
157 | goto out_cleanup; | ||
158 | } | ||
159 | |||
160 | spin_lock_bh(&marks->im_lock); | ||
161 | for (s = start; s < end; s += tree->mtt_step_size) | ||
162 | used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]); | ||
163 | spin_unlock_bh(&marks->im_lock); | ||
164 | |||
165 | status = 0; | ||
166 | |||
167 | out_cleanup: | ||
168 | for (i = used; i < count; i++) { | ||
169 | if (!storage[i]) | ||
170 | break; | ||
171 | kfree(storage[i]); | ||
172 | } | ||
173 | kfree(storage); | ||
174 | return status; | ||
175 | } | ||
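
_preload_range() is an instance of a common pattern: perform every allocation that may sleep or fail before taking the spinlock, consume slots under the lock, then free the surplus. A hedged userspace sketch of that shape (names and sizes are illustrative):

#include <stdlib.h>

struct node { long payload; };

/* Preallocate 'count' nodes, consume some under a (notional) lock, free the rest. */
static int preload(int count)
{
	struct node **slots;
	int i, used = 0, status = -1;

	slots = calloc(count, sizeof(*slots));
	if (!slots)
		return -1;
	for (i = 0; i < count; i++) {		/* may fail: done before locking */
		slots[i] = malloc(sizeof(**slots));
		if (!slots[i])
			goto out_cleanup;
	}

	/* lock(); ... consume slots[0..], bumping 'used' per node kept ... unlock(); */

	status = 0;
out_cleanup:
	for (i = used; i < count && slots[i]; i++)	/* free whatever was not kept */
		free(slots[i]);
	free(slots);
	return status;
}

int main(void) { return preload(4); }
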
176 | |||
177 | /* We are relying on page lock to serialize this */ | ||
178 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect) | ||
179 | { | ||
180 | int rv; | ||
181 | |||
182 | spin_lock_bh(&marks->im_lock); | ||
183 | rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED); | ||
184 | spin_unlock_bh(&marks->im_lock); | ||
185 | return rv; | ||
186 | } | ||
187 | |||
188 | /* Assume start, end already sector aligned */ | ||
189 | static int | ||
190 | _range_has_tag(struct my_tree *tree, u64 start, u64 end, int32_t tag) | ||
191 | { | ||
192 | struct pnfs_inval_tracking *pos; | ||
193 | u64 expect = 0; | ||
194 | |||
195 | dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag); | ||
196 | list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | ||
197 | if (pos->it_sector >= end) | ||
198 | continue; | ||
199 | if (!expect) { | ||
200 | if ((pos->it_sector == end - tree->mtt_step_size) && | ||
201 | (pos->it_tags & (1 << tag))) { | ||
202 | expect = pos->it_sector - tree->mtt_step_size; | ||
203 | if (pos->it_sector < tree->mtt_step_size || expect < start) | ||
204 | return 1; | ||
205 | continue; | ||
206 | } else { | ||
207 | return 0; | ||
208 | } | ||
209 | } | ||
210 | if (pos->it_sector != expect || !(pos->it_tags & (1 << tag))) | ||
211 | return 0; | ||
212 | expect -= tree->mtt_step_size; | ||
213 | if (expect < start) | ||
214 | return 1; | ||
215 | } | ||
216 | return 0; | ||
217 | } | ||
218 | |||
219 | static int is_range_written(struct pnfs_inval_markings *marks, | ||
220 | sector_t start, sector_t end) | ||
221 | { | ||
222 | int rv; | ||
223 | |||
224 | spin_lock_bh(&marks->im_lock); | ||
225 | rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN); | ||
226 | spin_unlock_bh(&marks->im_lock); | ||
227 | return rv; | ||
228 | } | ||
229 | |||
230 | /* Marks sectors in [offset, offset+length) as having been initialized. | ||
231 | * All lengths are step-aligned, where step is min(pagesize, blocksize). | ||
232 | * Currently assumes offset is page-aligned | ||
233 | */ | ||
234 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, | ||
235 | sector_t offset, sector_t length) | ||
236 | { | ||
237 | sector_t start, end; | ||
238 | |||
239 | dprintk("%s(offset=%llu,len=%llu) enter\n", | ||
240 | __func__, (u64)offset, (u64)length); | ||
241 | |||
242 | start = normalize(offset, marks->im_block_size); | ||
243 | end = normalize_up(offset + length, marks->im_block_size); | ||
244 | if (_preload_range(marks, start, end - start)) | ||
245 | goto outerr; | ||
246 | |||
247 | spin_lock_bh(&marks->im_lock); | ||
248 | if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length)) | ||
249 | goto out_unlock; | ||
250 | spin_unlock_bh(&marks->im_lock); | ||
251 | |||
252 | return 0; | ||
253 | |||
254 | out_unlock: | ||
255 | spin_unlock_bh(&marks->im_lock); | ||
256 | outerr: | ||
257 | return -ENOMEM; | ||
258 | } | ||
259 | |||
260 | /* Marks sectors in [offset, offset+length) as having been written to disk. | ||
261 | * All lengths should be block aligned. | ||
262 | */ | ||
263 | static int mark_written_sectors(struct pnfs_inval_markings *marks, | ||
264 | sector_t offset, sector_t length) | ||
265 | { | ||
266 | int status; | ||
267 | |||
268 | dprintk("%s(offset=%llu,len=%llu) enter\n", __func__, | ||
269 | (u64)offset, (u64)length); | ||
270 | spin_lock_bh(&marks->im_lock); | ||
271 | status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length); | ||
272 | spin_unlock_bh(&marks->im_lock); | ||
273 | return status; | ||
274 | } | ||
275 | |||
276 | static void print_short_extent(struct pnfs_block_short_extent *be) | ||
277 | { | ||
278 | dprintk("PRINT SHORT EXTENT extent %p\n", be); | ||
279 | if (be) { | ||
280 | dprintk(" be_f_offset %llu\n", (u64)be->bse_f_offset); | ||
281 | dprintk(" be_length %llu\n", (u64)be->bse_length); | ||
282 | } | ||
283 | } | ||
284 | |||
285 | static void print_clist(struct list_head *list, unsigned int count) | ||
286 | { | ||
287 | struct pnfs_block_short_extent *be; | ||
288 | unsigned int i = 0; | ||
289 | |||
290 | ifdebug(FACILITY) { | ||
291 | printk(KERN_DEBUG "****************\n"); | ||
292 | printk(KERN_DEBUG "Extent list looks like:\n"); | ||
293 | list_for_each_entry(be, list, bse_node) { | ||
294 | i++; | ||
295 | print_short_extent(be); | ||
296 | } | ||
297 | if (i != count) | ||
298 | printk(KERN_DEBUG "\n\nExpected %u entries\n\n\n", count); | ||
299 | printk(KERN_DEBUG "****************\n"); | ||
300 | } | ||
301 | } | ||
302 | |||
303 | /* Note: In theory, we should do more checking that devids match between | ||
304 | * old and new, but if they don't, the lists are too corrupt to salvage anyway. | ||
305 | */ | ||
306 | /* Note this is very similar to bl_add_merge_extent */ | ||
307 | static void add_to_commitlist(struct pnfs_block_layout *bl, | ||
308 | struct pnfs_block_short_extent *new) | ||
309 | { | ||
310 | struct list_head *clist = &bl->bl_commit; | ||
311 | struct pnfs_block_short_extent *old, *save; | ||
312 | sector_t end = new->bse_f_offset + new->bse_length; | ||
313 | |||
314 | dprintk("%s enter\n", __func__); | ||
315 | print_short_extent(new); | ||
316 | print_clist(clist, bl->bl_count); | ||
317 | bl->bl_count++; | ||
318 | /* Scan for proper place to insert, extending new to the left | ||
319 | * as much as possible. | ||
320 | */ | ||
321 | list_for_each_entry_safe(old, save, clist, bse_node) { | ||
322 | if (new->bse_f_offset < old->bse_f_offset) | ||
323 | break; | ||
324 | if (end <= old->bse_f_offset + old->bse_length) { | ||
325 | /* Range is already in list */ | ||
326 | bl->bl_count--; | ||
327 | kfree(new); | ||
328 | return; | ||
329 | } else if (new->bse_f_offset <= | ||
330 | old->bse_f_offset + old->bse_length) { | ||
331 | /* new overlaps or abuts existing be */ | ||
332 | if (new->bse_mdev == old->bse_mdev) { | ||
333 | /* extend new to fully replace old */ | ||
334 | new->bse_length += new->bse_f_offset - | ||
335 | old->bse_f_offset; | ||
336 | new->bse_f_offset = old->bse_f_offset; | ||
337 | list_del(&old->bse_node); | ||
338 | bl->bl_count--; | ||
339 | kfree(old); | ||
340 | } | ||
341 | } | ||
342 | } | ||
343 | /* Note that if we never hit the above break, old will not point to a | ||
344 | * valid extent. However, in that case &old->bse_node==list. | ||
345 | */ | ||
346 | list_add_tail(&new->bse_node, &old->bse_node); | ||
347 | /* Scan forward for overlaps. If we find any, extend new and | ||
348 | * remove the overlapped extent. | ||
349 | */ | ||
350 | old = list_prepare_entry(new, clist, bse_node); | ||
351 | list_for_each_entry_safe_continue(old, save, clist, bse_node) { | ||
352 | if (end < old->bse_f_offset) | ||
353 | break; | ||
354 | /* new overlaps or abuts old */ | ||
355 | if (new->bse_mdev == old->bse_mdev) { | ||
356 | if (end < old->bse_f_offset + old->bse_length) { | ||
357 | /* extend new to fully cover old */ | ||
358 | end = old->bse_f_offset + old->bse_length; | ||
359 | new->bse_length = end - new->bse_f_offset; | ||
360 | } | ||
361 | list_del(&old->bse_node); | ||
362 | bl->bl_count--; | ||
363 | kfree(old); | ||
364 | } | ||
365 | } | ||
366 | dprintk("%s: after merging\n", __func__); | ||
367 | print_clist(clist, bl->bl_count); | ||
368 | } | ||
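
The two scans above amount to standard interval merging: first extend new leftward over anything it overlaps or abuts, then sweep right, swallowing extents until a gap appears. A small runnable sketch of just the underlying arithmetic (list handling and the bse_mdev check are omitted):

#include <assert.h>
#include <stdint.h>

struct ext { uint64_t off, len; };

/* Union of two extents that are known to overlap or abut. */
static struct ext merge(struct ext a, struct ext b)
{
	uint64_t start = a.off < b.off ? a.off : b.off;
	uint64_t end_a = a.off + a.len, end_b = b.off + b.len;
	uint64_t end = end_a > end_b ? end_a : end_b;

	return (struct ext){ start, end - start };
}

int main(void)
{
	struct ext m = merge((struct ext){ 8, 12 }, (struct ext){ 0, 10 });

	assert(m.off == 0 && m.len == 20);	/* [8,20) merged with [0,10) gives [0,20) */
	return 0;
}
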
369 | |||
370 | /* Note the range described by offset, length is guaranteed to be contained | ||
371 | * within be. | ||
372 | * new will be freed, either by this function or add_to_commitlist if they | ||
373 | * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist. | ||
374 | */ | ||
375 | int bl_mark_for_commit(struct pnfs_block_extent *be, | ||
376 | sector_t offset, sector_t length, | ||
377 | struct pnfs_block_short_extent *new) | ||
378 | { | ||
379 | sector_t new_end, end = offset + length; | ||
380 | struct pnfs_block_layout *bl = container_of(be->be_inval, | ||
381 | struct pnfs_block_layout, | ||
382 | bl_inval); | ||
383 | |||
384 | mark_written_sectors(be->be_inval, offset, length); | ||
385 | /* We want to add the range to commit list, but it must be | ||
386 | * block-normalized, and verified that the normalized range has | ||
387 | * been entirely written to disk. | ||
388 | */ | ||
389 | new->bse_f_offset = offset; | ||
390 | offset = normalize(offset, bl->bl_blocksize); | ||
391 | if (offset < new->bse_f_offset) { | ||
392 | if (is_range_written(be->be_inval, offset, new->bse_f_offset)) | ||
393 | new->bse_f_offset = offset; | ||
394 | else | ||
395 | new->bse_f_offset = offset + bl->bl_blocksize; | ||
396 | } | ||
397 | new_end = normalize_up(end, bl->bl_blocksize); | ||
398 | if (end < new_end) { | ||
399 | if (is_range_written(be->be_inval, end, new_end)) | ||
400 | end = new_end; | ||
401 | else | ||
402 | end = new_end - bl->bl_blocksize; | ||
403 | } | ||
404 | if (end <= new->bse_f_offset) { | ||
405 | kfree(new); | ||
406 | return 0; | ||
407 | } | ||
408 | new->bse_length = end - new->bse_f_offset; | ||
409 | new->bse_devid = be->be_devid; | ||
410 | new->bse_mdev = be->be_mdev; | ||
411 | |||
412 | spin_lock(&bl->bl_ext_lock); | ||
413 | add_to_commitlist(bl, new); | ||
414 | spin_unlock(&bl->bl_ext_lock); | ||
415 | return 0; | ||
416 | } | ||
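
The boundary handling follows one rule per edge: widen out to the block boundary only when the widened sliver is already known-written, otherwise pull the edge inward by one block. A sketch of the left-edge case, with written() as a hypothetical stand-in for is_range_written():

#include <assert.h>
#include <stdint.h>

static int written(uint64_t lo, uint64_t hi)	/* stand-in for is_range_written() */
{
	(void)lo; (void)hi;
	return 0;				/* pretend the sliver is unwritten */
}

static uint64_t adjust_left(uint64_t off, uint64_t bsize)
{
	uint64_t aligned = off - off % bsize;

	if (aligned < off)
		return written(aligned, off) ? aligned : aligned + bsize;
	return off;
}

int main(void)
{
	assert(adjust_left(10, 8) == 16);	/* [8,10) unwritten: shrink inward */
	assert(adjust_left(16, 8) == 16);	/* already block-aligned: unchanged */
	return 0;
}
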
417 | |||
418 | static void print_bl_extent(struct pnfs_block_extent *be) | ||
419 | { | ||
420 | dprintk("PRINT EXTENT extent %p\n", be); | ||
421 | if (be) { | ||
422 | dprintk(" be_f_offset %llu\n", (u64)be->be_f_offset); | ||
423 | dprintk(" be_length %llu\n", (u64)be->be_length); | ||
424 | dprintk(" be_v_offset %llu\n", (u64)be->be_v_offset); | ||
425 | dprintk(" be_state %d\n", be->be_state); | ||
426 | } | ||
427 | } | ||
428 | |||
429 | static void | ||
430 | destroy_extent(struct kref *kref) | ||
431 | { | ||
432 | struct pnfs_block_extent *be; | ||
433 | |||
434 | be = container_of(kref, struct pnfs_block_extent, be_refcnt); | ||
435 | dprintk("%s be=%p\n", __func__, be); | ||
436 | kfree(be); | ||
437 | } | ||
438 | |||
439 | void | ||
440 | bl_put_extent(struct pnfs_block_extent *be) | ||
441 | { | ||
442 | if (be) { | ||
443 | dprintk("%s enter %p (%i)\n", __func__, be, | ||
444 | atomic_read(&be->be_refcnt.refcount)); | ||
445 | kref_put(&be->be_refcnt, destroy_extent); | ||
446 | } | ||
447 | } | ||
448 | |||
449 | struct pnfs_block_extent *bl_alloc_extent(void) | ||
450 | { | ||
451 | struct pnfs_block_extent *be; | ||
452 | |||
453 | be = kmalloc(sizeof(struct pnfs_block_extent), GFP_NOFS); | ||
454 | if (!be) | ||
455 | return NULL; | ||
456 | INIT_LIST_HEAD(&be->be_node); | ||
457 | kref_init(&be->be_refcnt); | ||
458 | be->be_inval = NULL; | ||
459 | return be; | ||
460 | } | ||
461 | |||
462 | static void print_elist(struct list_head *list) | ||
463 | { | ||
464 | struct pnfs_block_extent *be; | ||
465 | dprintk("****************\n"); | ||
466 | dprintk("Extent list looks like:\n"); | ||
467 | list_for_each_entry(be, list, be_node) { | ||
468 | print_bl_extent(be); | ||
469 | } | ||
470 | dprintk("****************\n"); | ||
471 | } | ||
472 | |||
473 | static inline int | ||
474 | extents_consistent(struct pnfs_block_extent *old, struct pnfs_block_extent *new) | ||
475 | { | ||
476 | /* Note this assumes new->be_f_offset >= old->be_f_offset */ | ||
477 | return (new->be_state == old->be_state) && | ||
478 | ((new->be_state == PNFS_BLOCK_NONE_DATA) || | ||
479 | ((new->be_v_offset - old->be_v_offset == | ||
480 | new->be_f_offset - old->be_f_offset) && | ||
481 | new->be_mdev == old->be_mdev)); | ||
482 | } | ||
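
Put differently: two extents may merge only when they share a state and, for data-bearing states, map file offsets to volume offsets with the same constant shift on the same device. A runnable sketch of that test (the PNFS_BLOCK_NONE_DATA shortcut is left out; field names are illustrative):

#include <assert.h>
#include <stdint.h>

struct ext { uint64_t f_off, v_off; int state, dev; };

static int consistent(struct ext o, struct ext n)	/* assumes n.f_off >= o.f_off */
{
	return n.state == o.state &&
	       (n.v_off - o.v_off == n.f_off - o.f_off) && n.dev == o.dev;
}

int main(void)
{
	struct ext a = { .f_off = 0,  .v_off = 100, .state = 1, .dev = 1 };
	struct ext b = { .f_off = 50, .v_off = 150, .state = 1, .dev = 1 };
	struct ext c = { .f_off = 50, .v_off = 160, .state = 1, .dev = 1 };

	assert(consistent(a, b));	/* same shift of 100: mergeable */
	assert(!consistent(a, c));	/* shift differs: merging would corrupt the mapping */
	return 0;
}
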
483 | |||
484 | /* Adds new to appropriate list in bl, modifying new and removing existing | ||
485 | * extents as appropriate to deal with overlaps. | ||
486 | * | ||
487 | * See bl_find_get_extent for list constraints. | ||
488 | * | ||
489 | * Refcount on new is already set. If we end up not using it, or if we | ||
490 | * error out, we need to put the reference. | ||
491 | * | ||
492 | * bl->bl_ext_lock is held by caller. | ||
493 | */ | ||
494 | int | ||
495 | bl_add_merge_extent(struct pnfs_block_layout *bl, | ||
496 | struct pnfs_block_extent *new) | ||
497 | { | ||
498 | struct pnfs_block_extent *be, *tmp; | ||
499 | sector_t end = new->be_f_offset + new->be_length; | ||
500 | struct list_head *list; | ||
501 | |||
502 | dprintk("%s enter with be=%p\n", __func__, new); | ||
503 | print_bl_extent(new); | ||
504 | list = &bl->bl_extents[bl_choose_list(new->be_state)]; | ||
505 | print_elist(list); | ||
506 | |||
507 | /* Scan for proper place to insert, extending new to the left | ||
508 | * as much as possible. | ||
509 | */ | ||
510 | list_for_each_entry_safe_reverse(be, tmp, list, be_node) { | ||
511 | if (new->be_f_offset >= be->be_f_offset + be->be_length) | ||
512 | break; | ||
513 | if (new->be_f_offset >= be->be_f_offset) { | ||
514 | if (end <= be->be_f_offset + be->be_length) { | ||
515 | /* new is a subset of existing be */ | ||
516 | if (extents_consistent(be, new)) { | ||
517 | dprintk("%s: new is subset, ignoring\n", | ||
518 | __func__); | ||
519 | bl_put_extent(new); | ||
520 | return 0; | ||
521 | } else { | ||
522 | goto out_err; | ||
523 | } | ||
524 | } else { | ||
525 | /* |<-- be -->| | ||
526 | * |<-- new -->| */ | ||
527 | if (extents_consistent(be, new)) { | ||
528 | /* extend new to fully replace be */ | ||
529 | new->be_length += new->be_f_offset - | ||
530 | be->be_f_offset; | ||
531 | new->be_f_offset = be->be_f_offset; | ||
532 | new->be_v_offset = be->be_v_offset; | ||
533 | dprintk("%s: removing %p\n", __func__, be); | ||
534 | list_del(&be->be_node); | ||
535 | bl_put_extent(be); | ||
536 | } else { | ||
537 | goto out_err; | ||
538 | } | ||
539 | } | ||
540 | } else if (end >= be->be_f_offset + be->be_length) { | ||
541 | /* new extent overlaps existing be */ | ||
542 | if (extents_consistent(be, new)) { | ||
543 | /* extend new to fully replace be */ | ||
544 | dprintk("%s: removing %p\n", __func__, be); | ||
545 | list_del(&be->be_node); | ||
546 | bl_put_extent(be); | ||
547 | } else { | ||
548 | goto out_err; | ||
549 | } | ||
550 | } else if (end > be->be_f_offset) { | ||
551 | /* |<-- be -->| | ||
552 | *|<-- new -->| */ | ||
553 | if (extents_consistent(new, be)) { | ||
554 | /* extend new to fully replace be */ | ||
555 | new->be_length += be->be_f_offset + be->be_length - | ||
556 | new->be_f_offset - new->be_length; | ||
557 | dprintk("%s: removing %p\n", __func__, be); | ||
558 | list_del(&be->be_node); | ||
559 | bl_put_extent(be); | ||
560 | } else { | ||
561 | goto out_err; | ||
562 | } | ||
563 | } | ||
564 | } | ||
565 | /* Note that if we never hit the above break, be will not point to a | ||
566 | * valid extent. However, in that case &be->be_node==list. | ||
567 | */ | ||
568 | list_add(&new->be_node, &be->be_node); | ||
569 | dprintk("%s: inserting new\n", __func__); | ||
570 | print_elist(list); | ||
571 | /* FIXME - The per-list consistency checks have all been done, | ||
572 | * should now check cross-list consistency. | ||
573 | */ | ||
574 | return 0; | ||
575 | |||
576 | out_err: | ||
577 | bl_put_extent(new); | ||
578 | return -EIO; | ||
579 | } | ||
580 | |||
581 | /* Returns extent, or NULL. If a second READ extent exists, it is returned | ||
582 | * in cow_read, if given. | ||
583 | * | ||
584 | * The extents are kept in two separate ordered lists, one for READ and NONE, | ||
585 | * one for READWRITE and INVALID. Within each list, we assume: | ||
586 | * 1. Extents are ordered by file offset. | ||
587 | * 2. For any given isect, there is at most one extent that matches. | ||
588 | */ | ||
589 | struct pnfs_block_extent * | ||
590 | bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, | ||
591 | struct pnfs_block_extent **cow_read) | ||
592 | { | ||
593 | struct pnfs_block_extent *be, *cow, *ret; | ||
594 | int i; | ||
595 | |||
596 | dprintk("%s enter with isect %llu\n", __func__, (u64)isect); | ||
597 | cow = ret = NULL; | ||
598 | spin_lock(&bl->bl_ext_lock); | ||
599 | for (i = 0; i < EXTENT_LISTS; i++) { | ||
600 | list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) { | ||
601 | if (isect >= be->be_f_offset + be->be_length) | ||
602 | break; | ||
603 | if (isect >= be->be_f_offset) { | ||
604 | /* We have found an extent */ | ||
605 | dprintk("%s Get %p (%i)\n", __func__, be, | ||
606 | atomic_read(&be->be_refcnt.refcount)); | ||
607 | kref_get(&be->be_refcnt); | ||
608 | if (!ret) | ||
609 | ret = be; | ||
610 | else if (be->be_state != PNFS_BLOCK_READ_DATA) | ||
611 | bl_put_extent(be); | ||
612 | else | ||
613 | cow = be; | ||
614 | break; | ||
615 | } | ||
616 | } | ||
617 | if (ret && | ||
618 | (!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA)) | ||
619 | break; | ||
620 | } | ||
621 | spin_unlock(&bl->bl_ext_lock); | ||
622 | if (cow_read) | ||
623 | *cow_read = cow; | ||
624 | print_bl_extent(ret); | ||
625 | return ret; | ||
626 | } | ||
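
The cow_read out-parameter exists because an INVALID extent on the READWRITE list can be shadowed by a READ extent covering the same sector: writes target the former, while reads of not-yet-initialized data fall back to the latter. A loose sketch of that selection (states mirror the PNFS_BLOCK_* values only informally):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

enum { READ_DATA, INVALID_DATA, RW_DATA, NONE_DATA };

struct ext { uint64_t off, len; int state; };

static const struct ext *pick(const struct ext *primary,
			      const struct ext *other,
			      const struct ext **cow_read)
{
	*cow_read = NULL;
	if (primary && primary->state == INVALID_DATA &&
	    other && other->state == READ_DATA)
		*cow_read = other;	/* second extent for copy-on-write reads */
	return primary;
}

int main(void)
{
	struct ext inval = { 0, 8, INVALID_DATA }, rd = { 0, 8, READ_DATA };
	const struct ext *cow, *ret = pick(&inval, &rd, &cow);

	assert(ret == &inval && cow == &rd);
	return 0;
}
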
627 | |||
628 | /* Similar to bl_find_get_extent, but called with lock held, and ignores cow */ | ||
629 | static struct pnfs_block_extent * | ||
630 | bl_find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect) | ||
631 | { | ||
632 | struct pnfs_block_extent *be, *ret = NULL; | ||
633 | int i; | ||
634 | |||
635 | dprintk("%s enter with isect %llu\n", __func__, (u64)isect); | ||
636 | for (i = 0; i < EXTENT_LISTS; i++) { | ||
637 | if (ret) | ||
638 | break; | ||
639 | list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) { | ||
640 | if (isect >= be->be_f_offset + be->be_length) | ||
641 | break; | ||
642 | if (isect >= be->be_f_offset) { | ||
643 | /* We have found an extent */ | ||
644 | dprintk("%s Get %p (%i)\n", __func__, be, | ||
645 | atomic_read(&be->be_refcnt.refcount)); | ||
646 | kref_get(&be->be_refcnt); | ||
647 | ret = be; | ||
648 | break; | ||
649 | } | ||
650 | } | ||
651 | } | ||
652 | print_bl_extent(ret); | ||
653 | return ret; | ||
654 | } | ||
655 | |||
656 | int | ||
657 | encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | ||
658 | struct xdr_stream *xdr, | ||
659 | const struct nfs4_layoutcommit_args *arg) | ||
660 | { | ||
661 | struct pnfs_block_short_extent *lce, *save; | ||
662 | unsigned int count = 0; | ||
663 | __be32 *p, *xdr_start; | ||
664 | |||
665 | dprintk("%s enter\n", __func__); | ||
666 | /* BUG - creation of bl_commit is buggy - need to wait for | ||
667 | * entire block to be marked WRITTEN before it can be added. | ||
668 | */ | ||
669 | spin_lock(&bl->bl_ext_lock); | ||
670 | /* Want to adjust for possible truncate */ | ||
671 | /* We now want to adjust argument range */ | ||
672 | |||
673 | /* XDR encode the ranges found */ | ||
674 | xdr_start = xdr_reserve_space(xdr, 8); | ||
675 | if (!xdr_start) | ||
676 | goto out; | ||
677 | list_for_each_entry_safe(lce, save, &bl->bl_commit, bse_node) { | ||
678 | p = xdr_reserve_space(xdr, 7 * 4 + sizeof(lce->bse_devid.data)); | ||
679 | if (!p) | ||
680 | break; | ||
681 | p = xdr_encode_opaque_fixed(p, lce->bse_devid.data, NFS4_DEVICEID4_SIZE); | ||
682 | p = xdr_encode_hyper(p, lce->bse_f_offset << SECTOR_SHIFT); | ||
683 | p = xdr_encode_hyper(p, lce->bse_length << SECTOR_SHIFT); | ||
684 | p = xdr_encode_hyper(p, 0LL); | ||
685 | *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); | ||
686 | list_move_tail(&lce->bse_node, &bl->bl_committing); | ||
687 | bl->bl_count--; | ||
688 | count++; | ||
689 | } | ||
690 | xdr_start[0] = cpu_to_be32((xdr->p - xdr_start - 1) * 4); | ||
691 | xdr_start[1] = cpu_to_be32(count); | ||
692 | out: | ||
693 | spin_unlock(&bl->bl_ext_lock); | ||
694 | dprintk("%s found %i ranges\n", __func__, count); | ||
695 | return 0; | ||
696 | } | ||
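
The xdr_start handling is the usual reserve-then-backfill idiom: leave room for a length word and a count word, emit a variable number of entries, then patch the header once the final count is known. A userspace stand-in, with htonl replacing cpu_to_be32 and a bare array replacing the xdr_stream:

#include <arpa/inet.h>
#include <assert.h>
#include <stdint.h>

static uint32_t *encode_list(uint32_t *p, const uint64_t *vals, int n)
{
	uint32_t *hdr = p;			/* placeholder for length + count */
	int i;

	p += 2;
	for (i = 0; i < n; i++) {
		*p++ = htonl((uint32_t)(vals[i] >> 32));	/* hyper, big-endian */
		*p++ = htonl((uint32_t)vals[i]);
	}
	hdr[0] = htonl((uint32_t)((p - hdr - 1) * 4));	/* bytes following the length word */
	hdr[1] = htonl((uint32_t)n);
	return p;
}

int main(void)
{
	uint32_t buf[8];
	uint64_t vals[2] = { 1, 2 };
	uint32_t *end = encode_list(buf, vals, 2);

	assert(end - buf == 6);		/* 2 header words + 2 hypers */
	assert(ntohl(buf[0]) == 20);	/* count word + 2 hypers = 5 * 4 bytes */
	assert(ntohl(buf[1]) == 2);
	return 0;
}
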
697 | |||
698 | /* Helper function for set_to_rw that initializes a new extent */ | ||
699 | static void | ||
700 | _prep_new_extent(struct pnfs_block_extent *new, | ||
701 | struct pnfs_block_extent *orig, | ||
702 | sector_t offset, sector_t length, int state) | ||
703 | { | ||
704 | kref_init(&new->be_refcnt); | ||
705 | /* don't need to INIT_LIST_HEAD(&new->be_node) */ | ||
706 | memcpy(&new->be_devid, &orig->be_devid, sizeof(struct nfs4_deviceid)); | ||
707 | new->be_mdev = orig->be_mdev; | ||
708 | new->be_f_offset = offset; | ||
709 | new->be_length = length; | ||
710 | new->be_v_offset = orig->be_v_offset - orig->be_f_offset + offset; | ||
711 | new->be_state = state; | ||
712 | new->be_inval = orig->be_inval; | ||
713 | } | ||
714 | |||
715 | /* Tries to merge be with the extent in front of it in the list. | ||
716 | * Frees storage if not used. | ||
717 | */ | ||
718 | static struct pnfs_block_extent * | ||
719 | _front_merge(struct pnfs_block_extent *be, struct list_head *head, | ||
720 | struct pnfs_block_extent *storage) | ||
721 | { | ||
722 | struct pnfs_block_extent *prev; | ||
723 | |||
724 | if (!storage) | ||
725 | goto no_merge; | ||
726 | if (&be->be_node == head || be->be_node.prev == head) | ||
727 | goto no_merge; | ||
728 | prev = list_entry(be->be_node.prev, struct pnfs_block_extent, be_node); | ||
729 | if ((prev->be_f_offset + prev->be_length != be->be_f_offset) || | ||
730 | !extents_consistent(prev, be)) | ||
731 | goto no_merge; | ||
732 | _prep_new_extent(storage, prev, prev->be_f_offset, | ||
733 | prev->be_length + be->be_length, prev->be_state); | ||
734 | list_replace(&prev->be_node, &storage->be_node); | ||
735 | bl_put_extent(prev); | ||
736 | list_del(&be->be_node); | ||
737 | bl_put_extent(be); | ||
738 | return storage; | ||
739 | |||
740 | no_merge: | ||
741 | kfree(storage); | ||
742 | return be; | ||
743 | } | ||
744 | |||
745 | static u64 | ||
746 | set_to_rw(struct pnfs_block_layout *bl, u64 offset, u64 length) | ||
747 | { | ||
748 | u64 rv = offset + length; | ||
749 | struct pnfs_block_extent *be, *e1, *e2, *e3, *new, *old; | ||
750 | struct pnfs_block_extent *children[3]; | ||
751 | struct pnfs_block_extent *merge1 = NULL, *merge2 = NULL; | ||
752 | int i = 0, j; | ||
753 | |||
754 | dprintk("%s(%llu, %llu)\n", __func__, offset, length); | ||
755 | /* Create storage for up to three new extents e1, e2, e3 */ | ||
756 | e1 = kmalloc(sizeof(*e1), GFP_ATOMIC); | ||
757 | e2 = kmalloc(sizeof(*e2), GFP_ATOMIC); | ||
758 | e3 = kmalloc(sizeof(*e3), GFP_ATOMIC); | ||
759 | /* BUG - we are ignoring any failure */ | ||
760 | if (!e1 || !e2 || !e3) | ||
761 | goto out_nosplit; | ||
762 | |||
763 | spin_lock(&bl->bl_ext_lock); | ||
764 | be = bl_find_get_extent_locked(bl, offset); | ||
765 | rv = be->be_f_offset + be->be_length; | ||
766 | if (be->be_state != PNFS_BLOCK_INVALID_DATA) { | ||
767 | spin_unlock(&bl->bl_ext_lock); | ||
768 | goto out_nosplit; | ||
769 | } | ||
770 | /* Add e* to children, bumping e*'s krefs */ | ||
771 | if (be->be_f_offset != offset) { | ||
772 | _prep_new_extent(e1, be, be->be_f_offset, | ||
773 | offset - be->be_f_offset, | ||
774 | PNFS_BLOCK_INVALID_DATA); | ||
775 | children[i++] = e1; | ||
776 | print_bl_extent(e1); | ||
777 | } else | ||
778 | merge1 = e1; | ||
779 | _prep_new_extent(e2, be, offset, | ||
780 | min(length, be->be_f_offset + be->be_length - offset), | ||
781 | PNFS_BLOCK_READWRITE_DATA); | ||
782 | children[i++] = e2; | ||
783 | print_bl_extent(e2); | ||
784 | if (offset + length < be->be_f_offset + be->be_length) { | ||
785 | _prep_new_extent(e3, be, e2->be_f_offset + e2->be_length, | ||
786 | be->be_f_offset + be->be_length - | ||
787 | offset - length, | ||
788 | PNFS_BLOCK_INVALID_DATA); | ||
789 | children[i++] = e3; | ||
790 | print_bl_extent(e3); | ||
791 | } else | ||
792 | merge2 = e3; | ||
793 | |||
794 | /* Remove be from list, and insert the e* */ | ||
795 | /* We don't get refs on e*, since this list is the base reference | ||
796 | * set when init'ed. | ||
797 | */ | ||
798 | if (i < 3) | ||
799 | children[i] = NULL; | ||
800 | new = children[0]; | ||
801 | list_replace(&be->be_node, &new->be_node); | ||
802 | bl_put_extent(be); | ||
803 | new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge1); | ||
804 | for (j = 1; j < i; j++) { | ||
805 | old = new; | ||
806 | new = children[j]; | ||
807 | list_add(&new->be_node, &old->be_node); | ||
808 | } | ||
809 | if (merge2) { | ||
810 | /* This is a HACK, should just create a _back_merge function */ | ||
811 | new = list_entry(new->be_node.next, | ||
812 | struct pnfs_block_extent, be_node); | ||
813 | new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge2); | ||
814 | } | ||
815 | spin_unlock(&bl->bl_ext_lock); | ||
816 | |||
817 | /* Since we removed the base reference above, be is now scheduled for | ||
818 | * destruction. | ||
819 | */ | ||
820 | bl_put_extent(be); | ||
821 | dprintk("%s returns %llu after split\n", __func__, rv); | ||
822 | return rv; | ||
823 | |||
824 | out_nosplit: | ||
825 | kfree(e1); | ||
826 | kfree(e2); | ||
827 | kfree(e3); | ||
828 | dprintk("%s returns %llu without splitting\n", __func__, rv); | ||
829 | return rv; | ||
830 | } | ||
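
Structurally, set_to_rw() carves up to three children out of one INVALID extent: an INVALID head, a READWRITE middle, and an INVALID tail; whichever of head/tail is absent frees its preallocated slot for use as a merge buffer. A sketch of just the carving arithmetic (list splicing and refcounting omitted):

#include <assert.h>
#include <stdint.h>

struct piece { uint64_t off, len; int rw; };

static int split(uint64_t be_off, uint64_t be_len,
		 uint64_t off, uint64_t len, struct piece out[3])
{
	uint64_t be_end = be_off + be_len, end = off + len;
	int n = 0;

	if (end > be_end)
		end = be_end;			/* clamp, as the min() above does */
	if (off > be_off)
		out[n++] = (struct piece){ be_off, off - be_off, 0 };	/* INVALID head */
	out[n++] = (struct piece){ off, end - off, 1 };			/* RW middle */
	if (end < be_end)
		out[n++] = (struct piece){ end, be_end - end, 0 };	/* INVALID tail */
	return n;
}

int main(void)
{
	struct piece p[3];
	int n = split(0, 100, 30, 20, p);	/* convert [30,50) of [0,100) */

	assert(n == 3 && p[0].len == 30 && p[1].rw && p[2].off == 50);
	return 0;
}
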
831 | |||
832 | void | ||
833 | clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | ||
834 | const struct nfs4_layoutcommit_args *arg, | ||
835 | int status) | ||
836 | { | ||
837 | struct pnfs_block_short_extent *lce, *save; | ||
838 | |||
839 | dprintk("%s status %d\n", __func__, status); | ||
840 | list_for_each_entry_safe(lce, save, &bl->bl_committing, bse_node) { | ||
841 | if (likely(!status)) { | ||
842 | u64 offset = lce->bse_f_offset; | ||
843 | u64 end = offset + lce->bse_length; | ||
844 | |||
845 | do { | ||
846 | offset = set_to_rw(bl, offset, end - offset); | ||
847 | } while (offset < end); | ||
848 | list_del(&lce->bse_node); | ||
849 | |||
850 | kfree(lce); | ||
851 | } else { | ||
852 | list_del(&lce->bse_node); | ||
853 | spin_lock(&bl->bl_ext_lock); | ||
854 | add_to_commitlist(bl, lce); | ||
855 | spin_unlock(&bl->bl_ext_lock); | ||
856 | } | ||
857 | } | ||
858 | } | ||
859 | |||
860 | int bl_push_one_short_extent(struct pnfs_inval_markings *marks) | ||
861 | { | ||
862 | struct pnfs_block_short_extent *new; | ||
863 | |||
864 | new = kmalloc(sizeof(*new), GFP_NOFS); | ||
865 | if (unlikely(!new)) | ||
866 | return -ENOMEM; | ||
867 | |||
868 | spin_lock_bh(&marks->im_lock); | ||
869 | list_add(&new->bse_node, &marks->im_extents); | ||
870 | spin_unlock_bh(&marks->im_lock); | ||
871 | |||
872 | return 0; | ||
873 | } | ||
874 | |||
875 | struct pnfs_block_short_extent * | ||
876 | bl_pop_one_short_extent(struct pnfs_inval_markings *marks) | ||
877 | { | ||
878 | struct pnfs_block_short_extent *rv = NULL; | ||
879 | |||
880 | spin_lock_bh(&marks->im_lock); | ||
881 | if (!list_empty(&marks->im_extents)) { | ||
882 | rv = list_entry((&marks->im_extents)->next, | ||
883 | struct pnfs_block_short_extent, bse_node); | ||
884 | list_del_init(&rv->bse_node); | ||
885 | } | ||
886 | spin_unlock_bh(&marks->im_lock); | ||
887 | |||
888 | return rv; | ||
889 | } | ||
890 | |||
891 | void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free) | ||
892 | { | ||
893 | struct pnfs_block_short_extent *se = NULL, *tmp; | ||
894 | |||
895 | if (num_to_free <= 0) | ||
896 | return; | ||
897 | |||
898 | spin_lock(&marks->im_lock); | ||
899 | list_for_each_entry_safe(se, tmp, &marks->im_extents, bse_node) { | ||
900 | list_del(&se->bse_node); | ||
901 | kfree(se); | ||
902 | if (--num_to_free == 0) | ||
903 | break; | ||
904 | } | ||
905 | spin_unlock(&marks->im_lock); | ||
906 | |||
907 | BUG_ON(num_to_free > 0); | ||
908 | } | ||
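
Together these three helpers behave like a small reservation pool: push short extents while allocation may still fail gracefully, pop them later on paths that must not fail, and trim any surplus. A userspace sketch of the same push/pop/trim shape (sizes and names are arbitrary):

#include <stdlib.h>

struct pool { void *slots[16]; int n; };

static int push(struct pool *p)			/* reserve: may fail */
{
	void *v = malloc(64);

	if (!v)
		return -1;
	p->slots[p->n++] = v;
	return 0;
}
static void *pop(struct pool *p)		/* consume: cannot fail if reserved */
{
	return p->n ? p->slots[--p->n] : NULL;
}
static void free_extra(struct pool *p, int keep)	/* trim the surplus */
{
	while (p->n > keep)
		free(p->slots[--p->n]);
}

int main(void)
{
	struct pool p = { .n = 0 };

	if (push(&p) || push(&p))	/* reserve two up front */
		return 1;
	free(pop(&p));			/* consume one */
	free_extra(&p, 0);		/* release the rest */
	return 0;
}
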
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c new file mode 100644 index 000000000000..8d04bda2bd2e --- /dev/null +++ b/fs/nfs/blocklayout/rpc_pipefs.c | |||
@@ -0,0 +1,285 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006,2007 The Regents of the University of Michigan. | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * Andy Adamson <andros@citi.umich.edu> | ||
6 | * Fred Isaman <iisaman@umich.edu> | ||
7 | * | ||
8 | * permission is granted to use, copy, create derivative works and | ||
9 | * redistribute this software and such derivative works for any purpose, | ||
10 | * so long as the name of the university of michigan is not used in | ||
11 | * any advertising or publicity pertaining to the use or distribution | ||
12 | * of this software without specific, written prior authorization. if | ||
13 | * the above copyright notice or any other identification of the | ||
14 | * university of michigan is included in any copy of any portion of | ||
15 | * this software, then the disclaimer below must also be included. | ||
16 | * | ||
17 | * this software is provided as is, without representation from the | ||
18 | * university of michigan as to its fitness for any purpose, and without | ||
19 | * warranty by the university of michigan of any kind, either express | ||
20 | * or implied, including without limitation the implied warranties of | ||
21 | * merchantability and fitness for a particular purpose. the regents | ||
22 | * of the university of michigan shall not be liable for any damages, | ||
23 | * including special, indirect, incidental, or consequential damages, | ||
24 | * with respect to any claim arising out or in connection with the use | ||
25 | * of the software, even if it has been or is hereafter advised of the | ||
26 | * possibility of such damages. | ||
27 | */ | ||
28 | |||
29 | #include <linux/module.h> | ||
30 | #include <linux/genhd.h> | ||
31 | #include <linux/blkdev.h> | ||
32 | |||
33 | #include "blocklayout.h" | ||
34 | |||
35 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
36 | |||
37 | static void | ||
38 | nfs4_encode_simple(__be32 *p, struct pnfs_block_volume *b) | ||
39 | { | ||
40 | int i; | ||
41 | |||
42 | *p++ = cpu_to_be32(1); | ||
43 | *p++ = cpu_to_be32(b->type); | ||
44 | *p++ = cpu_to_be32(b->simple.nr_sigs); | ||
45 | for (i = 0; i < b->simple.nr_sigs; i++) { | ||
46 | p = xdr_encode_hyper(p, b->simple.sigs[i].offset); | ||
47 | p = xdr_encode_opaque(p, b->simple.sigs[i].sig, | ||
48 | b->simple.sigs[i].sig_len); | ||
49 | } | ||
50 | } | ||
51 | |||
52 | dev_t | ||
53 | bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, | ||
54 | gfp_t gfp_mask) | ||
55 | { | ||
56 | struct net *net = server->nfs_client->cl_net; | ||
57 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
58 | struct bl_dev_msg *reply = &nn->bl_mount_reply; | ||
59 | struct bl_pipe_msg bl_pipe_msg; | ||
60 | struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; | ||
61 | struct bl_msg_hdr *bl_msg; | ||
62 | DECLARE_WAITQUEUE(wq, current); | ||
63 | dev_t dev = 0; | ||
64 | int rc; | ||
65 | |||
66 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); | ||
67 | |||
68 | bl_pipe_msg.bl_wq = &nn->bl_wq; | ||
69 | |||
70 | b->simple.len += 4; /* single volume */ | ||
71 | if (b->simple.len > PAGE_SIZE) | ||
72 | return -EIO; | ||
73 | |||
74 | memset(msg, 0, sizeof(*msg)); | ||
75 | msg->len = sizeof(*bl_msg) + b->simple.len; | ||
76 | msg->data = kzalloc(msg->len, gfp_mask); | ||
77 | if (!msg->data) | ||
78 | goto out; | ||
79 | |||
80 | bl_msg = msg->data; | ||
81 | bl_msg->type = BL_DEVICE_MOUNT; | ||
82 | bl_msg->totallen = b->simple.len; | ||
83 | nfs4_encode_simple(msg->data + sizeof(*bl_msg), b); | ||
84 | |||
85 | dprintk("%s CALLING USERSPACE DAEMON\n", __func__); | ||
86 | add_wait_queue(&nn->bl_wq, &wq); | ||
87 | rc = rpc_queue_upcall(nn->bl_device_pipe, msg); | ||
88 | if (rc < 0) { | ||
89 | remove_wait_queue(&nn->bl_wq, &wq); | ||
90 | goto out; | ||
91 | } | ||
92 | |||
93 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
94 | schedule(); | ||
95 | __set_current_state(TASK_RUNNING); | ||
96 | remove_wait_queue(&nn->bl_wq, &wq); | ||
97 | |||
98 | if (reply->status != BL_DEVICE_REQUEST_PROC) { | ||
99 | printk(KERN_WARNING "%s failed to decode device: %d\n", | ||
100 | __func__, reply->status); | ||
101 | goto out; | ||
102 | } | ||
103 | |||
104 | dev = MKDEV(reply->major, reply->minor); | ||
105 | out: | ||
106 | kfree(msg->data); | ||
107 | return dev; | ||
108 | } | ||
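
The ordering in bl_resolve_deviceid() is the classic missed-wakeup guard: the task registers on the wait queue before queuing the upcall, so the daemon's reply cannot slip through unseen. A pthread analogue of the handshake (a condition variable stands in for the kernel wait queue; this is an assumption-laden userspace sketch, not the kernel API):

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t waitq = PTHREAD_COND_INITIALIZER;
static bool reply_ready;

static void *daemon_reply(void *arg)	/* plays the userspace daemon */
{
	(void)arg;
	pthread_mutex_lock(&lock);
	reply_ready = true;		/* ...fill in bl_mount_reply... */
	pthread_cond_signal(&waitq);	/* wake_up(&nn->bl_wq) analogue */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_mutex_lock(&lock);	/* register as waiter before the upcall */
	pthread_create(&t, NULL, daemon_reply, NULL);	/* post the request */
	while (!reply_ready)
		pthread_cond_wait(&waitq, &lock);	/* schedule() analogue */
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	return 0;
}
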
109 | |||
110 | static ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, | ||
111 | size_t mlen) | ||
112 | { | ||
113 | struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, | ||
114 | nfs_net_id); | ||
115 | |||
116 | if (mlen != sizeof (struct bl_dev_msg)) | ||
117 | return -EINVAL; | ||
118 | |||
119 | if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0) | ||
120 | return -EFAULT; | ||
121 | |||
122 | wake_up(&nn->bl_wq); | ||
123 | |||
124 | return mlen; | ||
125 | } | ||
126 | |||
127 | static void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) | ||
128 | { | ||
129 | struct bl_pipe_msg *bl_pipe_msg = | ||
130 | container_of(msg, struct bl_pipe_msg, msg); | ||
131 | |||
132 | if (msg->errno >= 0) | ||
133 | return; | ||
134 | wake_up(bl_pipe_msg->bl_wq); | ||
135 | } | ||
136 | |||
137 | static const struct rpc_pipe_ops bl_upcall_ops = { | ||
138 | .upcall = rpc_pipe_generic_upcall, | ||
139 | .downcall = bl_pipe_downcall, | ||
140 | .destroy_msg = bl_pipe_destroy_msg, | ||
141 | }; | ||
142 | |||
143 | static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb, | ||
144 | struct rpc_pipe *pipe) | ||
145 | { | ||
146 | struct dentry *dir, *dentry; | ||
147 | |||
148 | dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME); | ||
149 | if (dir == NULL) | ||
150 | return ERR_PTR(-ENOENT); | ||
151 | dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe); | ||
152 | dput(dir); | ||
153 | return dentry; | ||
154 | } | ||
155 | |||
156 | static void nfs4blocklayout_unregister_sb(struct super_block *sb, | ||
157 | struct rpc_pipe *pipe) | ||
158 | { | ||
159 | if (pipe->dentry) | ||
160 | rpc_unlink(pipe->dentry); | ||
161 | } | ||
162 | |||
163 | static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, | ||
164 | void *ptr) | ||
165 | { | ||
166 | struct super_block *sb = ptr; | ||
167 | struct net *net = sb->s_fs_info; | ||
168 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
169 | struct dentry *dentry; | ||
170 | int ret = 0; | ||
171 | |||
172 | if (!try_module_get(THIS_MODULE)) | ||
173 | return 0; | ||
174 | |||
175 | if (nn->bl_device_pipe == NULL) { | ||
176 | module_put(THIS_MODULE); | ||
177 | return 0; | ||
178 | } | ||
179 | |||
180 | switch (event) { | ||
181 | case RPC_PIPEFS_MOUNT: | ||
182 | dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe); | ||
183 | if (IS_ERR(dentry)) { | ||
184 | ret = PTR_ERR(dentry); | ||
185 | break; | ||
186 | } | ||
187 | nn->bl_device_pipe->dentry = dentry; | ||
188 | break; | ||
189 | case RPC_PIPEFS_UMOUNT: | ||
190 | if (nn->bl_device_pipe->dentry) | ||
191 | nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe); | ||
192 | break; | ||
193 | default: | ||
194 | ret = -ENOTSUPP; | ||
195 | break; | ||
196 | } | ||
197 | module_put(THIS_MODULE); | ||
198 | return ret; | ||
199 | } | ||
200 | |||
201 | static struct notifier_block nfs4blocklayout_block = { | ||
202 | .notifier_call = rpc_pipefs_event, | ||
203 | }; | ||
204 | |||
205 | static struct dentry *nfs4blocklayout_register_net(struct net *net, | ||
206 | struct rpc_pipe *pipe) | ||
207 | { | ||
208 | struct super_block *pipefs_sb; | ||
209 | struct dentry *dentry; | ||
210 | |||
211 | pipefs_sb = rpc_get_sb_net(net); | ||
212 | if (!pipefs_sb) | ||
213 | return NULL; | ||
214 | dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe); | ||
215 | rpc_put_sb_net(net); | ||
216 | return dentry; | ||
217 | } | ||
218 | |||
219 | static void nfs4blocklayout_unregister_net(struct net *net, | ||
220 | struct rpc_pipe *pipe) | ||
221 | { | ||
222 | struct super_block *pipefs_sb; | ||
223 | |||
224 | pipefs_sb = rpc_get_sb_net(net); | ||
225 | if (pipefs_sb) { | ||
226 | nfs4blocklayout_unregister_sb(pipefs_sb, pipe); | ||
227 | rpc_put_sb_net(net); | ||
228 | } | ||
229 | } | ||
230 | |||
231 | static int nfs4blocklayout_net_init(struct net *net) | ||
232 | { | ||
233 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
234 | struct dentry *dentry; | ||
235 | |||
236 | init_waitqueue_head(&nn->bl_wq); | ||
237 | nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); | ||
238 | if (IS_ERR(nn->bl_device_pipe)) | ||
239 | return PTR_ERR(nn->bl_device_pipe); | ||
240 | dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe); | ||
241 | if (IS_ERR(dentry)) { | ||
242 | rpc_destroy_pipe_data(nn->bl_device_pipe); | ||
243 | return PTR_ERR(dentry); | ||
244 | } | ||
245 | nn->bl_device_pipe->dentry = dentry; | ||
246 | return 0; | ||
247 | } | ||
248 | |||
249 | static void nfs4blocklayout_net_exit(struct net *net) | ||
250 | { | ||
251 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
252 | |||
253 | nfs4blocklayout_unregister_net(net, nn->bl_device_pipe); | ||
254 | rpc_destroy_pipe_data(nn->bl_device_pipe); | ||
255 | nn->bl_device_pipe = NULL; | ||
256 | } | ||
257 | |||
258 | static struct pernet_operations nfs4blocklayout_net_ops = { | ||
259 | .init = nfs4blocklayout_net_init, | ||
260 | .exit = nfs4blocklayout_net_exit, | ||
261 | }; | ||
262 | |||
263 | int __init bl_init_pipefs(void) | ||
264 | { | ||
265 | int ret; | ||
266 | |||
267 | ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block); | ||
268 | if (ret) | ||
269 | goto out; | ||
270 | ret = register_pernet_subsys(&nfs4blocklayout_net_ops); | ||
271 | if (ret) | ||
272 | goto out_unregister_notifier; | ||
273 | return 0; | ||
274 | |||
275 | out_unregister_notifier: | ||
276 | rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); | ||
277 | out: | ||
278 | return ret; | ||
279 | } | ||
280 | |||
281 | void __exit bl_cleanup_pipefs(void) | ||
282 | { | ||
283 | rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); | ||
284 | unregister_pernet_subsys(&nfs4blocklayout_net_ops); | ||
285 | } | ||
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 54de482143cc..b8fb3a4ef649 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -235,7 +235,7 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, | |||
235 | 235 | ||
236 | cb_info->serv = serv; | 236 | cb_info->serv = serv; |
237 | cb_info->rqst = rqstp; | 237 | cb_info->rqst = rqstp; |
238 | cb_info->task = kthread_run(callback_svc, cb_info->rqst, | 238 | cb_info->task = kthread_create(callback_svc, cb_info->rqst, |
239 | "nfsv4.%u-svc", minorversion); | 239 | "nfsv4.%u-svc", minorversion); |
240 | if (IS_ERR(cb_info->task)) { | 240 | if (IS_ERR(cb_info->task)) { |
241 | ret = PTR_ERR(cb_info->task); | 241 | ret = PTR_ERR(cb_info->task); |
@@ -244,6 +244,8 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, | |||
244 | cb_info->task = NULL; | 244 | cb_info->task = NULL; |
245 | return ret; | 245 | return ret; |
246 | } | 246 | } |
247 | rqstp->rq_task = cb_info->task; | ||
248 | wake_up_process(cb_info->task); | ||
247 | dprintk("nfs_callback_up: service started\n"); | 249 | dprintk("nfs_callback_up: service started\n"); |
248 | return 0; | 250 | return 0; |
249 | } | 251 | } |
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 41db5258e7a7..73466b934090 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -171,14 +171,26 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
171 | goto out; | 171 | goto out; |
172 | 172 | ||
173 | ino = lo->plh_inode; | 173 | ino = lo->plh_inode; |
174 | |||
175 | spin_lock(&ino->i_lock); | ||
176 | pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); | ||
177 | spin_unlock(&ino->i_lock); | ||
178 | |||
179 | pnfs_layoutcommit_inode(ino, false); | ||
180 | |||
174 | spin_lock(&ino->i_lock); | 181 | spin_lock(&ino->i_lock); |
175 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || | 182 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || |
176 | pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, | 183 | pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, |
177 | &args->cbl_range)) | 184 | &args->cbl_range)) { |
178 | rv = NFS4ERR_DELAY; | 185 | rv = NFS4ERR_DELAY; |
179 | else | 186 | goto unlock; |
180 | rv = NFS4ERR_NOMATCHING_LAYOUT; | 187 | } |
181 | pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); | 188 | |
189 | if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { | ||
190 | NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, | ||
191 | &args->cbl_range); | ||
192 | } | ||
193 | unlock: | ||
182 | spin_unlock(&ino->i_lock); | 194 | spin_unlock(&ino->i_lock); |
183 | pnfs_free_lseg_list(&free_me_list); | 195 | pnfs_free_lseg_list(&free_me_list); |
184 | pnfs_put_layout_hdr(lo); | 196 | pnfs_put_layout_hdr(lo); |
@@ -277,9 +289,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args, | |||
277 | } | 289 | } |
278 | 290 | ||
279 | found: | 291 | found: |
280 | if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) | ||
281 | dprintk("%s: NOTIFY_DEVICEID4_CHANGE not supported, " | ||
282 | "deleting instead\n", __func__); | ||
283 | nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); | 292 | nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); |
284 | } | 293 | } |
285 | 294 | ||
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 6a4f3666e273..f9f4845db989 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -1252,6 +1252,7 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) | |||
1252 | * set up the iterator to start reading from the server list and return the first item | 1252 | * set up the iterator to start reading from the server list and return the first item |
1253 | */ | 1253 | */ |
1254 | static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) | 1254 | static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) |
1255 | __acquires(&nn->nfs_client_lock) | ||
1255 | { | 1256 | { |
1256 | struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); | 1257 | struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); |
1257 | 1258 | ||
@@ -1274,6 +1275,7 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) | |||
1274 | * clean up after reading from the transports list | 1275 | * clean up after reading from the transports list |
1275 | */ | 1276 | */ |
1276 | static void nfs_server_list_stop(struct seq_file *p, void *v) | 1277 | static void nfs_server_list_stop(struct seq_file *p, void *v) |
1278 | __releases(&nn->nfs_client_lock) | ||
1277 | { | 1279 | { |
1278 | struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); | 1280 | struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); |
1279 | 1281 | ||
@@ -1318,7 +1320,7 @@ static int nfs_server_list_show(struct seq_file *m, void *v) | |||
1318 | */ | 1320 | */ |
1319 | static int nfs_volume_list_open(struct inode *inode, struct file *file) | 1321 | static int nfs_volume_list_open(struct inode *inode, struct file *file) |
1320 | { | 1322 | { |
1321 | return seq_open_net(inode, file, &nfs_server_list_ops, | 1323 | return seq_open_net(inode, file, &nfs_volume_list_ops, |
1322 | sizeof(struct seq_net_private)); | 1324 | sizeof(struct seq_net_private)); |
1323 | } | 1325 | } |
1324 | 1326 | ||
@@ -1326,6 +1328,7 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file) | |||
1326 | * set up the iterator to start reading from the volume list and return the first item | 1328 | * set up the iterator to start reading from the volume list and return the first item |
1327 | */ | 1329 | */ |
1328 | static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) | 1330 | static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) |
1331 | __acquires(&nn->nfs_client_lock) | ||
1329 | { | 1332 | { |
1330 | struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); | 1333 | struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); |
1331 | 1334 | ||
@@ -1348,6 +1351,7 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) | |||
1348 | * clean up after reading from the transports list | 1351 | * clean up after reading from the transports list |
1349 | */ | 1352 | */ |
1350 | static void nfs_volume_list_stop(struct seq_file *p, void *v) | 1353 | static void nfs_volume_list_stop(struct seq_file *p, void *v) |
1354 | __releases(&nn->nfs_client_lock) | ||
1351 | { | 1355 | { |
1352 | struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); | 1356 | struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); |
1353 | 1357 | ||
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 65ef6e00deee..dda4b8667c02 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -178,7 +178,6 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, | |||
178 | return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); | 178 | return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); |
179 | } | 179 | } |
180 | 180 | ||
181 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
182 | /* | 181 | /* |
183 | * nfs_direct_cmp_commit_data_verf - compare verifier for commit data | 182 | * nfs_direct_cmp_commit_data_verf - compare verifier for commit data |
184 | * @dreq - direct request possibly spanning multiple servers | 183 | * @dreq - direct request possibly spanning multiple servers |
@@ -197,7 +196,6 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, | |||
197 | WARN_ON_ONCE(verfp->committed < 0); | 196 | WARN_ON_ONCE(verfp->committed < 0); |
198 | return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); | 197 | return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); |
199 | } | 198 | } |
200 | #endif | ||
201 | 199 | ||
202 | /** | 200 | /** |
203 | * nfs_direct_IO - NFS address space operation for direct I/O | 201 | * nfs_direct_IO - NFS address space operation for direct I/O |
@@ -576,7 +574,6 @@ out: | |||
576 | return result; | 574 | return result; |
577 | } | 575 | } |
578 | 576 | ||
579 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
580 | static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) | 577 | static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) |
581 | { | 578 | { |
582 | struct nfs_pageio_descriptor desc; | 579 | struct nfs_pageio_descriptor desc; |
@@ -700,17 +697,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode | |||
700 | schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */ | 697 | schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */ |
701 | } | 698 | } |
702 | 699 | ||
703 | #else | ||
704 | static void nfs_direct_write_schedule_work(struct work_struct *work) | ||
705 | { | ||
706 | } | ||
707 | |||
708 | static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) | ||
709 | { | ||
710 | nfs_direct_complete(dreq, true); | ||
711 | } | ||
712 | #endif | ||
713 | |||
714 | static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) | 700 | static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) |
715 | { | 701 | { |
716 | struct nfs_direct_req *dreq = hdr->dreq; | 702 | struct nfs_direct_req *dreq = hdr->dreq; |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 524dd80d1898..6920127c5eb7 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include "internal.h" | 36 | #include "internal.h" |
37 | #include "iostat.h" | 37 | #include "iostat.h" |
38 | #include "fscache.h" | 38 | #include "fscache.h" |
39 | #include "pnfs.h" | ||
39 | 40 | ||
40 | #include "nfstrace.h" | 41 | #include "nfstrace.h" |
41 | 42 | ||
@@ -327,6 +328,12 @@ static int nfs_want_read_modify_write(struct file *file, struct page *page, | |||
327 | unsigned int offset = pos & (PAGE_CACHE_SIZE - 1); | 328 | unsigned int offset = pos & (PAGE_CACHE_SIZE - 1); |
328 | unsigned int end = offset + len; | 329 | unsigned int end = offset + len; |
329 | 330 | ||
331 | if (pnfs_ld_read_whole_page(file->f_mapping->host)) { | ||
332 | if (!PageUptodate(page)) | ||
333 | return 1; | ||
334 | return 0; | ||
335 | } | ||
336 | |||
330 | if ((file->f_mode & FMODE_READ) && /* open for read? */ | 337 | if ((file->f_mode & FMODE_READ) && /* open for read? */ |
331 | !PageUptodate(page) && /* Uptodate? */ | 338 | !PageUptodate(page) && /* Uptodate? */ |
332 | !PagePrivate(page) && /* i/o request already? */ | 339 | !PagePrivate(page) && /* i/o request already? */ |
@@ -468,17 +475,26 @@ static int nfs_release_page(struct page *page, gfp_t gfp) | |||
468 | 475 | ||
469 | dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); | 476 | dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); |
470 | 477 | ||
471 | /* Only do I/O if gfp is a superset of GFP_KERNEL, and we're not | 478 | /* Always try to initiate a 'commit' if relevant, but only |
472 | * doing this memory reclaim for a fs-related allocation. | 479 | * wait for it if __GFP_WAIT is set. Even then, only wait 1 |
480 | * second and only if the 'bdi' is not congested. | ||
481 | * Waiting indefinitely can cause deadlocks when the NFS | ||
482 | * server is on this machine, when a new TCP connection is | ||
483 | * needed and in other rare cases. There is no particular | ||
484 | * need to wait extensively here. A short wait has the | ||
485 | * benefit that someone else can worry about the freezer. | ||
473 | */ | 486 | */ |
474 | if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL && | 487 | if (mapping) { |
475 | !(current->flags & PF_FSTRANS)) { | 488 | struct nfs_server *nfss = NFS_SERVER(mapping->host); |
476 | int how = FLUSH_SYNC; | 489 | nfs_commit_inode(mapping->host, 0); |
477 | 490 | if ((gfp & __GFP_WAIT) && | |
478 | /* Don't let kswapd deadlock waiting for OOM RPC calls */ | 491 | !bdi_write_congested(&nfss->backing_dev_info)) { |
479 | if (current_is_kswapd()) | 492 | wait_on_page_bit_killable_timeout(page, PG_private, |
480 | how = 0; | 493 | HZ); |
481 | nfs_commit_inode(mapping->host, how); | 494 | if (PagePrivate(page)) |
495 | set_bdi_congested(&nfss->backing_dev_info, | ||
496 | BLK_RW_ASYNC); | ||
497 | } | ||
482 | } | 498 | } |
483 | /* If PagePrivate() is set, then the page is not freeable */ | 499 | /* If PagePrivate() is set, then the page is not freeable */ |
484 | if (PagePrivate(page)) | 500 | if (PagePrivate(page)) |
@@ -539,13 +555,25 @@ static int nfs_launder_page(struct page *page) | |||
539 | static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, | 555 | static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, |
540 | sector_t *span) | 556 | sector_t *span) |
541 | { | 557 | { |
558 | int ret; | ||
559 | struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); | ||
560 | |||
542 | *span = sis->pages; | 561 | *span = sis->pages; |
543 | return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 1); | 562 | |
563 | rcu_read_lock(); | ||
564 | ret = xs_swapper(rcu_dereference(clnt->cl_xprt), 1); | ||
565 | rcu_read_unlock(); | ||
566 | |||
567 | return ret; | ||
544 | } | 568 | } |
545 | 569 | ||
546 | static void nfs_swap_deactivate(struct file *file) | 570 | static void nfs_swap_deactivate(struct file *file) |
547 | { | 571 | { |
548 | xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0); | 572 | struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); |
573 | |||
574 | rcu_read_lock(); | ||
575 | xs_swapper(rcu_dereference(clnt->cl_xprt), 0); | ||
576 | rcu_read_unlock(); | ||
549 | } | 577 | } |
550 | #endif | 578 | #endif |
551 | 579 | ||
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 90978075f730..abc5056999d6 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c | |||
@@ -265,7 +265,7 @@ filelayout_set_layoutcommit(struct nfs_pgio_header *hdr) | |||
265 | { | 265 | { |
266 | 266 | ||
267 | if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || | 267 | if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || |
268 | hdr->res.verf->committed == NFS_FILE_SYNC) | 268 | hdr->res.verf->committed != NFS_DATA_SYNC) |
269 | return; | 269 | return; |
270 | 270 | ||
271 | pnfs_set_layoutcommit(hdr); | 271 | pnfs_set_layoutcommit(hdr); |
@@ -403,6 +403,9 @@ static int filelayout_commit_done_cb(struct rpc_task *task, | |||
403 | return -EAGAIN; | 403 | return -EAGAIN; |
404 | } | 404 | } |
405 | 405 | ||
406 | if (data->verf.committed == NFS_UNSTABLE) | ||
407 | pnfs_commit_set_layoutcommit(data); | ||
408 | |||
406 | return 0; | 409 | return 0; |
407 | } | 410 | } |
408 | 411 | ||
@@ -646,18 +649,15 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
646 | } | 649 | } |
647 | 650 | ||
648 | /* find and reference the deviceid */ | 651 | /* find and reference the deviceid */ |
649 | d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, | 652 | d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), id, |
650 | NFS_SERVER(lo->plh_inode)->nfs_client, id); | 653 | lo->plh_lc_cred, gfp_flags); |
651 | if (d == NULL) { | 654 | if (d == NULL) |
652 | dsaddr = filelayout_get_device_info(lo->plh_inode, id, | 655 | goto out; |
653 | lo->plh_lc_cred, gfp_flags); | 656 | |
654 | if (dsaddr == NULL) | 657 | dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); |
655 | goto out; | ||
656 | } else | ||
657 | dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); | ||
658 | /* Found deviceid is unavailable */ | 658 | /* Found deviceid is unavailable */ |
659 | if (filelayout_test_devid_unavailable(&dsaddr->id_node)) | 659 | if (filelayout_test_devid_unavailable(&dsaddr->id_node)) |
660 | goto out_put; | 660 | goto out_put; |
661 | 661 | ||
662 | fl->dsaddr = dsaddr; | 662 | fl->dsaddr = dsaddr; |
663 | 663 | ||
@@ -1368,6 +1368,17 @@ out: | |||
1368 | cinfo->ds->ncommitting = 0; | 1368 | cinfo->ds->ncommitting = 0; |
1369 | return PNFS_ATTEMPTED; | 1369 | return PNFS_ATTEMPTED; |
1370 | } | 1370 | } |
1371 | static struct nfs4_deviceid_node * | ||
1372 | filelayout_alloc_deviceid_node(struct nfs_server *server, | ||
1373 | struct pnfs_device *pdev, gfp_t gfp_flags) | ||
1374 | { | ||
1375 | struct nfs4_file_layout_dsaddr *dsaddr; | ||
1376 | |||
1377 | dsaddr = nfs4_fl_alloc_deviceid_node(server, pdev, gfp_flags); | ||
1378 | if (!dsaddr) | ||
1379 | return NULL; | ||
1380 | return &dsaddr->id_node; | ||
1381 | } | ||
1371 | 1382 | ||
1372 | static void | 1383 | static void |
1373 | filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) | 1384 | filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) |
@@ -1420,6 +1431,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { | |||
1420 | .commit_pagelist = filelayout_commit_pagelist, | 1431 | .commit_pagelist = filelayout_commit_pagelist, |
1421 | .read_pagelist = filelayout_read_pagelist, | 1432 | .read_pagelist = filelayout_read_pagelist, |
1422 | .write_pagelist = filelayout_write_pagelist, | 1433 | .write_pagelist = filelayout_write_pagelist, |
1434 | .alloc_deviceid_node = filelayout_alloc_deviceid_node, | ||
1423 | .free_deviceid_node = filelayout_free_deveiceid_node, | 1435 | .free_deviceid_node = filelayout_free_deveiceid_node, |
1424 | }; | 1436 | }; |
1425 | 1437 | ||
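filelayout_alloc_deviceid_node above is a thin adapter: it lets the generic deviceid cache decode a raw GETDEVICEINFO reply without knowing the driver's node layout. The shape of the contract, as far as this hunk shows it:

	/* Generic code passes the pnfs_device (pages filled from the reply)
	 * and expects back the nfs4_deviceid_node embedded in the driver's
	 * private structure, or NULL on decode failure. */
	struct nfs4_deviceid_node *(*alloc_deviceid_node)(struct nfs_server *server,
							  struct pnfs_device *pdev,
							  gfp_t gfp_flags);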
diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h index ffbddf2219ea..7c9f800c49d7 100644 --- a/fs/nfs/filelayout/filelayout.h +++ b/fs/nfs/filelayout/filelayout.h | |||
@@ -147,10 +147,11 @@ u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); | |||
147 | u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); | 147 | u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); |
148 | struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, | 148 | struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, |
149 | u32 ds_idx); | 149 | u32 ds_idx); |
150 | |||
151 | extern struct nfs4_file_layout_dsaddr * | ||
152 | nfs4_fl_alloc_deviceid_node(struct nfs_server *server, | ||
153 | struct pnfs_device *pdev, gfp_t gfp_flags); | ||
150 | extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); | 154 | extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); |
151 | extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); | 155 | extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); |
152 | struct nfs4_file_layout_dsaddr * | ||
153 | filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, | ||
154 | struct rpc_cred *cred, gfp_t gfp_flags); | ||
155 | 156 | ||
156 | #endif /* FS_NFS_NFS4FILELAYOUT_H */ | 157 | #endif /* FS_NFS_NFS4FILELAYOUT_H */ |
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 8540516f4d71..9bb806a76d99 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c | |||
@@ -484,8 +484,9 @@ out_err: | |||
484 | } | 484 | } |
485 | 485 | ||
486 | /* Decode opaque device data and return the result */ | 486 | /* Decode opaque device data and return the result */ |
487 | static struct nfs4_file_layout_dsaddr* | 487 | struct nfs4_file_layout_dsaddr * |
488 | decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | 488 | nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, |
489 | gfp_t gfp_flags) | ||
489 | { | 490 | { |
490 | int i; | 491 | int i; |
491 | u32 cnt, num; | 492 | u32 cnt, num; |
@@ -570,10 +571,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
570 | dsaddr->stripe_indices = stripe_indices; | 571 | dsaddr->stripe_indices = stripe_indices; |
571 | stripe_indices = NULL; | 572 | stripe_indices = NULL; |
572 | dsaddr->ds_num = num; | 573 | dsaddr->ds_num = num; |
573 | nfs4_init_deviceid_node(&dsaddr->id_node, | 574 | nfs4_init_deviceid_node(&dsaddr->id_node, server, &pdev->dev_id); |
574 | NFS_SERVER(ino)->pnfs_curr_ld, | ||
575 | NFS_SERVER(ino)->nfs_client, | ||
576 | &pdev->dev_id); | ||
577 | 575 | ||
578 | INIT_LIST_HEAD(&dsaddrs); | 576 | INIT_LIST_HEAD(&dsaddrs); |
579 | 577 | ||
@@ -587,7 +585,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
587 | 585 | ||
588 | mp_count = be32_to_cpup(p); /* multipath count */ | 586 | mp_count = be32_to_cpup(p); /* multipath count */ |
589 | for (j = 0; j < mp_count; j++) { | 587 | for (j = 0; j < mp_count; j++) { |
590 | da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net, | 588 | da = decode_ds_addr(server->nfs_client->cl_net, |
591 | &stream, gfp_flags); | 589 | &stream, gfp_flags); |
592 | if (da) | 590 | if (da) |
593 | list_add_tail(&da->da_node, &dsaddrs); | 591 | list_add_tail(&da->da_node, &dsaddrs); |
@@ -637,102 +635,6 @@ out_err: | |||
637 | return NULL; | 635 | return NULL; |
638 | } | 636 | } |
639 | 637 | ||
640 | /* | ||
641 | * Decode the opaque device specified in 'dev' and add it to the cache of | ||
642 | * available devices. | ||
643 | */ | ||
644 | static struct nfs4_file_layout_dsaddr * | ||
645 | decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) | ||
646 | { | ||
647 | struct nfs4_deviceid_node *d; | ||
648 | struct nfs4_file_layout_dsaddr *n, *new; | ||
649 | |||
650 | new = decode_device(inode, dev, gfp_flags); | ||
651 | if (!new) { | ||
652 | printk(KERN_WARNING "NFS: %s: Could not decode or add device\n", | ||
653 | __func__); | ||
654 | return NULL; | ||
655 | } | ||
656 | |||
657 | d = nfs4_insert_deviceid_node(&new->id_node); | ||
658 | n = container_of(d, struct nfs4_file_layout_dsaddr, id_node); | ||
659 | if (n != new) { | ||
660 | nfs4_fl_free_deviceid(new); | ||
661 | return n; | ||
662 | } | ||
663 | |||
664 | return new; | ||
665 | } | ||
666 | |||
667 | /* | ||
668 | * Retrieve the information for dev_id, add it to the list | ||
669 | * of available devices, and return it. | ||
670 | */ | ||
671 | struct nfs4_file_layout_dsaddr * | ||
672 | filelayout_get_device_info(struct inode *inode, | ||
673 | struct nfs4_deviceid *dev_id, | ||
674 | struct rpc_cred *cred, | ||
675 | gfp_t gfp_flags) | ||
676 | { | ||
677 | struct pnfs_device *pdev = NULL; | ||
678 | u32 max_resp_sz; | ||
679 | int max_pages; | ||
680 | struct page **pages = NULL; | ||
681 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; | ||
682 | int rc, i; | ||
683 | struct nfs_server *server = NFS_SERVER(inode); | ||
684 | |||
685 | /* | ||
686 | * Use the session max response size as the basis for setting | ||
687 | * GETDEVICEINFO's maxcount | ||
688 | */ | ||
689 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | ||
690 | max_pages = nfs_page_array_len(0, max_resp_sz); | ||
691 | dprintk("%s inode %p max_resp_sz %u max_pages %d\n", | ||
692 | __func__, inode, max_resp_sz, max_pages); | ||
693 | |||
694 | pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags); | ||
695 | if (pdev == NULL) | ||
696 | return NULL; | ||
697 | |||
698 | pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); | ||
699 | if (pages == NULL) { | ||
700 | kfree(pdev); | ||
701 | return NULL; | ||
702 | } | ||
703 | for (i = 0; i < max_pages; i++) { | ||
704 | pages[i] = alloc_page(gfp_flags); | ||
705 | if (!pages[i]) | ||
706 | goto out_free; | ||
707 | } | ||
708 | |||
709 | memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); | ||
710 | pdev->layout_type = LAYOUT_NFSV4_1_FILES; | ||
711 | pdev->pages = pages; | ||
712 | pdev->pgbase = 0; | ||
713 | pdev->pglen = max_resp_sz; | ||
714 | pdev->mincount = 0; | ||
715 | pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; | ||
716 | |||
717 | rc = nfs4_proc_getdeviceinfo(server, pdev, cred); | ||
718 | dprintk("%s getdevice info returns %d\n", __func__, rc); | ||
719 | if (rc) | ||
720 | goto out_free; | ||
721 | |||
722 | /* | ||
723 | * Found new device, need to decode it and then add it to the | ||
724 | * list of known devices for this mountpoint. | ||
725 | */ | ||
726 | dsaddr = decode_and_add_device(inode, pdev, gfp_flags); | ||
727 | out_free: | ||
728 | for (i = 0; i < max_pages; i++) | ||
729 | __free_page(pages[i]); | ||
730 | kfree(pages); | ||
731 | kfree(pdev); | ||
732 | dprintk("<-- %s dsaddr %p\n", __func__, dsaddr); | ||
733 | return dsaddr; | ||
734 | } | ||
735 | |||
736 | void | 638 | void |
737 | nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | 639 | nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) |
738 | { | 640 | { |
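The roughly 100 removed lines above were the per-driver GETDEVICEINFO round trip: allocate a page array sized from the session's maximum response size, issue the RPC, decode, and race-insert into the cache. For reference, the buffer sizing they performed (and which the generic layer must now do on the driver's behalf):

	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
	max_pages = nfs_page_array_len(0, max_resp_sz);
	pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead;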
diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c index 7cf2c4699b08..777b055063f6 100644 --- a/fs/nfs/fscache-index.c +++ b/fs/nfs/fscache-index.c | |||
@@ -74,11 +74,10 @@ static uint16_t nfs_server_get_key(const void *cookie_netfs_data, | |||
74 | struct nfs_server_key *key = buffer; | 74 | struct nfs_server_key *key = buffer; |
75 | uint16_t len = sizeof(struct nfs_server_key); | 75 | uint16_t len = sizeof(struct nfs_server_key); |
76 | 76 | ||
77 | memset(key, 0, len); | ||
77 | key->nfsversion = clp->rpc_ops->version; | 78 | key->nfsversion = clp->rpc_ops->version; |
78 | key->family = clp->cl_addr.ss_family; | 79 | key->family = clp->cl_addr.ss_family; |
79 | 80 | ||
80 | memset(key, 0, len); | ||
81 | |||
82 | switch (clp->cl_addr.ss_family) { | 81 | switch (clp->cl_addr.ss_family) { |
83 | case AF_INET: | 82 | case AF_INET: |
84 | key->port = sin->sin_port; | 83 | key->port = sin->sin_port; |
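The fscache hunk fixes a genuine ordering bug: the old code filled in nfsversion and family and then zeroed the whole key, wiping the fields it had just written. The general pattern:

	struct nfs_server_key key;

	memset(&key, 0, sizeof(key));	/* zero padding before filling fields */
	key.nfsversion = version;
	key.family = family;
	/* broken order: assigning first and memset()ing afterwards clears
	 * the assignments, so the cache key loses version and family */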
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 577a36f0a510..141c9f4a40de 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -505,7 +505,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
505 | attr->ia_valid &= ~ATTR_MODE; | 505 | attr->ia_valid &= ~ATTR_MODE; |
506 | 506 | ||
507 | if (attr->ia_valid & ATTR_SIZE) { | 507 | if (attr->ia_valid & ATTR_SIZE) { |
508 | if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) | 508 | BUG_ON(!S_ISREG(inode->i_mode)); |
509 | |||
510 | if (attr->ia_size == i_size_read(inode)) | ||
509 | attr->ia_valid &= ~ATTR_SIZE; | 511 | attr->ia_valid &= ~ATTR_SIZE; |
510 | } | 512 | } |
511 | 513 | ||
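nfs_setattr now asserts rather than tolerates a size change on a non-regular file: the VFS only passes ATTR_SIZE for regular files, so the old silent skip could hide a broken caller. The resulting logic:

	if (attr->ia_valid & ATTR_SIZE) {
		BUG_ON(!S_ISREG(inode->i_mode));	/* VFS invariant */
		if (attr->ia_size == i_size_read(inode))
			attr->ia_valid &= ~ATTR_SIZE;	/* no-op truncate */
	}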
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9056622d2230..14ae6f20a172 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -218,13 +218,6 @@ static inline void nfs_fs_proc_exit(void) | |||
218 | int nfs_sockaddr_match_ipaddr(const struct sockaddr *, const struct sockaddr *); | 218 | int nfs_sockaddr_match_ipaddr(const struct sockaddr *, const struct sockaddr *); |
219 | #endif | 219 | #endif |
220 | 220 | ||
221 | /* nfs3client.c */ | ||
222 | #if IS_ENABLED(CONFIG_NFS_V3) | ||
223 | struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); | ||
224 | struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, | ||
225 | struct nfs_fattr *, rpc_authflavor_t); | ||
226 | #endif | ||
227 | |||
228 | /* callback_xdr.c */ | 221 | /* callback_xdr.c */ |
229 | extern struct svc_version nfs4_callback_version1; | 222 | extern struct svc_version nfs4_callback_version1; |
230 | extern struct svc_version nfs4_callback_version4; | 223 | extern struct svc_version nfs4_callback_version4; |
diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h new file mode 100644 index 000000000000..333ae4068506 --- /dev/null +++ b/fs/nfs/nfs3_fs.h | |||
@@ -0,0 +1,34 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2014 Anna Schumaker. | ||
3 | * | ||
4 | * NFSv3-specific filesystem definitions and declarations | ||
5 | */ | ||
6 | #ifndef __LINUX_FS_NFS_NFS3_FS_H | ||
7 | #define __LINUX_FS_NFS_NFS3_FS_H | ||
8 | |||
9 | /* | ||
10 | * nfs3acl.c | ||
11 | */ | ||
12 | #ifdef CONFIG_NFS_V3_ACL | ||
13 | extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type); | ||
14 | extern int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type); | ||
15 | extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | ||
16 | struct posix_acl *dfacl); | ||
17 | extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t); | ||
18 | extern const struct xattr_handler *nfs3_xattr_handlers[]; | ||
19 | #else | ||
20 | static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | ||
21 | struct posix_acl *dfacl) | ||
22 | { | ||
23 | return 0; | ||
24 | } | ||
25 | #define nfs3_listxattr NULL | ||
26 | #endif /* CONFIG_NFS_V3_ACL */ | ||
27 | |||
28 | /* nfs3client.c */ | ||
29 | struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); | ||
30 | struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, | ||
31 | struct nfs_fattr *, rpc_authflavor_t); | ||
32 | |||
33 | |||
34 | #endif /* __LINUX_FS_NFS_NFS3_FS_H */ | ||
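nfs3_fs.h gathers the NFSv3-only declarations that previously sat in the shared internal.h (removed there in the hunk above this file). The compiled-out stub keeps call sites free of #ifdefs:

	/* callers stay unconditional; with CONFIG_NFS_V3_ACL disabled the
	 * header supplies a static inline returning 0 */
	error = nfs3_proc_setacls(inode, acl, dfacl);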
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 24c6898159cc..658e586ca438 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/nfsacl.h> | 7 | #include <linux/nfsacl.h> |
8 | 8 | ||
9 | #include "internal.h" | 9 | #include "internal.h" |
10 | #include "nfs3_fs.h" | ||
10 | 11 | ||
11 | #define NFSDBG_FACILITY NFSDBG_PROC | 12 | #define NFSDBG_FACILITY NFSDBG_PROC |
12 | 13 | ||
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index b3fc65ef39ca..8c1b437c5403 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c | |||
@@ -1,6 +1,7 @@ | |||
1 | #include <linux/nfs_fs.h> | 1 | #include <linux/nfs_fs.h> |
2 | #include <linux/nfs_mount.h> | 2 | #include <linux/nfs_mount.h> |
3 | #include "internal.h" | 3 | #include "internal.h" |
4 | #include "nfs3_fs.h" | ||
4 | 5 | ||
5 | #ifdef CONFIG_NFS_V3_ACL | 6 | #ifdef CONFIG_NFS_V3_ACL |
6 | static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; | 7 | static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 809670eba52a..524f9f837408 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -22,6 +22,7 @@ | |||
22 | 22 | ||
23 | #include "iostat.h" | 23 | #include "iostat.h" |
24 | #include "internal.h" | 24 | #include "internal.h" |
25 | #include "nfs3_fs.h" | ||
25 | 26 | ||
26 | #define NFSDBG_FACILITY NFSDBG_PROC | 27 | #define NFSDBG_FACILITY NFSDBG_PROC |
27 | 28 | ||
diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index d6a98949af19..6af29c2da352 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/module.h> | 4 | #include <linux/module.h> |
5 | #include <linux/nfs_fs.h> | 5 | #include <linux/nfs_fs.h> |
6 | #include "internal.h" | 6 | #include "internal.h" |
7 | #include "nfs3_fs.h" | ||
7 | #include "nfs.h" | 8 | #include "nfs.h" |
8 | 9 | ||
9 | static struct nfs_subversion nfs_v3 = { | 10 | static struct nfs_subversion nfs_v3 = { |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6ca0c8e7a945..5aa55c132aa2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -77,7 +77,7 @@ struct nfs4_opendata; | |||
77 | static int _nfs4_proc_open(struct nfs4_opendata *data); | 77 | static int _nfs4_proc_open(struct nfs4_opendata *data); |
78 | static int _nfs4_recover_proc_open(struct nfs4_opendata *data); | 78 | static int _nfs4_recover_proc_open(struct nfs4_opendata *data); |
79 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); | 79 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); |
80 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); | 80 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, long *); |
81 | static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); | 81 | static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); |
82 | static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); | 82 | static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); |
83 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); | 83 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); |
@@ -314,20 +314,30 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent | |||
314 | kunmap_atomic(start); | 314 | kunmap_atomic(start); |
315 | } | 315 | } |
316 | 316 | ||
317 | static long nfs4_update_delay(long *timeout) | ||
318 | { | ||
319 | long ret; | ||
320 | if (!timeout) | ||
321 | return NFS4_POLL_RETRY_MAX; | ||
322 | if (*timeout <= 0) | ||
323 | *timeout = NFS4_POLL_RETRY_MIN; | ||
324 | if (*timeout > NFS4_POLL_RETRY_MAX) | ||
325 | *timeout = NFS4_POLL_RETRY_MAX; | ||
326 | ret = *timeout; | ||
327 | *timeout <<= 1; | ||
328 | return ret; | ||
329 | } | ||
330 | |||
317 | static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) | 331 | static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) |
318 | { | 332 | { |
319 | int res = 0; | 333 | int res = 0; |
320 | 334 | ||
321 | might_sleep(); | 335 | might_sleep(); |
322 | 336 | ||
323 | if (*timeout <= 0) | 337 | freezable_schedule_timeout_killable_unsafe( |
324 | *timeout = NFS4_POLL_RETRY_MIN; | 338 | nfs4_update_delay(timeout)); |
325 | if (*timeout > NFS4_POLL_RETRY_MAX) | ||
326 | *timeout = NFS4_POLL_RETRY_MAX; | ||
327 | freezable_schedule_timeout_killable_unsafe(*timeout); | ||
328 | if (fatal_signal_pending(current)) | 339 | if (fatal_signal_pending(current)) |
329 | res = -ERESTARTSYS; | 340 | res = -ERESTARTSYS; |
330 | *timeout <<= 1; | ||
331 | return res; | 341 | return res; |
332 | } | 342 | } |
333 | 343 | ||
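nfs4_update_delay extracts the exponential backoff that nfs4_delay used to do inline, so the async error path further down can share it. Its behaviour, assuming only what the hunk shows:

	long t = 0;
	long d;

	d = nfs4_update_delay(&t);	/* first call: NFS4_POLL_RETRY_MIN, t doubled */
	d = nfs4_update_delay(&t);	/* next: 2 * NFS4_POLL_RETRY_MIN */
	d = nfs4_update_delay(&t);	/* doubles each call, clamped at NFS4_POLL_RETRY_MAX */
	d = nfs4_update_delay(NULL);	/* no state tracked: always the max */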
@@ -1307,15 +1317,13 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) | |||
1307 | int ret = -EAGAIN; | 1317 | int ret = -EAGAIN; |
1308 | 1318 | ||
1309 | for (;;) { | 1319 | for (;;) { |
1320 | spin_lock(&state->owner->so_lock); | ||
1310 | if (can_open_cached(state, fmode, open_mode)) { | 1321 | if (can_open_cached(state, fmode, open_mode)) { |
1311 | spin_lock(&state->owner->so_lock); | 1322 | update_open_stateflags(state, fmode); |
1312 | if (can_open_cached(state, fmode, open_mode)) { | ||
1313 | update_open_stateflags(state, fmode); | ||
1314 | spin_unlock(&state->owner->so_lock); | ||
1315 | goto out_return_state; | ||
1316 | } | ||
1317 | spin_unlock(&state->owner->so_lock); | 1323 | spin_unlock(&state->owner->so_lock); |
1324 | goto out_return_state; | ||
1318 | } | 1325 | } |
1326 | spin_unlock(&state->owner->so_lock); | ||
1319 | rcu_read_lock(); | 1327 | rcu_read_lock(); |
1320 | delegation = rcu_dereference(nfsi->delegation); | 1328 | delegation = rcu_dereference(nfsi->delegation); |
1321 | if (!can_open_delegated(delegation, fmode)) { | 1329 | if (!can_open_delegated(delegation, fmode)) { |
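The nfs4_try_open_cached hunk replaces double-checked locking (test outside the lock, retest inside) with a single locked test. Against a plain spinlock the unlocked pre-check saved almost nothing and left a window where the two checks could disagree; the simplified shape is:

	spin_lock(&state->owner->so_lock);
	if (can_open_cached(state, fmode, open_mode)) {
		update_open_stateflags(state, fmode);
		spin_unlock(&state->owner->so_lock);
		goto out_return_state;
	}
	spin_unlock(&state->owner->so_lock);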
@@ -2589,7 +2597,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) | |||
2589 | if (calldata->arg.fmode == 0) | 2597 | if (calldata->arg.fmode == 0) |
2590 | break; | 2598 | break; |
2591 | default: | 2599 | default: |
2592 | if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { | 2600 | if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) { |
2593 | rpc_restart_call_prepare(task); | 2601 | rpc_restart_call_prepare(task); |
2594 | goto out_release; | 2602 | goto out_release; |
2595 | } | 2603 | } |
@@ -3217,7 +3225,9 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
3217 | struct nfs4_label *label = NULL; | 3225 | struct nfs4_label *label = NULL; |
3218 | int status; | 3226 | int status; |
3219 | 3227 | ||
3220 | if (pnfs_ld_layoutret_on_setattr(inode)) | 3228 | if (pnfs_ld_layoutret_on_setattr(inode) && |
3229 | sattr->ia_valid & ATTR_SIZE && | ||
3230 | sattr->ia_size < i_size_read(inode)) | ||
3221 | pnfs_commit_and_return_layout(inode); | 3231 | pnfs_commit_and_return_layout(inode); |
3222 | 3232 | ||
3223 | nfs_fattr_init(fattr); | 3233 | nfs_fattr_init(fattr); |
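For drivers that set PNFS_LAYOUTRET_ON_SETATTR, the layout is now committed and returned only when the setattr actually shrinks the file; extending truncates and pure attribute changes keep the layout. The refined trigger:

	if (pnfs_ld_layoutret_on_setattr(inode) &&
	    (sattr->ia_valid & ATTR_SIZE) &&
	    sattr->ia_size < i_size_read(inode))
		pnfs_commit_and_return_layout(inode);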
@@ -3576,7 +3586,8 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) | |||
3576 | 3586 | ||
3577 | if (!nfs4_sequence_done(task, &res->seq_res)) | 3587 | if (!nfs4_sequence_done(task, &res->seq_res)) |
3578 | return 0; | 3588 | return 0; |
3579 | if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) | 3589 | if (nfs4_async_handle_error(task, res->server, NULL, |
3590 | &data->timeout) == -EAGAIN) | ||
3580 | return 0; | 3591 | return 0; |
3581 | update_changeattr(dir, &res->cinfo); | 3592 | update_changeattr(dir, &res->cinfo); |
3582 | return 1; | 3593 | return 1; |
@@ -3609,7 +3620,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, | |||
3609 | 3620 | ||
3610 | if (!nfs4_sequence_done(task, &res->seq_res)) | 3621 | if (!nfs4_sequence_done(task, &res->seq_res)) |
3611 | return 0; | 3622 | return 0; |
3612 | if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) | 3623 | if (nfs4_async_handle_error(task, res->server, NULL, &data->timeout) == -EAGAIN) |
3613 | return 0; | 3624 | return 0; |
3614 | 3625 | ||
3615 | update_changeattr(old_dir, &res->old_cinfo); | 3626 | update_changeattr(old_dir, &res->old_cinfo); |
@@ -4113,7 +4124,8 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) | |||
4113 | 4124 | ||
4114 | trace_nfs4_read(hdr, task->tk_status); | 4125 | trace_nfs4_read(hdr, task->tk_status); |
4115 | if (nfs4_async_handle_error(task, server, | 4126 | if (nfs4_async_handle_error(task, server, |
4116 | hdr->args.context->state) == -EAGAIN) { | 4127 | hdr->args.context->state, |
4128 | NULL) == -EAGAIN) { | ||
4117 | rpc_restart_call_prepare(task); | 4129 | rpc_restart_call_prepare(task); |
4118 | return -EAGAIN; | 4130 | return -EAGAIN; |
4119 | } | 4131 | } |
@@ -4181,10 +4193,11 @@ static int nfs4_write_done_cb(struct rpc_task *task, | |||
4181 | struct nfs_pgio_header *hdr) | 4193 | struct nfs_pgio_header *hdr) |
4182 | { | 4194 | { |
4183 | struct inode *inode = hdr->inode; | 4195 | struct inode *inode = hdr->inode; |
4184 | 4196 | ||
4185 | trace_nfs4_write(hdr, task->tk_status); | 4197 | trace_nfs4_write(hdr, task->tk_status); |
4186 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), | 4198 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), |
4187 | hdr->args.context->state) == -EAGAIN) { | 4199 | hdr->args.context->state, |
4200 | NULL) == -EAGAIN) { | ||
4188 | rpc_restart_call_prepare(task); | 4201 | rpc_restart_call_prepare(task); |
4189 | return -EAGAIN; | 4202 | return -EAGAIN; |
4190 | } | 4203 | } |
@@ -4264,7 +4277,8 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *da | |||
4264 | struct inode *inode = data->inode; | 4277 | struct inode *inode = data->inode; |
4265 | 4278 | ||
4266 | trace_nfs4_commit(data, task->tk_status); | 4279 | trace_nfs4_commit(data, task->tk_status); |
4267 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { | 4280 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), |
4281 | NULL, NULL) == -EAGAIN) { | ||
4268 | rpc_restart_call_prepare(task); | 4282 | rpc_restart_call_prepare(task); |
4269 | return -EAGAIN; | 4283 | return -EAGAIN; |
4270 | } | 4284 | } |
@@ -4817,7 +4831,8 @@ out: | |||
4817 | 4831 | ||
4818 | 4832 | ||
4819 | static int | 4833 | static int |
4820 | nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) | 4834 | nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, |
4835 | struct nfs4_state *state, long *timeout) | ||
4821 | { | 4836 | { |
4822 | struct nfs_client *clp = server->nfs_client; | 4837 | struct nfs_client *clp = server->nfs_client; |
4823 | 4838 | ||
@@ -4867,6 +4882,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, | |||
4867 | #endif /* CONFIG_NFS_V4_1 */ | 4882 | #endif /* CONFIG_NFS_V4_1 */ |
4868 | case -NFS4ERR_DELAY: | 4883 | case -NFS4ERR_DELAY: |
4869 | nfs_inc_server_stats(server, NFSIOS_DELAY); | 4884 | nfs_inc_server_stats(server, NFSIOS_DELAY); |
4885 | rpc_delay(task, nfs4_update_delay(timeout)); | ||
4886 | goto restart_call; | ||
4870 | case -NFS4ERR_GRACE: | 4887 | case -NFS4ERR_GRACE: |
4871 | rpc_delay(task, NFS4_POLL_RETRY_MAX); | 4888 | rpc_delay(task, NFS4_POLL_RETRY_MAX); |
4872 | case -NFS4ERR_RETRY_UNCACHED_REP: | 4889 | case -NFS4ERR_RETRY_UNCACHED_REP: |
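NFS4ERR_DELAY in the async handler now backs off with the same growing delay as the synchronous path instead of falling through to the fixed GRACE wait. Callers that want per-operation backoff keep a long in their call data and pass its address; everyone else passes NULL and gets the maximum interval. Caller pattern from the unlink/rename hunks (the timeout field itself lands in the shared headers, outside this range):

	/* in the operation's private data: */
	long timeout;	/* starts at 0; nfs4_update_delay() clamps and doubles it */

	if (nfs4_async_handle_error(task, res->server, NULL,
				    &data->timeout) == -EAGAIN)
		return 0;	/* task restarted after rpc_delay() */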
@@ -5107,8 +5124,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) | |||
5107 | pnfs_roc_set_barrier(data->inode, data->roc_barrier); | 5124 | pnfs_roc_set_barrier(data->inode, data->roc_barrier); |
5108 | break; | 5125 | break; |
5109 | default: | 5126 | default: |
5110 | if (nfs4_async_handle_error(task, data->res.server, NULL) == | 5127 | if (nfs4_async_handle_error(task, data->res.server, |
5111 | -EAGAIN) { | 5128 | NULL, NULL) == -EAGAIN) { |
5112 | rpc_restart_call_prepare(task); | 5129 | rpc_restart_call_prepare(task); |
5113 | return; | 5130 | return; |
5114 | } | 5131 | } |
@@ -5372,7 +5389,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) | |||
5372 | case -NFS4ERR_EXPIRED: | 5389 | case -NFS4ERR_EXPIRED: |
5373 | break; | 5390 | break; |
5374 | default: | 5391 | default: |
5375 | if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) | 5392 | if (nfs4_async_handle_error(task, calldata->server, |
5393 | NULL, NULL) == -EAGAIN) | ||
5376 | rpc_restart_call_prepare(task); | 5394 | rpc_restart_call_prepare(task); |
5377 | } | 5395 | } |
5378 | nfs_release_seqid(calldata->arg.seqid); | 5396 | nfs_release_seqid(calldata->arg.seqid); |
@@ -5978,7 +5996,8 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata) | |||
5978 | break; | 5996 | break; |
5979 | case -NFS4ERR_LEASE_MOVED: | 5997 | case -NFS4ERR_LEASE_MOVED: |
5980 | case -NFS4ERR_DELAY: | 5998 | case -NFS4ERR_DELAY: |
5981 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) | 5999 | if (nfs4_async_handle_error(task, server, |
6000 | NULL, NULL) == -EAGAIN) | ||
5982 | rpc_restart_call_prepare(task); | 6001 | rpc_restart_call_prepare(task); |
5983 | } | 6002 | } |
5984 | } | 6003 | } |
@@ -7353,7 +7372,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr | |||
7353 | int ret = 0; | 7372 | int ret = 0; |
7354 | 7373 | ||
7355 | if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) | 7374 | if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) |
7356 | return 0; | 7375 | return -EAGAIN; |
7357 | task = _nfs41_proc_sequence(clp, cred, false); | 7376 | task = _nfs41_proc_sequence(clp, cred, false); |
7358 | if (IS_ERR(task)) | 7377 | if (IS_ERR(task)) |
7359 | ret = PTR_ERR(task); | 7378 | ret = PTR_ERR(task); |
@@ -7583,14 +7602,19 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) | |||
7583 | } else { | 7602 | } else { |
7584 | LIST_HEAD(head); | 7603 | LIST_HEAD(head); |
7585 | 7604 | ||
7605 | /* | ||
7606 | * Mark the bad layout state as invalid, then retry | ||
7607 | * with the current stateid. | ||
7608 | */ | ||
7586 | pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); | 7609 | pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); |
7587 | spin_unlock(&inode->i_lock); | 7610 | spin_unlock(&inode->i_lock); |
7588 | /* Mark the bad layout state as invalid, then | ||
7589 | * retry using the open stateid. */ | ||
7590 | pnfs_free_lseg_list(&head); | 7611 | pnfs_free_lseg_list(&head); |
7612 | |||
7613 | task->tk_status = 0; | ||
7614 | rpc_restart_call_prepare(task); | ||
7591 | } | 7615 | } |
7592 | } | 7616 | } |
7593 | if (nfs4_async_handle_error(task, server, state) == -EAGAIN) | 7617 | if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) |
7594 | rpc_restart_call_prepare(task); | 7618 | rpc_restart_call_prepare(task); |
7595 | out: | 7619 | out: |
7596 | dprintk("<-- %s\n", __func__); | 7620 | dprintk("<-- %s\n", __func__); |
@@ -7750,7 +7774,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) | |||
7750 | case 0: | 7774 | case 0: |
7751 | break; | 7775 | break; |
7752 | case -NFS4ERR_DELAY: | 7776 | case -NFS4ERR_DELAY: |
7753 | if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN) | 7777 | if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN) |
7754 | break; | 7778 | break; |
7755 | rpc_restart_call_prepare(task); | 7779 | rpc_restart_call_prepare(task); |
7756 | return; | 7780 | return; |
@@ -7809,54 +7833,6 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) | |||
7809 | return status; | 7833 | return status; |
7810 | } | 7834 | } |
7811 | 7835 | ||
7812 | /* | ||
7813 | * Retrieve the list of Data Server devices from the MDS. | ||
7814 | */ | ||
7815 | static int _nfs4_getdevicelist(struct nfs_server *server, | ||
7816 | const struct nfs_fh *fh, | ||
7817 | struct pnfs_devicelist *devlist) | ||
7818 | { | ||
7819 | struct nfs4_getdevicelist_args args = { | ||
7820 | .fh = fh, | ||
7821 | .layoutclass = server->pnfs_curr_ld->id, | ||
7822 | }; | ||
7823 | struct nfs4_getdevicelist_res res = { | ||
7824 | .devlist = devlist, | ||
7825 | }; | ||
7826 | struct rpc_message msg = { | ||
7827 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICELIST], | ||
7828 | .rpc_argp = &args, | ||
7829 | .rpc_resp = &res, | ||
7830 | }; | ||
7831 | int status; | ||
7832 | |||
7833 | dprintk("--> %s\n", __func__); | ||
7834 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, | ||
7835 | &res.seq_res, 0); | ||
7836 | dprintk("<-- %s status=%d\n", __func__, status); | ||
7837 | return status; | ||
7838 | } | ||
7839 | |||
7840 | int nfs4_proc_getdevicelist(struct nfs_server *server, | ||
7841 | const struct nfs_fh *fh, | ||
7842 | struct pnfs_devicelist *devlist) | ||
7843 | { | ||
7844 | struct nfs4_exception exception = { }; | ||
7845 | int err; | ||
7846 | |||
7847 | do { | ||
7848 | err = nfs4_handle_exception(server, | ||
7849 | _nfs4_getdevicelist(server, fh, devlist), | ||
7850 | &exception); | ||
7851 | } while (exception.retry); | ||
7852 | |||
7853 | dprintk("%s: err=%d, num_devs=%u\n", __func__, | ||
7854 | err, devlist->num_devs); | ||
7855 | |||
7856 | return err; | ||
7857 | } | ||
7858 | EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist); | ||
7859 | |||
7860 | static int | 7836 | static int |
7861 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, | 7837 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, |
7862 | struct pnfs_device *pdev, | 7838 | struct pnfs_device *pdev, |
@@ -7929,7 +7905,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) | |||
7929 | case 0: | 7905 | case 0: |
7930 | break; | 7906 | break; |
7931 | default: | 7907 | default: |
7932 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { | 7908 | if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN) { |
7933 | rpc_restart_call_prepare(task); | 7909 | rpc_restart_call_prepare(task); |
7934 | return; | 7910 | return; |
7935 | } | 7911 | } |
@@ -8225,7 +8201,7 @@ static void nfs41_free_stateid_done(struct rpc_task *task, void *calldata) | |||
8225 | 8201 | ||
8226 | switch (task->tk_status) { | 8202 | switch (task->tk_status) { |
8227 | case -NFS4ERR_DELAY: | 8203 | case -NFS4ERR_DELAY: |
8228 | if (nfs4_async_handle_error(task, data->server, NULL) == -EAGAIN) | 8204 | if (nfs4_async_handle_error(task, data->server, NULL, NULL) == -EAGAIN) |
8229 | rpc_restart_call_prepare(task); | 8205 | rpc_restart_call_prepare(task); |
8230 | } | 8206 | } |
8231 | } | 8207 | } |
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 1720d32ffa54..e1ba58c3d1ad 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c | |||
@@ -88,10 +88,18 @@ nfs4_renew_state(struct work_struct *work) | |||
88 | } | 88 | } |
89 | nfs_expire_all_delegations(clp); | 89 | nfs_expire_all_delegations(clp); |
90 | } else { | 90 | } else { |
91 | int ret; | ||
92 | |||
91 | /* Queue an asynchronous RENEW. */ | 93 | /* Queue an asynchronous RENEW. */ |
92 | ops->sched_state_renewal(clp, cred, renew_flags); | 94 | ret = ops->sched_state_renewal(clp, cred, renew_flags); |
93 | put_rpccred(cred); | 95 | put_rpccred(cred); |
94 | goto out_exp; | 96 | switch (ret) { |
97 | default: | ||
98 | goto out_exp; | ||
99 | case -EAGAIN: | ||
100 | case -ENOMEM: | ||
101 | break; | ||
102 | } | ||
95 | } | 103 | } |
96 | } else { | 104 | } else { |
97 | dprintk("%s: failed to call renewd. Reason: lease not expired \n", | 105 | dprintk("%s: failed to call renewd. Reason: lease not expired \n", |
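nfs4_renew_state previously ignored the result of queuing the asynchronous RENEW/SEQUENCE; together with the nfs41_proc_async_sequence change above (returning -EAGAIN when no renewal is due), the scheduler can now tell "queued" from "not queued". The contract, as the hunk applies it:

	ret = ops->sched_state_renewal(clp, cred, renew_flags);
	switch (ret) {
	default:
		goto out_exp;	/* RPC queued: rearming happens on completion */
	case -EAGAIN:
	case -ENOMEM:
		break;		/* nothing queued: fall through and rearm here */
	}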
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 22fe35104c0c..5194933ed419 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -1705,7 +1705,8 @@ restart: | |||
1705 | if (status < 0) { | 1705 | if (status < 0) { |
1706 | set_bit(ops->owner_flag_bit, &sp->so_flags); | 1706 | set_bit(ops->owner_flag_bit, &sp->so_flags); |
1707 | nfs4_put_state_owner(sp); | 1707 | nfs4_put_state_owner(sp); |
1708 | return nfs4_recovery_handle_error(clp, status); | 1708 | status = nfs4_recovery_handle_error(clp, status); |
1709 | return (status != 0) ? status : -EAGAIN; | ||
1709 | } | 1710 | } |
1710 | 1711 | ||
1711 | nfs4_put_state_owner(sp); | 1712 | nfs4_put_state_owner(sp); |
@@ -1714,7 +1715,7 @@ restart: | |||
1714 | spin_unlock(&clp->cl_lock); | 1715 | spin_unlock(&clp->cl_lock); |
1715 | } | 1716 | } |
1716 | rcu_read_unlock(); | 1717 | rcu_read_unlock(); |
1717 | return status; | 1718 | return 0; |
1718 | } | 1719 | } |
1719 | 1720 | ||
1720 | static int nfs4_check_lease(struct nfs_client *clp) | 1721 | static int nfs4_check_lease(struct nfs_client *clp) |
@@ -1761,7 +1762,6 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) | |||
1761 | break; | 1762 | break; |
1762 | case -NFS4ERR_STALE_CLIENTID: | 1763 | case -NFS4ERR_STALE_CLIENTID: |
1763 | clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); | 1764 | clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); |
1764 | nfs4_state_clear_reclaim_reboot(clp); | ||
1765 | nfs4_state_start_reclaim_reboot(clp); | 1765 | nfs4_state_start_reclaim_reboot(clp); |
1766 | break; | 1766 | break; |
1767 | case -NFS4ERR_CLID_INUSE: | 1767 | case -NFS4ERR_CLID_INUSE: |
@@ -2345,6 +2345,7 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
2345 | status = nfs4_check_lease(clp); | 2345 | status = nfs4_check_lease(clp); |
2346 | if (status < 0) | 2346 | if (status < 0) |
2347 | goto out_error; | 2347 | goto out_error; |
2348 | continue; | ||
2348 | } | 2349 | } |
2349 | 2350 | ||
2350 | if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) { | 2351 | if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) { |
@@ -2366,14 +2367,11 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
2366 | section = "reclaim reboot"; | 2367 | section = "reclaim reboot"; |
2367 | status = nfs4_do_reclaim(clp, | 2368 | status = nfs4_do_reclaim(clp, |
2368 | clp->cl_mvops->reboot_recovery_ops); | 2369 | clp->cl_mvops->reboot_recovery_ops); |
2369 | if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || | 2370 | if (status == -EAGAIN) |
2370 | test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) | ||
2371 | continue; | ||
2372 | nfs4_state_end_reclaim_reboot(clp); | ||
2373 | if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) | ||
2374 | continue; | 2371 | continue; |
2375 | if (status < 0) | 2372 | if (status < 0) |
2376 | goto out_error; | 2373 | goto out_error; |
2374 | nfs4_state_end_reclaim_reboot(clp); | ||
2377 | } | 2375 | } |
2378 | 2376 | ||
2379 | /* Now recover expired state... */ | 2377 | /* Now recover expired state... */ |
@@ -2381,9 +2379,7 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
2381 | section = "reclaim nograce"; | 2379 | section = "reclaim nograce"; |
2382 | status = nfs4_do_reclaim(clp, | 2380 | status = nfs4_do_reclaim(clp, |
2383 | clp->cl_mvops->nograce_recovery_ops); | 2381 | clp->cl_mvops->nograce_recovery_ops); |
2384 | if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || | 2382 | if (status == -EAGAIN) |
2385 | test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) || | ||
2386 | test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) | ||
2387 | continue; | 2383 | continue; |
2388 | if (status < 0) | 2384 | if (status < 0) |
2389 | goto out_error; | 2385 | goto out_error; |
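The state-manager hunks replace the re-testing of individual NFS4CLNT_* bits with a return-value convention from nfs4_do_reclaim: 0 means the reclaim pass completed, -EAGAIN means state changed underneath it and the manager loop should restart, anything else is fatal. Each loop body reduces to:

	status = nfs4_do_reclaim(clp, ops);
	if (status == -EAGAIN)
		continue;	/* e.g. lease expired or session reset mid-pass */
	if (status < 0)
		goto out_error;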
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e13b59d8d9aa..005d03c5d274 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -362,25 +362,19 @@ static int nfs4_stat_to_errno(int); | |||
362 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) | 362 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) |
363 | #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) | 363 | #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) |
364 | #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) | 364 | #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) |
365 | #define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 + \ | 365 | #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + \ |
366 | encode_verifier_maxsz) | 366 | XDR_QUADLEN(NFS4_DEVICEID4_SIZE) + \ |
367 | #define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + \ | 367 | 1 /* layout type */ + \ |
368 | 2 /* nfs_cookie4 gdlr_cookie */ + \ | 368 | 1 /* maxcount */ + \ |
369 | decode_verifier_maxsz \ | 369 | 1 /* bitmap size */ + \ |
370 | /* verifier4 gdlr_verifier */ + \ | 370 | 1 /* notification bitmap length */ + \ |
371 | 1 /* gdlr_deviceid_list count */ + \ | 371 | 1 /* notification bitmap, word 0 */) |
372 | XDR_QUADLEN(NFS4_PNFS_GETDEVLIST_MAXNUM * \ | ||
373 | NFS4_DEVICEID4_SIZE) \ | ||
374 | /* gdlr_deviceid_list */ + \ | ||
375 | 1 /* bool gdlr_eof */) | ||
376 | #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \ | ||
377 | XDR_QUADLEN(NFS4_DEVICEID4_SIZE)) | ||
378 | #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ | 372 | #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ |
379 | 1 /* layout type */ + \ | 373 | 1 /* layout type */ + \ |
380 | 1 /* opaque devaddr4 length */ + \ | 374 | 1 /* opaque devaddr4 length */ + \ |
381 | /* devaddr4 payload is read into page */ \ | 375 | /* devaddr4 payload is read into page */ \ |
382 | 1 /* notification bitmap length */ + \ | 376 | 1 /* notification bitmap length */ + \ |
383 | 1 /* notification bitmap */) | 377 | 1 /* notification bitmap, word 0 */) |
384 | #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \ | 378 | #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \ |
385 | encode_stateid_maxsz) | 379 | encode_stateid_maxsz) |
386 | #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ | 380 | #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ |
@@ -395,7 +389,10 @@ static int nfs4_stat_to_errno(int); | |||
395 | 2 /* last byte written */ + \ | 389 | 2 /* last byte written */ + \ |
396 | 1 /* nt_timechanged (false) */ + \ | 390 | 1 /* nt_timechanged (false) */ + \ |
397 | 1 /* layoutupdate4 layout type */ + \ | 391 | 1 /* layoutupdate4 layout type */ + \ |
398 | 1 /* NULL filelayout layoutupdate4 payload */) | 392 | 1 /* layoutupdate4 opaqueue len */) |
393 | /* the actual content of layoutupdate4 should | ||
394 | be allocated by drivers and spliced in | ||
395 | using xdr_write_pages */ | ||
399 | #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) | 396 | #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) |
400 | #define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ | 397 | #define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ |
401 | encode_stateid_maxsz + \ | 398 | encode_stateid_maxsz + \ |
@@ -809,14 +806,6 @@ static int nfs4_stat_to_errno(int); | |||
809 | #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ | 806 | #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ |
810 | decode_sequence_maxsz + \ | 807 | decode_sequence_maxsz + \ |
811 | decode_reclaim_complete_maxsz) | 808 | decode_reclaim_complete_maxsz) |
812 | #define NFS4_enc_getdevicelist_sz (compound_encode_hdr_maxsz + \ | ||
813 | encode_sequence_maxsz + \ | ||
814 | encode_putfh_maxsz + \ | ||
815 | encode_getdevicelist_maxsz) | ||
816 | #define NFS4_dec_getdevicelist_sz (compound_decode_hdr_maxsz + \ | ||
817 | decode_sequence_maxsz + \ | ||
818 | decode_putfh_maxsz + \ | ||
819 | decode_getdevicelist_maxsz) | ||
820 | #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ | 809 | #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ |
821 | encode_sequence_maxsz +\ | 810 | encode_sequence_maxsz +\ |
822 | encode_getdeviceinfo_maxsz) | 811 | encode_getdeviceinfo_maxsz) |
@@ -1927,24 +1916,6 @@ static void encode_sequence(struct xdr_stream *xdr, | |||
1927 | 1916 | ||
1928 | #ifdef CONFIG_NFS_V4_1 | 1917 | #ifdef CONFIG_NFS_V4_1 |
1929 | static void | 1918 | static void |
1930 | encode_getdevicelist(struct xdr_stream *xdr, | ||
1931 | const struct nfs4_getdevicelist_args *args, | ||
1932 | struct compound_hdr *hdr) | ||
1933 | { | ||
1934 | __be32 *p; | ||
1935 | nfs4_verifier dummy = { | ||
1936 | .data = "dummmmmy", | ||
1937 | }; | ||
1938 | |||
1939 | encode_op_hdr(xdr, OP_GETDEVICELIST, decode_getdevicelist_maxsz, hdr); | ||
1940 | p = reserve_space(xdr, 16); | ||
1941 | *p++ = cpu_to_be32(args->layoutclass); | ||
1942 | *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM); | ||
1943 | xdr_encode_hyper(p, 0ULL); /* cookie */ | ||
1944 | encode_nfs4_verifier(xdr, &dummy); | ||
1945 | } | ||
1946 | |||
1947 | static void | ||
1948 | encode_getdeviceinfo(struct xdr_stream *xdr, | 1919 | encode_getdeviceinfo(struct xdr_stream *xdr, |
1949 | const struct nfs4_getdeviceinfo_args *args, | 1920 | const struct nfs4_getdeviceinfo_args *args, |
1950 | struct compound_hdr *hdr) | 1921 | struct compound_hdr *hdr) |
@@ -1952,12 +1923,15 @@ encode_getdeviceinfo(struct xdr_stream *xdr, | |||
1952 | __be32 *p; | 1923 | __be32 *p; |
1953 | 1924 | ||
1954 | encode_op_hdr(xdr, OP_GETDEVICEINFO, decode_getdeviceinfo_maxsz, hdr); | 1925 | encode_op_hdr(xdr, OP_GETDEVICEINFO, decode_getdeviceinfo_maxsz, hdr); |
1955 | p = reserve_space(xdr, 12 + NFS4_DEVICEID4_SIZE); | 1926 | p = reserve_space(xdr, NFS4_DEVICEID4_SIZE + 4 + 4); |
1956 | p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, | 1927 | p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, |
1957 | NFS4_DEVICEID4_SIZE); | 1928 | NFS4_DEVICEID4_SIZE); |
1958 | *p++ = cpu_to_be32(args->pdev->layout_type); | 1929 | *p++ = cpu_to_be32(args->pdev->layout_type); |
1959 | *p++ = cpu_to_be32(args->pdev->maxcount); /* gdia_maxcount */ | 1930 | *p++ = cpu_to_be32(args->pdev->maxcount); /* gdia_maxcount */ |
1960 | *p++ = cpu_to_be32(0); /* bitmap length 0 */ | 1931 | |
1932 | p = reserve_space(xdr, 4 + 4); | ||
1933 | *p++ = cpu_to_be32(1); /* bitmap length */ | ||
1934 | *p++ = cpu_to_be32(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE); | ||
1961 | } | 1935 | } |
1962 | 1936 | ||
1963 | static void | 1937 | static void |
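With GETDEVICELIST gone, GETDEVICEINFO also starts requesting deviceid notifications: the gdia_notify_types bitmap goes from empty to one word carrying the change and delete bits (RFC 5661, GETDEVICEINFO). On the wire this is:

	/* bitmap4 gdia_notify_types: length 1, then word 0 */
	p = reserve_space(xdr, 4 + 4);
	*p++ = cpu_to_be32(1);
	*p++ = cpu_to_be32(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE);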
@@ -1990,7 +1964,7 @@ encode_layoutget(struct xdr_stream *xdr, | |||
1990 | static int | 1964 | static int |
1991 | encode_layoutcommit(struct xdr_stream *xdr, | 1965 | encode_layoutcommit(struct xdr_stream *xdr, |
1992 | struct inode *inode, | 1966 | struct inode *inode, |
1993 | const struct nfs4_layoutcommit_args *args, | 1967 | struct nfs4_layoutcommit_args *args, |
1994 | struct compound_hdr *hdr) | 1968 | struct compound_hdr *hdr) |
1995 | { | 1969 | { |
1996 | __be32 *p; | 1970 | __be32 *p; |
@@ -2011,11 +1985,16 @@ encode_layoutcommit(struct xdr_stream *xdr, | |||
2011 | *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ | 1985 | *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ |
2012 | *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ | 1986 | *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ |
2013 | 1987 | ||
2014 | if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) | 1988 | if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) { |
2015 | NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( | 1989 | NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( |
2016 | NFS_I(inode)->layout, xdr, args); | 1990 | NFS_I(inode)->layout, xdr, args); |
2017 | else | 1991 | } else { |
2018 | encode_uint32(xdr, 0); /* no layout-type payload */ | 1992 | encode_uint32(xdr, args->layoutupdate_len); |
1993 | if (args->layoutupdate_pages) { | ||
1994 | xdr_write_pages(xdr, args->layoutupdate_pages, 0, | ||
1995 | args->layoutupdate_len); | ||
1996 | } | ||
1997 | } | ||
2019 | 1998 | ||
2020 | return 0; | 1999 | return 0; |
2021 | } | 2000 | } |
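encode_layoutcommit loses its const args because drivers without an encode_layoutcommit hook can now hand over a pre-encoded layoutupdate4 body: the generic encoder writes the opaque length and splices the driver's pages in with xdr_write_pages. Driver-side sketch (how the pages get filled is up to the driver; only the two field names come from the hunk):

	args->layoutupdate_pages = pages;	/* driver-encoded layoutupdate4 body */
	args->layoutupdate_len = len;		/* its length in bytes */
	/* the generic side then emits:
	 *	encode_uint32(xdr, args->layoutupdate_len);
	 *	xdr_write_pages(xdr, args->layoutupdate_pages, 0,
	 *			args->layoutupdate_len);
	 */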
@@ -2893,24 +2872,6 @@ static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, | |||
2893 | } | 2872 | } |
2894 | 2873 | ||
2895 | /* | 2874 | /* |
2896 | * Encode GETDEVICELIST request | ||
2897 | */ | ||
2898 | static void nfs4_xdr_enc_getdevicelist(struct rpc_rqst *req, | ||
2899 | struct xdr_stream *xdr, | ||
2900 | struct nfs4_getdevicelist_args *args) | ||
2901 | { | ||
2902 | struct compound_hdr hdr = { | ||
2903 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2904 | }; | ||
2905 | |||
2906 | encode_compound_hdr(xdr, req, &hdr); | ||
2907 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
2908 | encode_putfh(xdr, args->fh, &hdr); | ||
2909 | encode_getdevicelist(xdr, args, &hdr); | ||
2910 | encode_nops(&hdr); | ||
2911 | } | ||
2912 | |||
2913 | /* | ||
2914 | * Encode GETDEVICEINFO request | 2875 | * Encode GETDEVICEINFO request |
2915 | */ | 2876 | */ |
2916 | static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, | 2877 | static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, |
@@ -5765,54 +5726,6 @@ out_overflow: | |||
5765 | } | 5726 | } |
5766 | 5727 | ||
5767 | #if defined(CONFIG_NFS_V4_1) | 5728 | #if defined(CONFIG_NFS_V4_1) |
5768 | /* | ||
5769 | * TODO: Need to handle case when EOF != true; | ||
5770 | */ | ||
5771 | static int decode_getdevicelist(struct xdr_stream *xdr, | ||
5772 | struct pnfs_devicelist *res) | ||
5773 | { | ||
5774 | __be32 *p; | ||
5775 | int status, i; | ||
5776 | nfs4_verifier verftemp; | ||
5777 | |||
5778 | status = decode_op_hdr(xdr, OP_GETDEVICELIST); | ||
5779 | if (status) | ||
5780 | return status; | ||
5781 | |||
5782 | p = xdr_inline_decode(xdr, 8 + 8 + 4); | ||
5783 | if (unlikely(!p)) | ||
5784 | goto out_overflow; | ||
5785 | |||
5786 | /* TODO: Skip cookie for now */ | ||
5787 | p += 2; | ||
5788 | |||
5789 | /* Read verifier */ | ||
5790 | p = xdr_decode_opaque_fixed(p, verftemp.data, NFS4_VERIFIER_SIZE); | ||
5791 | |||
5792 | res->num_devs = be32_to_cpup(p); | ||
5793 | |||
5794 | dprintk("%s: num_dev %d\n", __func__, res->num_devs); | ||
5795 | |||
5796 | if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) { | ||
5797 | printk(KERN_ERR "NFS: %s too many result dev_num %u\n", | ||
5798 | __func__, res->num_devs); | ||
5799 | return -EIO; | ||
5800 | } | ||
5801 | |||
5802 | p = xdr_inline_decode(xdr, | ||
5803 | res->num_devs * NFS4_DEVICEID4_SIZE + 4); | ||
5804 | if (unlikely(!p)) | ||
5805 | goto out_overflow; | ||
5806 | for (i = 0; i < res->num_devs; i++) | ||
5807 | p = xdr_decode_opaque_fixed(p, res->dev_id[i].data, | ||
5808 | NFS4_DEVICEID4_SIZE); | ||
5809 | res->eof = be32_to_cpup(p); | ||
5810 | return 0; | ||
5811 | out_overflow: | ||
5812 | print_overflow_msg(__func__, xdr); | ||
5813 | return -EIO; | ||
5814 | } | ||
5815 | |||
5816 | static int decode_getdeviceinfo(struct xdr_stream *xdr, | 5729 | static int decode_getdeviceinfo(struct xdr_stream *xdr, |
5817 | struct pnfs_device *pdev) | 5730 | struct pnfs_device *pdev) |
5818 | { | 5731 | { |
@@ -5862,9 +5775,16 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr, | |||
5862 | p = xdr_inline_decode(xdr, 4 * len); | 5775 | p = xdr_inline_decode(xdr, 4 * len); |
5863 | if (unlikely(!p)) | 5776 | if (unlikely(!p)) |
5864 | goto out_overflow; | 5777 | goto out_overflow; |
5865 | for (i = 0; i < len; i++, p++) { | 5778 | |
5866 | if (be32_to_cpup(p)) { | 5779 | if (be32_to_cpup(p++) & |
5867 | dprintk("%s: notifications not supported\n", | 5780 | ~(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE)) { |
5781 | dprintk("%s: unsupported notification\n", | ||
5782 | __func__); | ||
5783 | } | ||
5784 | |||
5785 | for (i = 1; i < len; i++) { | ||
5786 | if (be32_to_cpup(p++)) { | ||
5787 | dprintk("%s: unsupported notification\n", | ||
5868 | __func__); | 5788 | __func__); |
5869 | return -EIO; | 5789 | return -EIO; |
5870 | } | 5790 | } |
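The decode side is relaxed to match: word 0 of the notification bitmap may contain the two bits just requested (unexpected bits in word 0 only log a dprintk), while any set bit in later words is still an error. In short:

	if (be32_to_cpup(p++) &
	    ~(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE))
		dprintk("%s: unsupported notification\n", __func__);
	for (i = 1; i < len; i++)
		if (be32_to_cpup(p++))
			return -EIO;	/* unknown notification word */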
@@ -7097,32 +7017,6 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, | |||
7097 | } | 7017 | } |
7098 | 7018 | ||
7099 | /* | 7019 | /* |
7100 | * Decode GETDEVICELIST response | ||
7101 | */ | ||
7102 | static int nfs4_xdr_dec_getdevicelist(struct rpc_rqst *rqstp, | ||
7103 | struct xdr_stream *xdr, | ||
7104 | struct nfs4_getdevicelist_res *res) | ||
7105 | { | ||
7106 | struct compound_hdr hdr; | ||
7107 | int status; | ||
7108 | |||
7109 | dprintk("encoding getdevicelist!\n"); | ||
7110 | |||
7111 | status = decode_compound_hdr(xdr, &hdr); | ||
7112 | if (status != 0) | ||
7113 | goto out; | ||
7114 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
7115 | if (status != 0) | ||
7116 | goto out; | ||
7117 | status = decode_putfh(xdr); | ||
7118 | if (status != 0) | ||
7119 | goto out; | ||
7120 | status = decode_getdevicelist(xdr, res->devlist); | ||
7121 | out: | ||
7122 | return status; | ||
7123 | } | ||
7124 | |||
7125 | /* | ||
7126 | * Decode GETDEVINFO response | 7020 | * Decode GETDEVINFO response |
7127 | */ | 7021 | */ |
7128 | static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, | 7022 | static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, |
@@ -7490,7 +7384,6 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
7490 | PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), | 7384 | PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), |
7491 | PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), | 7385 | PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), |
7492 | PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), | 7386 | PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), |
7493 | PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist), | ||
7494 | PROC(BIND_CONN_TO_SESSION, | 7387 | PROC(BIND_CONN_TO_SESSION, |
7495 | enc_bind_conn_to_session, dec_bind_conn_to_session), | 7388 | enc_bind_conn_to_session, dec_bind_conn_to_session), |
7496 | PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid), | 7389 | PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid), |
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index ae05278b3761..c6e4bda63000 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -60,52 +60,6 @@ objio_free_deviceid_node(struct nfs4_deviceid_node *d) | |||
60 | kfree(de); | 60 | kfree(de); |
61 | } | 61 | } |
62 | 62 | ||
63 | static struct objio_dev_ent *_dev_list_find(const struct nfs_server *nfss, | ||
64 | const struct nfs4_deviceid *d_id) | ||
65 | { | ||
66 | struct nfs4_deviceid_node *d; | ||
67 | struct objio_dev_ent *de; | ||
68 | |||
69 | d = nfs4_find_get_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client, d_id); | ||
70 | if (!d) | ||
71 | return NULL; | ||
72 | |||
73 | de = container_of(d, struct objio_dev_ent, id_node); | ||
74 | return de; | ||
75 | } | ||
76 | |||
77 | static struct objio_dev_ent * | ||
78 | _dev_list_add(const struct nfs_server *nfss, | ||
79 | const struct nfs4_deviceid *d_id, struct osd_dev *od, | ||
80 | gfp_t gfp_flags) | ||
81 | { | ||
82 | struct nfs4_deviceid_node *d; | ||
83 | struct objio_dev_ent *de = kzalloc(sizeof(*de), gfp_flags); | ||
84 | struct objio_dev_ent *n; | ||
85 | |||
86 | if (!de) { | ||
87 | dprintk("%s: -ENOMEM od=%p\n", __func__, od); | ||
88 | return NULL; | ||
89 | } | ||
90 | |||
91 | dprintk("%s: Adding od=%p\n", __func__, od); | ||
92 | nfs4_init_deviceid_node(&de->id_node, | ||
93 | nfss->pnfs_curr_ld, | ||
94 | nfss->nfs_client, | ||
95 | d_id); | ||
96 | de->od.od = od; | ||
97 | |||
98 | d = nfs4_insert_deviceid_node(&de->id_node); | ||
99 | n = container_of(d, struct objio_dev_ent, id_node); | ||
100 | if (n != de) { | ||
101 | dprintk("%s: Race with other n->od=%p\n", __func__, n->od.od); | ||
102 | objio_free_deviceid_node(&de->id_node); | ||
103 | de = n; | ||
104 | } | ||
105 | |||
106 | return de; | ||
107 | } | ||
108 | |||
109 | struct objio_segment { | 63 | struct objio_segment { |
110 | struct pnfs_layout_segment lseg; | 64 | struct pnfs_layout_segment lseg; |
111 | 65 | ||
@@ -130,29 +84,24 @@ struct objio_state { | |||
130 | 84 | ||
131 | /* Send and wait for a get_device_info of devices in the layout, | 85 | /* Send and wait for a get_device_info of devices in the layout, |
132 | then look them up with the osd_initiator library */ | 86 | then look them up with the osd_initiator library */ |
133 | static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, | 87 | struct nfs4_deviceid_node * |
134 | struct objio_segment *objio_seg, unsigned c, struct nfs4_deviceid *d_id, | 88 | objio_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, |
135 | gfp_t gfp_flags) | 89 | gfp_t gfp_flags) |
136 | { | 90 | { |
137 | struct pnfs_osd_deviceaddr *deviceaddr; | 91 | struct pnfs_osd_deviceaddr *deviceaddr; |
138 | struct objio_dev_ent *ode; | 92 | struct objio_dev_ent *ode = NULL; |
139 | struct osd_dev *od; | 93 | struct osd_dev *od; |
140 | struct osd_dev_info odi; | 94 | struct osd_dev_info odi; |
141 | bool retry_flag = true; | 95 | bool retry_flag = true; |
96 | __be32 *p; | ||
142 | int err; | 97 | int err; |
143 | 98 | ||
144 | ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); | 99 | deviceaddr = kzalloc(sizeof(*deviceaddr), gfp_flags); |
145 | if (ode) { | 100 | if (!deviceaddr) |
146 | objio_seg->oc.ods[c] = &ode->od; /* must use container_of */ | 101 | return NULL; |
147 | return 0; | ||
148 | } | ||
149 | 102 | ||
150 | err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags); | 103 | p = page_address(pdev->pages[0]); |
151 | if (unlikely(err)) { | 104 | pnfs_osd_xdr_decode_deviceaddr(deviceaddr, p); |
152 | dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n", | ||
153 | __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err); | ||
154 | return err; | ||
155 | } | ||
156 | 105 | ||
157 | odi.systemid_len = deviceaddr->oda_systemid.len; | 106 | odi.systemid_len = deviceaddr->oda_systemid.len; |
158 | if (odi.systemid_len > sizeof(odi.systemid)) { | 107 | if (odi.systemid_len > sizeof(odi.systemid)) { |
@@ -188,14 +137,24 @@ retry_lookup: | |||
188 | goto out; | 137 | goto out; |
189 | } | 138 | } |
190 | 139 | ||
191 | ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od, | ||
192 | gfp_flags); | ||
193 | objio_seg->oc.ods[c] = &ode->od; /* must use container_of */ | ||
194 | dprintk("Adding new dev_id(%llx:%llx)\n", | 140 | dprintk("Adding new dev_id(%llx:%llx)\n", |
195 | _DEVID_LO(d_id), _DEVID_HI(d_id)); | 141 | _DEVID_LO(&pdev->dev_id), _DEVID_HI(&pdev->dev_id)); |
142 | |||
143 | ode = kzalloc(sizeof(*ode), gfp_flags); | ||
144 | if (!ode) { | ||
145 | dprintk("%s: -ENOMEM od=%p\n", __func__, od); | ||
146 | goto out; | ||
147 | } | ||
148 | |||
149 | nfs4_init_deviceid_node(&ode->id_node, server, &pdev->dev_id); | ||
150 | kfree(deviceaddr); | ||
151 | |||
152 | ode->od.od = od; | ||
153 | return &ode->id_node; | ||
154 | |||
196 | out: | 155 | out: |
197 | objlayout_put_deviceinfo(deviceaddr); | 156 | kfree(deviceaddr); |
198 | return err; | 157 | return NULL; |
199 | } | 158 | } |
200 | 159 | ||
201 | static void copy_single_comp(struct ore_components *oc, unsigned c, | 160 | static void copy_single_comp(struct ore_components *oc, unsigned c, |
@@ -254,6 +213,7 @@ int objio_alloc_lseg(struct pnfs_layout_segment **outp, | |||
254 | struct xdr_stream *xdr, | 213 | struct xdr_stream *xdr, |
255 | gfp_t gfp_flags) | 214 | gfp_t gfp_flags) |
256 | { | 215 | { |
216 | struct nfs_server *server = NFS_SERVER(pnfslay->plh_inode); | ||
257 | struct objio_segment *objio_seg; | 217 | struct objio_segment *objio_seg; |
258 | struct pnfs_osd_xdr_decode_layout_iter iter; | 218 | struct pnfs_osd_xdr_decode_layout_iter iter; |
259 | struct pnfs_osd_layout layout; | 219 | struct pnfs_osd_layout layout; |
@@ -283,13 +243,21 @@ int objio_alloc_lseg(struct pnfs_layout_segment **outp, | |||
283 | objio_seg->oc.first_dev = layout.olo_comps_index; | 243 | objio_seg->oc.first_dev = layout.olo_comps_index; |
284 | cur_comp = 0; | 244 | cur_comp = 0; |
285 | while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) { | 245 | while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) { |
246 | struct nfs4_deviceid_node *d; | ||
247 | struct objio_dev_ent *ode; | ||
248 | |||
286 | copy_single_comp(&objio_seg->oc, cur_comp, &src_comp); | 249 | copy_single_comp(&objio_seg->oc, cur_comp, &src_comp); |
287 | err = objio_devices_lookup(pnfslay, objio_seg, cur_comp, | 250 | |
288 | &src_comp.oc_object_id.oid_device_id, | 251 | d = nfs4_find_get_deviceid(server, |
289 | gfp_flags); | 252 | &src_comp.oc_object_id.oid_device_id, |
290 | if (err) | 253 | pnfslay->plh_lc_cred, gfp_flags); |
254 | if (!d) { | ||
255 | err = -ENXIO; | ||
291 | goto err; | 256 | goto err; |
292 | ++cur_comp; | 257 | } |
258 | |||
259 | ode = container_of(d, struct objio_dev_ent, id_node); | ||
260 | objio_seg->oc.ods[cur_comp++] = &ode->od; | ||
293 | } | 261 | } |
294 | /* pnfs_osd_xdr_decode_layout_comp returns false on error */ | 262 | /* pnfs_osd_xdr_decode_layout_comp returns false on error */ |
295 | if (unlikely(err)) | 263 | if (unlikely(err)) |
@@ -653,6 +621,7 @@ static struct pnfs_layoutdriver_type objlayout_type = { | |||
653 | .flags = PNFS_LAYOUTRET_ON_SETATTR | | 621 | .flags = PNFS_LAYOUTRET_ON_SETATTR | |
654 | PNFS_LAYOUTRET_ON_ERROR, | 622 | PNFS_LAYOUTRET_ON_ERROR, |
655 | 623 | ||
624 | .max_deviceinfo_size = PAGE_SIZE, | ||
656 | .owner = THIS_MODULE, | 625 | .owner = THIS_MODULE, |
657 | .alloc_layout_hdr = objlayout_alloc_layout_hdr, | 626 | .alloc_layout_hdr = objlayout_alloc_layout_hdr, |
658 | .free_layout_hdr = objlayout_free_layout_hdr, | 627 | .free_layout_hdr = objlayout_free_layout_hdr, |
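[Sketch] The objio_osd.c hunks above drop the private _dev_list_add() bookkeeping: the generic deviceid cache now hands back the embedded nfs4_deviceid_node, and container_of() recovers the outer objio device entry. A minimal user-space sketch of that recovery step, using stand-in types (the real ore_dev and objio_dev_ent layouts live in the exofs/objlayout headers; only the embedding relationship matters here):

#include <stddef.h>

/* Stand-ins for the kernel types; only the embedding matters. */
struct nfs4_deviceid_node { int ref; };
struct ore_dev { void *od; };
struct objio_dev_ent {
	struct ore_dev od;                     /* what the ORE engine consumes */
	struct nfs4_deviceid_node id_node;     /* embedded generic node */
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Mirrors the step in objio_alloc_lseg() above: the cache returns the
 * embedded node, container_of() recovers the outer device entry. */
static struct objio_dev_ent *to_objio_dev_ent(struct nfs4_deviceid_node *d)
{
	return container_of(d, struct objio_dev_ent, id_node);
}

int main(void)
{
	struct objio_dev_ent ode;
	return to_objio_dev_ent(&ode.id_node) == &ode ? 0 : 1;
}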
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 697a16d11fac..c89357c7a914 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c | |||
@@ -574,76 +574,6 @@ loop_done: | |||
574 | dprintk("%s: Return\n", __func__); | 574 | dprintk("%s: Return\n", __func__); |
575 | } | 575 | } |
576 | 576 | ||
577 | |||
578 | /* | ||
579 | * Get Device Info API for io engines | ||
580 | */ | ||
581 | struct objlayout_deviceinfo { | ||
582 | struct page *page; | ||
583 | struct pnfs_osd_deviceaddr da; /* This must be last */ | ||
584 | }; | ||
585 | |||
586 | /* Initialize and call nfs_getdeviceinfo, then decode and return a | ||
587 | * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo() | ||
588 | * should be called. | ||
589 | */ | ||
590 | int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, | ||
591 | struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, | ||
592 | gfp_t gfp_flags) | ||
593 | { | ||
594 | struct objlayout_deviceinfo *odi; | ||
595 | struct pnfs_device pd; | ||
596 | struct page *page, **pages; | ||
597 | u32 *p; | ||
598 | int err; | ||
599 | |||
600 | page = alloc_page(gfp_flags); | ||
601 | if (!page) | ||
602 | return -ENOMEM; | ||
603 | |||
604 | pages = &page; | ||
605 | pd.pages = pages; | ||
606 | |||
607 | memcpy(&pd.dev_id, d_id, sizeof(*d_id)); | ||
608 | pd.layout_type = LAYOUT_OSD2_OBJECTS; | ||
609 | pd.pages = &page; | ||
610 | pd.pgbase = 0; | ||
611 | pd.pglen = PAGE_SIZE; | ||
612 | pd.mincount = 0; | ||
613 | pd.maxcount = PAGE_SIZE; | ||
614 | |||
615 | err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd, | ||
616 | pnfslay->plh_lc_cred); | ||
617 | dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err); | ||
618 | if (err) | ||
619 | goto err_out; | ||
620 | |||
621 | p = page_address(page); | ||
622 | odi = kzalloc(sizeof(*odi), gfp_flags); | ||
623 | if (!odi) { | ||
624 | err = -ENOMEM; | ||
625 | goto err_out; | ||
626 | } | ||
627 | pnfs_osd_xdr_decode_deviceaddr(&odi->da, p); | ||
628 | odi->page = page; | ||
629 | *deviceaddr = &odi->da; | ||
630 | return 0; | ||
631 | |||
632 | err_out: | ||
633 | __free_page(page); | ||
634 | return err; | ||
635 | } | ||
636 | |||
637 | void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr) | ||
638 | { | ||
639 | struct objlayout_deviceinfo *odi = container_of(deviceaddr, | ||
640 | struct objlayout_deviceinfo, | ||
641 | da); | ||
642 | |||
643 | __free_page(odi->page); | ||
644 | kfree(odi); | ||
645 | } | ||
646 | |||
647 | enum { | 577 | enum { |
648 | OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64, | 578 | OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64, |
649 | OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1, | 579 | OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1, |
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index fd13f1d2f136..3a0828d57339 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h | |||
@@ -149,11 +149,6 @@ extern void objlayout_read_done(struct objlayout_io_res *oir, | |||
149 | extern void objlayout_write_done(struct objlayout_io_res *oir, | 149 | extern void objlayout_write_done(struct objlayout_io_res *oir, |
150 | ssize_t status, bool sync); | 150 | ssize_t status, bool sync); |
151 | 151 | ||
152 | extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, | ||
153 | struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, | ||
154 | gfp_t gfp_flags); | ||
155 | extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr); | ||
156 | |||
157 | /* | 152 | /* |
158 | * exported generic objects function vectors | 153 | * exported generic objects function vectors |
159 | */ | 154 | */ |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index be7cbce6e4c7..94e16ec88312 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -481,6 +481,14 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, | |||
481 | return 0; | 481 | return 0; |
482 | } | 482 | } |
483 | 483 | ||
484 | /* | ||
485 | * Limit the request size so that we can still allocate a page array | ||
486 | * for it without upsetting the slab allocator. | ||
487 | */ | ||
488 | if (((desc->pg_count + req->wb_bytes) >> PAGE_SHIFT) * | ||
489 | sizeof(struct page) > PAGE_SIZE) | ||
490 | return 0; | ||
491 | |||
484 | return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); | 492 | return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); |
485 | } | 493 | } |
486 | EXPORT_SYMBOL_GPL(nfs_generic_pg_test); | 494 | EXPORT_SYMBOL_GPL(nfs_generic_pg_test); |
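[Sketch] The new check in nfs_generic_pg_test() caps a coalesced request so that the page array allocated for it still fits in one slab page. A runnable user-space sketch of the same arithmetic, assuming x86-64 PAGE_SHIFT/PAGE_SIZE values and a stubbed struct page (the real size is arch/config dependent):

#include <stdio.h>
#include <stddef.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

struct page { unsigned long stub[8]; };	/* stand-in; real size varies */

/* Same shape as the check above: bytes already coalesced plus the new
 * request, converted to a page count, must index into an array that still
 * fits in a single slab page. Returns the permitted size, 0 to refuse. */
static size_t pg_test(size_t pg_count, size_t pg_bsize, size_t wb_bytes)
{
	if (((pg_count + wb_bytes) >> PAGE_SHIFT) * sizeof(struct page) > PAGE_SIZE)
		return 0;
	return pg_bsize - pg_count < wb_bytes ? pg_bsize - pg_count : wb_bytes;
}

int main(void)
{
	printf("%zu\n", pg_test(16 << 12, 4 << 20, 4096));	/* small I/O: 4096 */
	printf("%zu\n", pg_test(1 << 20, 4 << 20, 4096));	/* array too big: 0 */
	return 0;
}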
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a3851debf8a2..76de7f568119 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -594,6 +594,9 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, | |||
594 | dprintk("%s freeing layout for inode %lu\n", __func__, | 594 | dprintk("%s freeing layout for inode %lu\n", __func__, |
595 | lo->plh_inode->i_ino); | 595 | lo->plh_inode->i_ino); |
596 | inode = lo->plh_inode; | 596 | inode = lo->plh_inode; |
597 | |||
598 | pnfs_layoutcommit_inode(inode, false); | ||
599 | |||
597 | spin_lock(&inode->i_lock); | 600 | spin_lock(&inode->i_lock); |
598 | list_del_init(&lo->plh_bulk_destroy); | 601 | list_del_init(&lo->plh_bulk_destroy); |
599 | lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ | 602 | lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ |
@@ -682,17 +685,6 @@ static bool pnfs_seqid_is_newer(u32 s1, u32 s2) | |||
682 | return (s32)(s1 - s2) > 0; | 685 | return (s32)(s1 - s2) > 0; |
683 | } | 686 | } |
684 | 687 | ||
685 | static void | ||
686 | pnfs_verify_layout_stateid(struct pnfs_layout_hdr *lo, | ||
687 | const nfs4_stateid *new, | ||
688 | struct list_head *free_me_list) | ||
689 | { | ||
690 | if (nfs4_stateid_match_other(&lo->plh_stateid, new)) | ||
691 | return; | ||
692 | /* Layout is new! Kill existing layout segments */ | ||
693 | pnfs_mark_matching_lsegs_invalid(lo, free_me_list, NULL); | ||
694 | } | ||
695 | |||
696 | /* update lo->plh_stateid with new if is more recent */ | 688 | /* update lo->plh_stateid with new if is more recent */ |
697 | void | 689 | void |
698 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, | 690 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, |
@@ -749,7 +741,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | |||
749 | status = -EAGAIN; | 741 | status = -EAGAIN; |
750 | } else if (!nfs4_valid_open_stateid(open_state)) { | 742 | } else if (!nfs4_valid_open_stateid(open_state)) { |
751 | status = -EBADF; | 743 | status = -EBADF; |
752 | } else if (list_empty(&lo->plh_segs)) { | 744 | } else if (list_empty(&lo->plh_segs) || |
745 | test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { | ||
753 | int seq; | 746 | int seq; |
754 | 747 | ||
755 | do { | 748 | do { |
@@ -864,6 +857,16 @@ _pnfs_return_layout(struct inode *ino) | |||
864 | empty = list_empty(&lo->plh_segs); | 857 | empty = list_empty(&lo->plh_segs); |
865 | pnfs_clear_layoutcommit(ino, &tmp_list); | 858 | pnfs_clear_layoutcommit(ino, &tmp_list); |
866 | pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); | 859 | pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); |
860 | |||
861 | if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { | ||
862 | struct pnfs_layout_range range = { | ||
863 | .iomode = IOMODE_ANY, | ||
864 | .offset = 0, | ||
865 | .length = NFS4_MAX_UINT64, | ||
866 | }; | ||
867 | NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range); | ||
868 | } | ||
869 | |||
867 | /* Don't send a LAYOUTRETURN if list was initially empty */ | 870 | /* Don't send a LAYOUTRETURN if list was initially empty */ |
868 | if (empty) { | 871 | if (empty) { |
869 | spin_unlock(&ino->i_lock); | 872 | spin_unlock(&ino->i_lock); |
@@ -871,6 +874,8 @@ _pnfs_return_layout(struct inode *ino) | |||
871 | dprintk("NFS: %s no layout segments to return\n", __func__); | 874 | dprintk("NFS: %s no layout segments to return\n", __func__); |
872 | goto out; | 875 | goto out; |
873 | } | 876 | } |
877 | |||
878 | set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); | ||
874 | lo->plh_block_lgets++; | 879 | lo->plh_block_lgets++; |
875 | spin_unlock(&ino->i_lock); | 880 | spin_unlock(&ino->i_lock); |
876 | pnfs_free_lseg_list(&tmp_list); | 881 | pnfs_free_lseg_list(&tmp_list); |
@@ -1358,25 +1363,41 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
1358 | goto out; | 1363 | goto out; |
1359 | } | 1364 | } |
1360 | 1365 | ||
1366 | init_lseg(lo, lseg); | ||
1367 | lseg->pls_range = res->range; | ||
1368 | |||
1361 | spin_lock(&ino->i_lock); | 1369 | spin_lock(&ino->i_lock); |
1362 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { | 1370 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { |
1363 | dprintk("%s forget reply due to recall\n", __func__); | 1371 | dprintk("%s forget reply due to recall\n", __func__); |
1364 | goto out_forget_reply; | 1372 | goto out_forget_reply; |
1365 | } | 1373 | } |
1366 | 1374 | ||
1367 | if (pnfs_layoutgets_blocked(lo, 1) || | 1375 | if (pnfs_layoutgets_blocked(lo, 1)) { |
1368 | pnfs_layout_stateid_blocked(lo, &res->stateid)) { | ||
1369 | dprintk("%s forget reply due to state\n", __func__); | 1376 | dprintk("%s forget reply due to state\n", __func__); |
1370 | goto out_forget_reply; | 1377 | goto out_forget_reply; |
1371 | } | 1378 | } |
1372 | 1379 | ||
1373 | /* Check that the new stateid matches the old stateid */ | 1380 | if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { |
1374 | pnfs_verify_layout_stateid(lo, &res->stateid, &free_me); | 1381 | /* existing state ID, make sure the sequence number matches. */ |
1375 | /* Done processing layoutget. Set the layout stateid */ | 1382 | if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { |
1376 | pnfs_set_layout_stateid(lo, &res->stateid, false); | 1383 | dprintk("%s forget reply due to sequence\n", __func__); |
1384 | goto out_forget_reply; | ||
1385 | } | ||
1386 | pnfs_set_layout_stateid(lo, &res->stateid, false); | ||
1387 | } else { | ||
1388 | /* | ||
1389 | * We got an entirely new state ID. Mark all segments for the | ||
1390 | * inode invalid, and don't bother validating the stateid | ||
1391 | * sequence number. | ||
1392 | */ | ||
1393 | pnfs_mark_matching_lsegs_invalid(lo, &free_me, NULL); | ||
1394 | |||
1395 | nfs4_stateid_copy(&lo->plh_stateid, &res->stateid); | ||
1396 | lo->plh_barrier = be32_to_cpu(res->stateid.seqid); | ||
1397 | } | ||
1398 | |||
1399 | clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); | ||
1377 | 1400 | ||
1378 | init_lseg(lo, lseg); | ||
1379 | lseg->pls_range = res->range; | ||
1380 | pnfs_get_lseg(lseg); | 1401 | pnfs_get_lseg(lseg); |
1381 | pnfs_layout_insert_lseg(lo, lseg); | 1402 | pnfs_layout_insert_lseg(lo, lseg); |
1382 | 1403 | ||
@@ -1797,6 +1818,35 @@ pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) | |||
1797 | } | 1818 | } |
1798 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); | 1819 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); |
1799 | 1820 | ||
1821 | void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data) | ||
1822 | { | ||
1823 | struct inode *inode = data->inode; | ||
1824 | struct nfs_inode *nfsi = NFS_I(inode); | ||
1825 | bool mark_as_dirty = false; | ||
1826 | |||
1827 | spin_lock(&inode->i_lock); | ||
1828 | if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | ||
1829 | mark_as_dirty = true; | ||
1830 | dprintk("%s: Set layoutcommit for inode %lu ", | ||
1831 | __func__, inode->i_ino); | ||
1832 | } | ||
1833 | if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &data->lseg->pls_flags)) { | ||
1834 | /* references matched in nfs4_layoutcommit_release */ | ||
1835 | pnfs_get_lseg(data->lseg); | ||
1836 | } | ||
1837 | if (data->lwb > nfsi->layout->plh_lwb) | ||
1838 | nfsi->layout->plh_lwb = data->lwb; | ||
1839 | spin_unlock(&inode->i_lock); | ||
1840 | dprintk("%s: lseg %p end_pos %llu\n", | ||
1841 | __func__, data->lseg, nfsi->layout->plh_lwb); | ||
1842 | |||
1843 | /* if pnfs_layoutcommit_inode() runs between inode locks, the next one | ||
1844 | * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ | ||
1845 | if (mark_as_dirty) | ||
1846 | mark_inode_dirty_sync(inode); | ||
1847 | } | ||
1848 | EXPORT_SYMBOL_GPL(pnfs_commit_set_layoutcommit); | ||
1849 | |||
1800 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) | 1850 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) |
1801 | { | 1851 | { |
1802 | struct nfs_server *nfss = NFS_SERVER(data->args.inode); | 1852 | struct nfs_server *nfss = NFS_SERVER(data->args.inode); |
@@ -1817,6 +1867,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) | |||
1817 | int | 1867 | int |
1818 | pnfs_layoutcommit_inode(struct inode *inode, bool sync) | 1868 | pnfs_layoutcommit_inode(struct inode *inode, bool sync) |
1819 | { | 1869 | { |
1870 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; | ||
1820 | struct nfs4_layoutcommit_data *data; | 1871 | struct nfs4_layoutcommit_data *data; |
1821 | struct nfs_inode *nfsi = NFS_I(inode); | 1872 | struct nfs_inode *nfsi = NFS_I(inode); |
1822 | loff_t end_pos; | 1873 | loff_t end_pos; |
@@ -1867,6 +1918,20 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
1867 | data->args.lastbytewritten = end_pos - 1; | 1918 | data->args.lastbytewritten = end_pos - 1; |
1868 | data->res.server = NFS_SERVER(inode); | 1919 | data->res.server = NFS_SERVER(inode); |
1869 | 1920 | ||
1921 | if (ld->prepare_layoutcommit) { | ||
1922 | status = ld->prepare_layoutcommit(&data->args); | ||
1923 | if (status) { | ||
1924 | spin_lock(&inode->i_lock); | ||
1925 | if (end_pos < nfsi->layout->plh_lwb) | ||
1926 | nfsi->layout->plh_lwb = end_pos; | ||
1927 | spin_unlock(&inode->i_lock); | ||
1928 | put_rpccred(data->cred); | ||
1929 | set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); | ||
1930 | goto clear_layoutcommitting; | ||
1931 | } | ||
1932 | } | ||
1933 | |||
1934 | |||
1870 | status = nfs4_proc_layoutcommit(data, sync); | 1935 | status = nfs4_proc_layoutcommit(data, sync); |
1871 | out: | 1936 | out: |
1872 | if (status) | 1937 | if (status) |
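[Sketch] The pnfs_layout_process() rework above splits LAYOUTGET reply handling into two cases: the reply carries the stateid we already hold (validate the sequence number) versus an entirely new stateid (invalidate every cached segment and skip the check). A simplified, runnable sketch of that decision; note the kernel actually compares the reply seqid against lo->plh_barrier and does all of this under the inode lock, which the sketch omits:

#include <stdbool.h>
#include <string.h>

/* Toy stateid; the real nfs4_stateid layout is in the NFS headers. */
struct stateid { unsigned int seqid; unsigned char other[12]; };

/* Wraparound-safe "is s1 newer than s2", matching pnfs_seqid_is_newer(). */
static bool seqid_is_newer(unsigned int s1, unsigned int s2)
{
	return (int)(s1 - s2) > 0;
}

/* Returns false when the reply should be forgotten. */
static bool process_reply(struct stateid *cur, const struct stateid *res,
			  bool *invalidate_segments)
{
	*invalidate_segments = false;
	if (memcmp(cur->other, res->other, sizeof(cur->other)) == 0) {
		/* Existing state ID: only a newer sequence number is kept. */
		if (!seqid_is_newer(res->seqid, cur->seqid))
			return false;	/* forget reply due to sequence */
		cur->seqid = res->seqid;
	} else {
		/* Entirely new state ID: drop every cached segment and take
		 * the new stateid without a sequence check. */
		*invalidate_segments = true;
		*cur = *res;
	}
	return true;
}

int main(void)
{
	struct stateid cur = { .seqid = 5, .other = "AAAAAAAAAAA" };
	struct stateid res = { .seqid = 6, .other = "AAAAAAAAAAA" };
	bool inval;

	return process_reply(&cur, &res, &inval) ? 0 : 1;	/* kept: newer */
}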
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index aca3dff5dae6..693ce42ec683 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -65,12 +65,15 @@ enum { | |||
65 | NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ | 65 | NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ |
66 | NFS_LAYOUT_ROC, /* some lseg had roc bit set */ | 66 | NFS_LAYOUT_ROC, /* some lseg had roc bit set */ |
67 | NFS_LAYOUT_RETURN, /* Return this layout ASAP */ | 67 | NFS_LAYOUT_RETURN, /* Return this layout ASAP */ |
68 | NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ | ||
68 | }; | 69 | }; |
69 | 70 | ||
70 | enum layoutdriver_policy_flags { | 71 | enum layoutdriver_policy_flags { |
71 | /* Should the pNFS client commit and return the layout upon a setattr */ | 72 | /* Should the pNFS client commit and return the layout upon truncate to |
73 | * a smaller size */ | ||
72 | PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, | 74 | PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, |
73 | PNFS_LAYOUTRET_ON_ERROR = 1 << 1, | 75 | PNFS_LAYOUTRET_ON_ERROR = 1 << 1, |
76 | PNFS_READ_WHOLE_PAGE = 1 << 2, | ||
74 | }; | 77 | }; |
75 | 78 | ||
76 | struct nfs4_deviceid_node; | 79 | struct nfs4_deviceid_node; |
@@ -82,6 +85,7 @@ struct pnfs_layoutdriver_type { | |||
82 | const char *name; | 85 | const char *name; |
83 | struct module *owner; | 86 | struct module *owner; |
84 | unsigned flags; | 87 | unsigned flags; |
88 | unsigned max_deviceinfo_size; | ||
85 | 89 | ||
86 | int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *); | 90 | int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *); |
87 | int (*clear_layoutdriver) (struct nfs_server *); | 91 | int (*clear_layoutdriver) (struct nfs_server *); |
@@ -92,6 +96,9 @@ struct pnfs_layoutdriver_type { | |||
92 | struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); | 96 | struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); |
93 | void (*free_lseg) (struct pnfs_layout_segment *lseg); | 97 | void (*free_lseg) (struct pnfs_layout_segment *lseg); |
94 | 98 | ||
99 | void (*return_range) (struct pnfs_layout_hdr *lo, | ||
100 | struct pnfs_layout_range *range); | ||
101 | |||
95 | /* test for nfs page cache coalescing */ | 102 | /* test for nfs page cache coalescing */ |
96 | const struct nfs_pageio_ops *pg_read_ops; | 103 | const struct nfs_pageio_ops *pg_read_ops; |
97 | const struct nfs_pageio_ops *pg_write_ops; | 104 | const struct nfs_pageio_ops *pg_write_ops; |
@@ -121,14 +128,17 @@ struct pnfs_layoutdriver_type { | |||
121 | enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int); | 128 | enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int); |
122 | 129 | ||
123 | void (*free_deviceid_node) (struct nfs4_deviceid_node *); | 130 | void (*free_deviceid_node) (struct nfs4_deviceid_node *); |
131 | struct nfs4_deviceid_node * (*alloc_deviceid_node) | ||
132 | (struct nfs_server *server, struct pnfs_device *pdev, | ||
133 | gfp_t gfp_flags); | ||
124 | 134 | ||
125 | void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid, | 135 | void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid, |
126 | struct xdr_stream *xdr, | 136 | struct xdr_stream *xdr, |
127 | const struct nfs4_layoutreturn_args *args); | 137 | const struct nfs4_layoutreturn_args *args); |
128 | 138 | ||
129 | void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); | 139 | void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); |
130 | 140 | int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args); | |
131 | void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, | 141 | void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo, |
132 | struct xdr_stream *xdr, | 142 | struct xdr_stream *xdr, |
133 | const struct nfs4_layoutcommit_args *args); | 143 | const struct nfs4_layoutcommit_args *args); |
134 | }; | 144 | }; |
@@ -171,9 +181,6 @@ extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); | |||
171 | extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); | 181 | extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); |
172 | 182 | ||
173 | /* nfs4proc.c */ | 183 | /* nfs4proc.c */ |
174 | extern int nfs4_proc_getdevicelist(struct nfs_server *server, | ||
175 | const struct nfs_fh *fh, | ||
176 | struct pnfs_devicelist *devlist); | ||
177 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | 184 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, |
178 | struct pnfs_device *dev, | 185 | struct pnfs_device *dev, |
179 | struct rpc_cred *cred); | 186 | struct rpc_cred *cred); |
@@ -219,6 +226,7 @@ void pnfs_roc_release(struct inode *ino); | |||
219 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); | 226 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); |
220 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); | 227 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); |
221 | void pnfs_set_layoutcommit(struct nfs_pgio_header *); | 228 | void pnfs_set_layoutcommit(struct nfs_pgio_header *); |
229 | void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data); | ||
222 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); | 230 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); |
223 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); | 231 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); |
224 | int _pnfs_return_layout(struct inode *); | 232 | int _pnfs_return_layout(struct inode *); |
@@ -255,11 +263,12 @@ struct nfs4_deviceid_node { | |||
255 | atomic_t ref; | 263 | atomic_t ref; |
256 | }; | 264 | }; |
257 | 265 | ||
258 | struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | 266 | struct nfs4_deviceid_node * |
267 | nfs4_find_get_deviceid(struct nfs_server *server, | ||
268 | const struct nfs4_deviceid *id, struct rpc_cred *cred, | ||
269 | gfp_t gfp_mask); | ||
259 | void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | 270 | void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); |
260 | void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, | 271 | void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, struct nfs_server *, |
261 | const struct pnfs_layoutdriver_type *, | ||
262 | const struct nfs_client *, | ||
263 | const struct nfs4_deviceid *); | 272 | const struct nfs4_deviceid *); |
264 | struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); | 273 | struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); |
265 | bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); | 274 | bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); |
@@ -267,6 +276,13 @@ void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); | |||
267 | bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); | 276 | bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); |
268 | void nfs4_deviceid_purge_client(const struct nfs_client *); | 277 | void nfs4_deviceid_purge_client(const struct nfs_client *); |
269 | 278 | ||
279 | static inline struct nfs4_deviceid_node * | ||
280 | nfs4_get_deviceid(struct nfs4_deviceid_node *d) | ||
281 | { | ||
282 | atomic_inc(&d->ref); | ||
283 | return d; | ||
284 | } | ||
285 | |||
270 | static inline struct pnfs_layout_segment * | 286 | static inline struct pnfs_layout_segment * |
271 | pnfs_get_lseg(struct pnfs_layout_segment *lseg) | 287 | pnfs_get_lseg(struct pnfs_layout_segment *lseg) |
272 | { | 288 | { |
@@ -368,6 +384,14 @@ pnfs_ld_layoutret_on_setattr(struct inode *inode) | |||
368 | } | 384 | } |
369 | 385 | ||
370 | static inline bool | 386 | static inline bool |
387 | pnfs_ld_read_whole_page(struct inode *inode) | ||
388 | { | ||
389 | if (!pnfs_enabled_sb(NFS_SERVER(inode))) | ||
390 | return false; | ||
391 | return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE; | ||
392 | } | ||
393 | |||
394 | static inline bool | ||
371 | pnfs_layoutcommit_outstanding(struct inode *inode) | 395 | pnfs_layoutcommit_outstanding(struct inode *inode) |
372 | { | 396 | { |
373 | struct nfs_inode *nfsi = NFS_I(inode); | 397 | struct nfs_inode *nfsi = NFS_I(inode); |
@@ -443,6 +467,12 @@ pnfs_ld_layoutret_on_setattr(struct inode *inode) | |||
443 | } | 467 | } |
444 | 468 | ||
445 | static inline bool | 469 | static inline bool |
470 | pnfs_ld_read_whole_page(struct inode *inode) | ||
471 | { | ||
472 | return false; | ||
473 | } | ||
474 | |||
475 | static inline bool | ||
446 | pnfs_roc(struct inode *ino) | 476 | pnfs_roc(struct inode *ino) |
447 | { | 477 | { |
448 | return false; | 478 | return false; |
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 6da209bd9408..aa2ec0015183 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c | |||
@@ -29,6 +29,9 @@ | |||
29 | */ | 29 | */ |
30 | 30 | ||
31 | #include <linux/export.h> | 31 | #include <linux/export.h> |
32 | #include <linux/nfs_fs.h> | ||
33 | #include "nfs4session.h" | ||
34 | #include "internal.h" | ||
32 | #include "pnfs.h" | 35 | #include "pnfs.h" |
33 | 36 | ||
34 | #define NFSDBG_FACILITY NFSDBG_PNFS | 37 | #define NFSDBG_FACILITY NFSDBG_PNFS |
@@ -89,6 +92,74 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
89 | return NULL; | 92 | return NULL; |
90 | } | 93 | } |
91 | 94 | ||
95 | static struct nfs4_deviceid_node * | ||
96 | nfs4_get_device_info(struct nfs_server *server, | ||
97 | const struct nfs4_deviceid *dev_id, | ||
98 | struct rpc_cred *cred, gfp_t gfp_flags) | ||
99 | { | ||
100 | struct nfs4_deviceid_node *d = NULL; | ||
101 | struct pnfs_device *pdev = NULL; | ||
102 | struct page **pages = NULL; | ||
103 | u32 max_resp_sz; | ||
104 | int max_pages; | ||
105 | int rc, i; | ||
106 | |||
107 | /* | ||
108 | * Use the session max response size as the basis for setting | ||
109 | * GETDEVICEINFO's maxcount | ||
110 | */ | ||
111 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | ||
112 | if (server->pnfs_curr_ld->max_deviceinfo_size && | ||
113 | server->pnfs_curr_ld->max_deviceinfo_size < max_resp_sz) | ||
114 | max_resp_sz = server->pnfs_curr_ld->max_deviceinfo_size; | ||
115 | max_pages = nfs_page_array_len(0, max_resp_sz); | ||
116 | dprintk("%s: server %p max_resp_sz %u max_pages %d\n", | ||
117 | __func__, server, max_resp_sz, max_pages); | ||
118 | |||
119 | pdev = kzalloc(sizeof(*pdev), gfp_flags); | ||
120 | if (!pdev) | ||
121 | return NULL; | ||
122 | |||
123 | pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); | ||
124 | if (!pages) | ||
125 | goto out_free_pdev; | ||
126 | |||
127 | for (i = 0; i < max_pages; i++) { | ||
128 | pages[i] = alloc_page(gfp_flags); | ||
129 | if (!pages[i]) | ||
130 | goto out_free_pages; | ||
131 | } | ||
132 | |||
133 | memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); | ||
134 | pdev->layout_type = server->pnfs_curr_ld->id; | ||
135 | pdev->pages = pages; | ||
136 | pdev->pgbase = 0; | ||
137 | pdev->pglen = max_resp_sz; | ||
138 | pdev->mincount = 0; | ||
139 | pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; | ||
140 | |||
141 | rc = nfs4_proc_getdeviceinfo(server, pdev, cred); | ||
142 | dprintk("%s getdevice info returns %d\n", __func__, rc); | ||
143 | if (rc) | ||
144 | goto out_free_pages; | ||
145 | |||
146 | /* | ||
147 | * Found new device, need to decode it and then add it to the | ||
148 | * list of known devices for this mountpoint. | ||
149 | */ | ||
150 | d = server->pnfs_curr_ld->alloc_deviceid_node(server, pdev, | ||
151 | gfp_flags); | ||
152 | |||
153 | out_free_pages: | ||
154 | for (i = 0; i < max_pages; i++) | ||
155 | __free_page(pages[i]); | ||
156 | kfree(pages); | ||
157 | out_free_pdev: | ||
158 | kfree(pdev); | ||
159 | dprintk("<-- %s d %p\n", __func__, d); | ||
160 | return d; | ||
161 | } | ||
162 | |||
92 | /* | 163 | /* |
93 | * Lookup a deviceid in cache and get a reference count on it if found | 164 | * Lookup a deviceid in cache and get a reference count on it if found |
94 | * | 165 | * |
@@ -96,14 +167,14 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
96 | * @id deviceid to look up | 167 | * @id deviceid to look up |
97 | */ | 168 | */ |
98 | static struct nfs4_deviceid_node * | 169 | static struct nfs4_deviceid_node * |
99 | _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | 170 | __nfs4_find_get_deviceid(struct nfs_server *server, |
100 | const struct nfs_client *clp, const struct nfs4_deviceid *id, | 171 | const struct nfs4_deviceid *id, long hash) |
101 | long hash) | ||
102 | { | 172 | { |
103 | struct nfs4_deviceid_node *d; | 173 | struct nfs4_deviceid_node *d; |
104 | 174 | ||
105 | rcu_read_lock(); | 175 | rcu_read_lock(); |
106 | d = _lookup_deviceid(ld, clp, id, hash); | 176 | d = _lookup_deviceid(server->pnfs_curr_ld, server->nfs_client, id, |
177 | hash); | ||
107 | if (d != NULL) | 178 | if (d != NULL) |
108 | atomic_inc(&d->ref); | 179 | atomic_inc(&d->ref); |
109 | rcu_read_unlock(); | 180 | rcu_read_unlock(); |
@@ -111,10 +182,33 @@ _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
111 | } | 182 | } |
112 | 183 | ||
113 | struct nfs4_deviceid_node * | 184 | struct nfs4_deviceid_node * |
114 | nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | 185 | nfs4_find_get_deviceid(struct nfs_server *server, |
115 | const struct nfs_client *clp, const struct nfs4_deviceid *id) | 186 | const struct nfs4_deviceid *id, struct rpc_cred *cred, |
187 | gfp_t gfp_mask) | ||
116 | { | 188 | { |
117 | return _find_get_deviceid(ld, clp, id, nfs4_deviceid_hash(id)); | 189 | long hash = nfs4_deviceid_hash(id); |
190 | struct nfs4_deviceid_node *d, *new; | ||
191 | |||
192 | d = __nfs4_find_get_deviceid(server, id, hash); | ||
193 | if (d) | ||
194 | return d; | ||
195 | |||
196 | new = nfs4_get_device_info(server, id, cred, gfp_mask); | ||
197 | if (!new) | ||
198 | return new; | ||
199 | |||
200 | spin_lock(&nfs4_deviceid_lock); | ||
201 | d = __nfs4_find_get_deviceid(server, id, hash); | ||
202 | if (d) { | ||
203 | spin_unlock(&nfs4_deviceid_lock); | ||
204 | server->pnfs_curr_ld->free_deviceid_node(new); | ||
205 | return d; | ||
206 | } | ||
207 | hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); | ||
208 | atomic_inc(&new->ref); | ||
209 | spin_unlock(&nfs4_deviceid_lock); | ||
210 | |||
211 | return new; | ||
118 | } | 212 | } |
119 | EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); | 213 | EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); |
120 | 214 | ||
@@ -151,15 +245,13 @@ nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
151 | EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); | 245 | EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); |
152 | 246 | ||
153 | void | 247 | void |
154 | nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, | 248 | nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, struct nfs_server *server, |
155 | const struct pnfs_layoutdriver_type *ld, | ||
156 | const struct nfs_client *nfs_client, | ||
157 | const struct nfs4_deviceid *id) | 249 | const struct nfs4_deviceid *id) |
158 | { | 250 | { |
159 | INIT_HLIST_NODE(&d->node); | 251 | INIT_HLIST_NODE(&d->node); |
160 | INIT_HLIST_NODE(&d->tmpnode); | 252 | INIT_HLIST_NODE(&d->tmpnode); |
161 | d->ld = ld; | 253 | d->ld = server->pnfs_curr_ld; |
162 | d->nfs_client = nfs_client; | 254 | d->nfs_client = server->nfs_client; |
163 | d->flags = 0; | 255 | d->flags = 0; |
164 | d->deviceid = *id; | 256 | d->deviceid = *id; |
165 | atomic_set(&d->ref, 1); | 257 | atomic_set(&d->ref, 1); |
@@ -167,39 +259,6 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, | |||
167 | EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); | 259 | EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); |
168 | 260 | ||
169 | /* | 261 | /* |
170 | * Uniquely initialize and insert a deviceid node into cache | ||
171 | * | ||
172 | * @new new deviceid node | ||
173 | * Note that the caller must set up the following members: | ||
174 | * new->ld | ||
175 | * new->nfs_client | ||
176 | * new->deviceid | ||
177 | * | ||
178 | * @ret the inserted node, if none found, otherwise, the found entry. | ||
179 | */ | ||
180 | struct nfs4_deviceid_node * | ||
181 | nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new) | ||
182 | { | ||
183 | struct nfs4_deviceid_node *d; | ||
184 | long hash; | ||
185 | |||
186 | spin_lock(&nfs4_deviceid_lock); | ||
187 | hash = nfs4_deviceid_hash(&new->deviceid); | ||
188 | d = _find_get_deviceid(new->ld, new->nfs_client, &new->deviceid, hash); | ||
189 | if (d) { | ||
190 | spin_unlock(&nfs4_deviceid_lock); | ||
191 | return d; | ||
192 | } | ||
193 | |||
194 | hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); | ||
195 | spin_unlock(&nfs4_deviceid_lock); | ||
196 | atomic_inc(&new->ref); | ||
197 | |||
198 | return new; | ||
199 | } | ||
200 | EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node); | ||
201 | |||
202 | /* | ||
203 | * Dereference a deviceid node and delete it when its reference count drops | 262 | * Dereference a deviceid node and delete it when its reference count drops |
204 | * to zero. | 263 | * to zero. |
205 | * | 264 | * |
@@ -299,4 +358,3 @@ nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) | |||
299 | } | 358 | } |
300 | rcu_read_unlock(); | 359 | rcu_read_unlock(); |
301 | } | 360 | } |
302 | |||
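[Sketch] With nfs4_insert_deviceid_node() gone, nfs4_find_get_deviceid() above is now a lookup-or-create with a locked re-check, so two racing callers cannot insert the same deviceid twice; the loser frees the node it just fetched over the wire. A runnable user-space sketch of that pattern, with a single pthread mutex standing in for both the RCU read lock and nfs4_deviceid_lock:

#include <pthread.h>
#include <stdlib.h>

struct node { struct node *next; int id; int ref; };

static struct node *cache;
static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;

/* Must be called with cache_lock held; bumps the ref on a hit. */
static struct node *lookup(int id)
{
	struct node *n;

	for (n = cache; n; n = n->next)
		if (n->id == id) { n->ref++; return n; }
	return NULL;
}

static struct node *find_get(int id)
{
	struct node *n, *new;

	pthread_mutex_lock(&cache_lock);	/* kernel side: rcu_read_lock() */
	n = lookup(id);
	pthread_mutex_unlock(&cache_lock);
	if (n)
		return n;

	new = calloc(1, sizeof(*new));		/* kernel: GETDEVICEINFO + alloc_deviceid_node */
	if (!new)
		return NULL;
	new->id = id;
	new->ref = 1;				/* caller's reference */

	pthread_mutex_lock(&cache_lock);
	n = lookup(id);				/* re-check: someone may have raced us */
	if (n) {
		pthread_mutex_unlock(&cache_lock);
		free(new);			/* kernel: ld->free_deviceid_node(new) */
		return n;
	}
	new->next = cache;
	cache = new;
	new->ref++;				/* second reference held by the cache */
	pthread_mutex_unlock(&cache_lock);
	return new;
}

int main(void)
{
	struct node *a = find_get(42);
	struct node *b = find_get(42);		/* cache hit: same node, ref bumped */
	return (a && a == b) ? 0 : 1;
}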
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e4499d5b51e8..31a11b0e885d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -2065,11 +2065,6 @@ static int nfs23_validate_mount_data(void *options, | |||
2065 | return NFS_TEXT_DATA; | 2065 | return NFS_TEXT_DATA; |
2066 | } | 2066 | } |
2067 | 2067 | ||
2068 | #if !IS_ENABLED(CONFIG_NFS_V3) | ||
2069 | if (args->version == 3) | ||
2070 | goto out_v3_not_compiled; | ||
2071 | #endif /* !CONFIG_NFS_V3 */ | ||
2072 | |||
2073 | return 0; | 2068 | return 0; |
2074 | 2069 | ||
2075 | out_no_data: | 2070 | out_no_data: |
@@ -2085,12 +2080,6 @@ out_no_sec: | |||
2085 | dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); | 2080 | dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); |
2086 | return -EINVAL; | 2081 | return -EINVAL; |
2087 | 2082 | ||
2088 | #if !IS_ENABLED(CONFIG_NFS_V3) | ||
2089 | out_v3_not_compiled: | ||
2090 | dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n"); | ||
2091 | return -EPROTONOSUPPORT; | ||
2092 | #endif /* !CONFIG_NFS_V3 */ | ||
2093 | |||
2094 | out_nomem: | 2083 | out_nomem: |
2095 | dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); | 2084 | dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); |
2096 | return -ENOMEM; | 2085 | return -ENOMEM; |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 175d5d073ccf..12493846a2d3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -49,6 +49,9 @@ static const struct nfs_rw_ops nfs_rw_write_ops; | |||
49 | static void nfs_clear_request_commit(struct nfs_page *req); | 49 | static void nfs_clear_request_commit(struct nfs_page *req); |
50 | static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, | 50 | static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, |
51 | struct inode *inode); | 51 | struct inode *inode); |
52 | static struct nfs_page * | ||
53 | nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, | ||
54 | struct page *page); | ||
52 | 55 | ||
53 | static struct kmem_cache *nfs_wdata_cachep; | 56 | static struct kmem_cache *nfs_wdata_cachep; |
54 | static mempool_t *nfs_wdata_mempool; | 57 | static mempool_t *nfs_wdata_mempool; |
@@ -95,38 +98,6 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) | |||
95 | } | 98 | } |
96 | 99 | ||
97 | /* | 100 | /* |
98 | * nfs_page_search_commits_for_head_request_locked | ||
99 | * | ||
100 | * Search through commit lists on @inode for the head request for @page. | ||
101 | * Must be called while holding the inode (which is cinfo) lock. | ||
102 | * | ||
103 | * Returns the head request if found, or NULL if not found. | ||
104 | */ | ||
105 | static struct nfs_page * | ||
106 | nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, | ||
107 | struct page *page) | ||
108 | { | ||
109 | struct nfs_page *freq, *t; | ||
110 | struct nfs_commit_info cinfo; | ||
111 | struct inode *inode = &nfsi->vfs_inode; | ||
112 | |||
113 | nfs_init_cinfo_from_inode(&cinfo, inode); | ||
114 | |||
115 | /* search through pnfs commit lists */ | ||
116 | freq = pnfs_search_commit_reqs(inode, &cinfo, page); | ||
117 | if (freq) | ||
118 | return freq->wb_head; | ||
119 | |||
120 | /* Linearly search the commit list for the correct request */ | ||
121 | list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) { | ||
122 | if (freq->wb_page == page) | ||
123 | return freq->wb_head; | ||
124 | } | ||
125 | |||
126 | return NULL; | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * nfs_page_find_head_request_locked - find head request associated with @page | 101 | * nfs_page_find_head_request_locked - find head request associated with @page |
131 | * | 102 | * |
132 | * must be called while holding the inode lock. | 103 | * must be called while holding the inode lock. |
@@ -271,11 +242,14 @@ static void nfs_mark_uptodate(struct nfs_page *req) | |||
271 | 242 | ||
272 | static int wb_priority(struct writeback_control *wbc) | 243 | static int wb_priority(struct writeback_control *wbc) |
273 | { | 244 | { |
245 | int ret = 0; | ||
274 | if (wbc->for_reclaim) | 246 | if (wbc->for_reclaim) |
275 | return FLUSH_HIGHPRI | FLUSH_STABLE; | 247 | return FLUSH_HIGHPRI | FLUSH_STABLE; |
248 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
249 | ret = FLUSH_COND_STABLE; | ||
276 | if (wbc->for_kupdate || wbc->for_background) | 250 | if (wbc->for_kupdate || wbc->for_background) |
277 | return FLUSH_LOWPRI | FLUSH_COND_STABLE; | 251 | ret |= FLUSH_LOWPRI; |
278 | return FLUSH_COND_STABLE; | 252 | return ret; |
279 | } | 253 | } |
280 | 254 | ||
281 | /* | 255 | /* |
@@ -731,6 +705,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) | |||
731 | if (likely(!PageSwapCache(head->wb_page))) { | 705 | if (likely(!PageSwapCache(head->wb_page))) { |
732 | set_page_private(head->wb_page, 0); | 706 | set_page_private(head->wb_page, 0); |
733 | ClearPagePrivate(head->wb_page); | 707 | ClearPagePrivate(head->wb_page); |
708 | smp_mb__after_atomic(); | ||
709 | wake_up_page(head->wb_page, PG_private); | ||
734 | clear_bit(PG_MAPPED, &head->wb_flags); | 710 | clear_bit(PG_MAPPED, &head->wb_flags); |
735 | } | 711 | } |
736 | nfsi->npages--; | 712 | nfsi->npages--; |
@@ -749,7 +725,38 @@ nfs_mark_request_dirty(struct nfs_page *req) | |||
749 | __set_page_dirty_nobuffers(req->wb_page); | 725 | __set_page_dirty_nobuffers(req->wb_page); |
750 | } | 726 | } |
751 | 727 | ||
752 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | 728 | /* |
729 | * nfs_page_search_commits_for_head_request_locked | ||
730 | * | ||
731 | * Search through commit lists on @inode for the head request for @page. | ||
732 | * Must be called while holding the inode (which is cinfo) lock. | ||
733 | * | ||
734 | * Returns the head request if found, or NULL if not found. | ||
735 | */ | ||
736 | static struct nfs_page * | ||
737 | nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, | ||
738 | struct page *page) | ||
739 | { | ||
740 | struct nfs_page *freq, *t; | ||
741 | struct nfs_commit_info cinfo; | ||
742 | struct inode *inode = &nfsi->vfs_inode; | ||
743 | |||
744 | nfs_init_cinfo_from_inode(&cinfo, inode); | ||
745 | |||
746 | /* search through pnfs commit lists */ | ||
747 | freq = pnfs_search_commit_reqs(inode, &cinfo, page); | ||
748 | if (freq) | ||
749 | return freq->wb_head; | ||
750 | |||
751 | /* Linearly search the commit list for the correct request */ | ||
752 | list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) { | ||
753 | if (freq->wb_page == page) | ||
754 | return freq->wb_head; | ||
755 | } | ||
756 | |||
757 | return NULL; | ||
758 | } | ||
759 | |||
753 | /** | 760 | /** |
754 | * nfs_request_add_commit_list - add request to a commit list | 761 | * nfs_request_add_commit_list - add request to a commit list |
755 | * @req: pointer to a struct nfs_page | 762 | * @req: pointer to a struct nfs_page |
@@ -867,36 +874,6 @@ int nfs_write_need_commit(struct nfs_pgio_header *hdr) | |||
867 | return hdr->verf.committed != NFS_FILE_SYNC; | 874 | return hdr->verf.committed != NFS_FILE_SYNC; |
868 | } | 875 | } |
869 | 876 | ||
870 | #else | ||
871 | static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, | ||
872 | struct inode *inode) | ||
873 | { | ||
874 | } | ||
875 | |||
876 | void nfs_init_cinfo(struct nfs_commit_info *cinfo, | ||
877 | struct inode *inode, | ||
878 | struct nfs_direct_req *dreq) | ||
879 | { | ||
880 | } | ||
881 | |||
882 | void | ||
883 | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, | ||
884 | struct nfs_commit_info *cinfo) | ||
885 | { | ||
886 | } | ||
887 | |||
888 | static void | ||
889 | nfs_clear_request_commit(struct nfs_page *req) | ||
890 | { | ||
891 | } | ||
892 | |||
893 | int nfs_write_need_commit(struct nfs_pgio_header *hdr) | ||
894 | { | ||
895 | return 0; | ||
896 | } | ||
897 | |||
898 | #endif | ||
899 | |||
900 | static void nfs_write_completion(struct nfs_pgio_header *hdr) | 877 | static void nfs_write_completion(struct nfs_pgio_header *hdr) |
901 | { | 878 | { |
902 | struct nfs_commit_info cinfo; | 879 | struct nfs_commit_info cinfo; |
@@ -932,7 +909,6 @@ out: | |||
932 | hdr->release(hdr); | 909 | hdr->release(hdr); |
933 | } | 910 | } |
934 | 911 | ||
935 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
936 | unsigned long | 912 | unsigned long |
937 | nfs_reqs_to_commit(struct nfs_commit_info *cinfo) | 913 | nfs_reqs_to_commit(struct nfs_commit_info *cinfo) |
938 | { | 914 | { |
@@ -989,19 +965,6 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, | |||
989 | return ret; | 965 | return ret; |
990 | } | 966 | } |
991 | 967 | ||
992 | #else | ||
993 | unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) | ||
994 | { | ||
995 | return 0; | ||
996 | } | ||
997 | |||
998 | int nfs_scan_commit(struct inode *inode, struct list_head *dst, | ||
999 | struct nfs_commit_info *cinfo) | ||
1000 | { | ||
1001 | return 0; | ||
1002 | } | ||
1003 | #endif | ||
1004 | |||
1005 | /* | 968 | /* |
1006 | * Search for an existing write request, and attempt to update | 969 | * Search for an existing write request, and attempt to update |
1007 | * it to reflect a new dirty region on a given page. | 970 | * it to reflect a new dirty region on a given page. |
@@ -1394,7 +1357,6 @@ static int nfs_writeback_done(struct rpc_task *task, | |||
1394 | return status; | 1357 | return status; |
1395 | nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); | 1358 | nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); |
1396 | 1359 | ||
1397 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
1398 | if (hdr->res.verf->committed < hdr->args.stable && | 1360 | if (hdr->res.verf->committed < hdr->args.stable && |
1399 | task->tk_status >= 0) { | 1361 | task->tk_status >= 0) { |
1400 | /* We tried a write call, but the server did not | 1362 | /* We tried a write call, but the server did not |
@@ -1416,7 +1378,6 @@ static int nfs_writeback_done(struct rpc_task *task, | |||
1416 | complain = jiffies + 300 * HZ; | 1378 | complain = jiffies + 300 * HZ; |
1417 | } | 1379 | } |
1418 | } | 1380 | } |
1419 | #endif | ||
1420 | 1381 | ||
1421 | /* Deal with the suid/sgid bit corner case */ | 1382 | /* Deal with the suid/sgid bit corner case */ |
1422 | if (nfs_should_remove_suid(inode)) | 1383 | if (nfs_should_remove_suid(inode)) |
@@ -1469,7 +1430,6 @@ static void nfs_writeback_result(struct rpc_task *task, | |||
1469 | } | 1430 | } |
1470 | 1431 | ||
1471 | 1432 | ||
1472 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
1473 | static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) | 1433 | static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) |
1474 | { | 1434 | { |
1475 | int ret; | 1435 | int ret; |
@@ -1538,6 +1498,18 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, | |||
1538 | } | 1498 | } |
1539 | EXPORT_SYMBOL_GPL(nfs_initiate_commit); | 1499 | EXPORT_SYMBOL_GPL(nfs_initiate_commit); |
1540 | 1500 | ||
1501 | static loff_t nfs_get_lwb(struct list_head *head) | ||
1502 | { | ||
1503 | loff_t lwb = 0; | ||
1504 | struct nfs_page *req; | ||
1505 | |||
1506 | list_for_each_entry(req, head, wb_list) | ||
1507 | if (lwb < (req_offset(req) + req->wb_bytes)) | ||
1508 | lwb = req_offset(req) + req->wb_bytes; | ||
1509 | |||
1510 | return lwb; | ||
1511 | } | ||
1512 | |||
1541 | /* | 1513 | /* |
1542 | * Set up the argument/result storage required for the RPC call. | 1514 | * Set up the argument/result storage required for the RPC call. |
1543 | */ | 1515 | */ |
@@ -1557,6 +1529,9 @@ void nfs_init_commit(struct nfs_commit_data *data, | |||
1557 | data->inode = inode; | 1529 | data->inode = inode; |
1558 | data->cred = first->wb_context->cred; | 1530 | data->cred = first->wb_context->cred; |
1559 | data->lseg = lseg; /* reference transferred */ | 1531 | data->lseg = lseg; /* reference transferred */ |
1532 | /* only set lwb for pnfs commit */ | ||
1533 | if (lseg) | ||
1534 | data->lwb = nfs_get_lwb(&data->pages); | ||
1560 | data->mds_ops = &nfs_commit_ops; | 1535 | data->mds_ops = &nfs_commit_ops; |
1561 | data->completion_ops = cinfo->completion_ops; | 1536 | data->completion_ops = cinfo->completion_ops; |
1562 | data->dreq = cinfo->dreq; | 1537 | data->dreq = cinfo->dreq; |
@@ -1636,6 +1611,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) | |||
1636 | struct nfs_page *req; | 1611 | struct nfs_page *req; |
1637 | int status = data->task.tk_status; | 1612 | int status = data->task.tk_status; |
1638 | struct nfs_commit_info cinfo; | 1613 | struct nfs_commit_info cinfo; |
1614 | struct nfs_server *nfss; | ||
1639 | 1615 | ||
1640 | while (!list_empty(&data->pages)) { | 1616 | while (!list_empty(&data->pages)) { |
1641 | req = nfs_list_entry(data->pages.next); | 1617 | req = nfs_list_entry(data->pages.next); |
@@ -1669,6 +1645,10 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) | |||
1669 | next: | 1645 | next: |
1670 | nfs_unlock_and_release_request(req); | 1646 | nfs_unlock_and_release_request(req); |
1671 | } | 1647 | } |
1648 | nfss = NFS_SERVER(data->inode); | ||
1649 | if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) | ||
1650 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); | ||
1651 | |||
1672 | nfs_init_cinfo(&cinfo, data->inode, data->dreq); | 1652 | nfs_init_cinfo(&cinfo, data->inode, data->dreq); |
1673 | if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) | 1653 | if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) |
1674 | nfs_commit_clear_lock(NFS_I(data->inode)); | 1654 | nfs_commit_clear_lock(NFS_I(data->inode)); |
@@ -1778,12 +1758,6 @@ out_mark_dirty: | |||
1778 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); | 1758 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); |
1779 | return ret; | 1759 | return ret; |
1780 | } | 1760 | } |
1781 | #else | ||
1782 | static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) | ||
1783 | { | ||
1784 | return 0; | ||
1785 | } | ||
1786 | #endif | ||
1787 | 1761 | ||
1788 | int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) | 1762 | int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) |
1789 | { | 1763 | { |
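[Sketch] nfs_get_lwb(), added above and wired into nfs_init_commit() for pNFS commits only, scans the commit list for the highest offset + length: the "last write byte" that LAYOUTCOMMIT later reports to the metadata server. A runnable sketch of the same scan over a plain array:

#include <stdio.h>

struct req { long long offset; unsigned bytes; };

/* Highest end offset across the requests being committed. */
static long long get_lwb(const struct req *reqs, int n)
{
	long long lwb = 0;
	int i;

	for (i = 0; i < n; i++)
		if (lwb < reqs[i].offset + reqs[i].bytes)
			lwb = reqs[i].offset + reqs[i].bytes;
	return lwb;
}

int main(void)
{
	struct req reqs[] = { { 0, 4096 }, { 8192, 1024 } };

	printf("%lld\n", get_lwb(reqs, 2));	/* prints 9216 */
	return 0;
}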
diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile index f689ed82af3a..d153ca3ea577 100644 --- a/fs/nfs_common/Makefile +++ b/fs/nfs_common/Makefile | |||
@@ -3,5 +3,6 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o | 5 | obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o |
6 | |||
7 | nfs_acl-objs := nfsacl.o | 6 | nfs_acl-objs := nfsacl.o |
7 | |||
8 | obj-$(CONFIG_GRACE_PERIOD) += grace.o | ||
diff --git a/fs/lockd/grace.c b/fs/nfs_common/grace.c index 6d1ee7204c88..ae6e58ea4de5 100644 --- a/fs/lockd/grace.c +++ b/fs/nfs_common/grace.c | |||
@@ -1,17 +1,20 @@ | |||
1 | /* | 1 | /* |
2 | * Common code for control of lockd and nfsv4 grace periods. | 2 | * Common code for control of lockd and nfsv4 grace periods. |
3 | * | ||
4 | * Transplanted from lockd code | ||
3 | */ | 5 | */ |
4 | 6 | ||
5 | #include <linux/module.h> | 7 | #include <linux/module.h> |
6 | #include <linux/lockd/bind.h> | ||
7 | #include <net/net_namespace.h> | 8 | #include <net/net_namespace.h> |
9 | #include <net/netns/generic.h> | ||
10 | #include <linux/fs.h> | ||
8 | 11 | ||
9 | #include "netns.h" | 12 | static int grace_net_id; |
10 | |||
11 | static DEFINE_SPINLOCK(grace_lock); | 13 | static DEFINE_SPINLOCK(grace_lock); |
12 | 14 | ||
13 | /** | 15 | /** |
14 | * locks_start_grace | 16 | * locks_start_grace |
17 | * @net: net namespace that this lock manager belongs to | ||
15 | * @lm: who this grace period is for | 18 | * @lm: who this grace period is for |
16 | * | 19 | * |
17 | * A grace period is a period during which locks should not be given | 20 | * A grace period is a period during which locks should not be given |
@@ -21,18 +24,20 @@ static DEFINE_SPINLOCK(grace_lock); | |||
21 | * | 24 | * |
22 | * This function is called to start a grace period. | 25 | * This function is called to start a grace period. |
23 | */ | 26 | */ |
24 | void locks_start_grace(struct net *net, struct lock_manager *lm) | 27 | void |
28 | locks_start_grace(struct net *net, struct lock_manager *lm) | ||
25 | { | 29 | { |
26 | struct lockd_net *ln = net_generic(net, lockd_net_id); | 30 | struct list_head *grace_list = net_generic(net, grace_net_id); |
27 | 31 | ||
28 | spin_lock(&grace_lock); | 32 | spin_lock(&grace_lock); |
29 | list_add(&lm->list, &ln->grace_list); | 33 | list_add(&lm->list, grace_list); |
30 | spin_unlock(&grace_lock); | 34 | spin_unlock(&grace_lock); |
31 | } | 35 | } |
32 | EXPORT_SYMBOL_GPL(locks_start_grace); | 36 | EXPORT_SYMBOL_GPL(locks_start_grace); |
33 | 37 | ||
34 | /** | 38 | /** |
35 | * locks_end_grace | 39 | * locks_end_grace |
40 | * @net: net namespace that this lock manager belongs to | ||
36 | * @lm: who this grace period is for | 41 | * @lm: who this grace period is for |
37 | * | 42 | * |
38 | * Call this function to state that the given lock manager is ready to | 43 | * Call this function to state that the given lock manager is ready to |
@@ -41,7 +46,8 @@ EXPORT_SYMBOL_GPL(locks_start_grace); | |||
41 | * Note that callers count on it being safe to call this more than once, | 46 | * Note that callers count on it being safe to call this more than once, |
42 | * and the second call should be a no-op. | 47 | * and the second call should be a no-op. |
43 | */ | 48 | */ |
44 | void locks_end_grace(struct lock_manager *lm) | 49 | void |
50 | locks_end_grace(struct lock_manager *lm) | ||
45 | { | 51 | { |
46 | spin_lock(&grace_lock); | 52 | spin_lock(&grace_lock); |
47 | list_del_init(&lm->list); | 53 | list_del_init(&lm->list); |
@@ -56,10 +62,52 @@ EXPORT_SYMBOL_GPL(locks_end_grace); | |||
56 | * to answer ordinary lock requests, and when they should accept only | 62 | * to answer ordinary lock requests, and when they should accept only |
57 | * lock reclaims. | 63 | * lock reclaims. |
58 | */ | 64 | */ |
59 | int locks_in_grace(struct net *net) | 65 | int |
66 | locks_in_grace(struct net *net) | ||
60 | { | 67 | { |
61 | struct lockd_net *ln = net_generic(net, lockd_net_id); | 68 | struct list_head *grace_list = net_generic(net, grace_net_id); |
62 | 69 | ||
63 | return !list_empty(&ln->grace_list); | 70 | return !list_empty(grace_list); |
64 | } | 71 | } |
65 | EXPORT_SYMBOL_GPL(locks_in_grace); | 72 | EXPORT_SYMBOL_GPL(locks_in_grace); |
73 | |||
74 | static int __net_init | ||
75 | grace_init_net(struct net *net) | ||
76 | { | ||
77 | struct list_head *grace_list = net_generic(net, grace_net_id); | ||
78 | |||
79 | INIT_LIST_HEAD(grace_list); | ||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | static void __net_exit | ||
84 | grace_exit_net(struct net *net) | ||
85 | { | ||
86 | struct list_head *grace_list = net_generic(net, grace_net_id); | ||
87 | |||
88 | BUG_ON(!list_empty(grace_list)); | ||
89 | } | ||
90 | |||
91 | static struct pernet_operations grace_net_ops = { | ||
92 | .init = grace_init_net, | ||
93 | .exit = grace_exit_net, | ||
94 | .id = &grace_net_id, | ||
95 | .size = sizeof(struct list_head), | ||
96 | }; | ||
97 | |||
98 | static int __init | ||
99 | init_grace(void) | ||
100 | { | ||
101 | return register_pernet_subsys(&grace_net_ops); | ||
102 | } | ||
103 | |||
104 | static void __exit | ||
105 | exit_grace(void) | ||
106 | { | ||
107 | unregister_pernet_subsys(&grace_net_ops); | ||
108 | } | ||
109 | |||
110 | MODULE_AUTHOR("Jeff Layton <jlayton@primarydata.com>"); | ||
111 | MODULE_LICENSE("GPL"); | ||
112 | module_init(init_grace) | ||
113 | module_exit(exit_grace) | ||
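[Sketch] With grace.c relocated into nfs_common as a standalone pernet module, lockd and NFSv4 consume the same three entry points instead of lockd owning the list. A kernel-context sketch of the expected calling pattern from a lock manager's point of view; the demo_* names are illustrative only, and this is not a buildable module on its own:

#include <linux/fs.h>		/* locks_start_grace() and friends */
#include <net/net_namespace.h>

static struct lock_manager demo_lm;	/* illustrative instance */

static void demo_grace_begin(struct net *net)
{
	/* Adds demo_lm to this namespace's grace list; from here on,
	 * locks_in_grace(net) reports true. */
	locks_start_grace(net, &demo_lm);
}

static void demo_grace_end(struct net *net)
{
	/* Safe to call more than once; the second call is a no-op, as the
	 * comment in grace.c above promises. */
	locks_end_grace(&demo_lm);
}

static int demo_lock_request_ok(struct net *net)
{
	/* Request handlers consult this to reject non-reclaim locks
	 * while any manager on the namespace is still in grace. */
	return !locks_in_grace(net);
}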
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index f994e750e0d1..73395156bdb4 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig | |||
@@ -71,6 +71,7 @@ config NFSD_V4 | |||
71 | select FS_POSIX_ACL | 71 | select FS_POSIX_ACL |
72 | select SUNRPC_GSS | 72 | select SUNRPC_GSS |
73 | select CRYPTO | 73 | select CRYPTO |
74 | select GRACE_PERIOD | ||
74 | help | 75 | help |
75 | This option enables support in your system's NFS server for | 76 | This option enables support in your system's NFS server for |
76 | version 4 of the NFS protocol (RFC 3530). | 77 | version 4 of the NFS protocol (RFC 3530). |
@@ -94,9 +95,6 @@ config NFSD_V4_SECURITY_LABEL | |||
94 | If you do not wish to enable fine-grained security labels SELinux or | 95 | If you do not wish to enable fine-grained security labels SELinux or |
95 | Smack policies on NFSv4 files, say N. | 96 | Smack policies on NFSv4 files, say N. |
96 | 97 | ||
97 | WARNING: there is still a chance of backwards-incompatible protocol changes. | ||
98 | For now we recommend "Y" only for developers and testers. | ||
99 | |||
100 | config NFSD_FAULT_INJECTION | 98 | config NFSD_FAULT_INJECTION |
101 | bool "NFS server manual fault injection" | 99 | bool "NFS server manual fault injection" |
102 | depends on NFSD_V4 && DEBUG_KERNEL | 100 | depends on NFSD_V4 && DEBUG_KERNEL |
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index b582f9ab6b2a..dd96a3830004 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h | |||
@@ -18,7 +18,6 @@ | |||
18 | * is much larger than a sockaddr_in6. | 18 | * is much larger than a sockaddr_in6. |
19 | */ | 19 | */ |
20 | struct svc_cacherep { | 20 | struct svc_cacherep { |
21 | struct hlist_node c_hash; | ||
22 | struct list_head c_lru; | 21 | struct list_head c_lru; |
23 | 22 | ||
24 | unsigned char c_state, /* unused, inprog, done */ | 23 | unsigned char c_state, /* unused, inprog, done */ |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 72ffd7cce3c3..30a739d896ff 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -1145,6 +1145,7 @@ static struct flags { | |||
1145 | { NFSEXP_ALLSQUASH, {"all_squash", ""}}, | 1145 | { NFSEXP_ALLSQUASH, {"all_squash", ""}}, |
1146 | { NFSEXP_ASYNC, {"async", "sync"}}, | 1146 | { NFSEXP_ASYNC, {"async", "sync"}}, |
1147 | { NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}}, | 1147 | { NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}}, |
1148 | { NFSEXP_NOREADDIRPLUS, {"nordirplus", ""}}, | ||
1148 | { NFSEXP_NOHIDE, {"nohide", ""}}, | 1149 | { NFSEXP_NOHIDE, {"nohide", ""}}, |
1149 | { NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, | 1150 | { NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, |
1150 | { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, | 1151 | { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, |
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index fa2525b2e9d7..12f2aab4f614 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c | |||
@@ -223,11 +223,6 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, | |||
223 | newfhp = fh_init(&resp->fh, NFS3_FHSIZE); | 223 | newfhp = fh_init(&resp->fh, NFS3_FHSIZE); |
224 | attr = &argp->attrs; | 224 | attr = &argp->attrs; |
225 | 225 | ||
226 | /* Get the directory inode */ | ||
227 | nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE); | ||
228 | if (nfserr) | ||
229 | RETURN_STATUS(nfserr); | ||
230 | |||
231 | /* Unfudge the mode bits */ | 226 | /* Unfudge the mode bits */ |
232 | attr->ia_mode &= ~S_IFMT; | 227 | attr->ia_mode &= ~S_IFMT; |
233 | if (!(attr->ia_valid & ATTR_MODE)) { | 228 | if (!(attr->ia_valid & ATTR_MODE)) { |
@@ -471,6 +466,14 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, | |||
471 | resp->buflen = resp->count; | 466 | resp->buflen = resp->count; |
472 | resp->rqstp = rqstp; | 467 | resp->rqstp = rqstp; |
473 | offset = argp->cookie; | 468 | offset = argp->cookie; |
469 | |||
470 | nfserr = fh_verify(rqstp, &resp->fh, S_IFDIR, NFSD_MAY_NOP); | ||
471 | if (nfserr) | ||
472 | RETURN_STATUS(nfserr); | ||
473 | |||
474 | if (resp->fh.fh_export->ex_flags & NFSEXP_NOREADDIRPLUS) | ||
475 | RETURN_STATUS(nfserr_notsupp); | ||
476 | |||
474 | nfserr = nfsd_readdir(rqstp, &resp->fh, | 477 | nfserr = nfsd_readdir(rqstp, &resp->fh, |
475 | &offset, | 478 | &offset, |
476 | &resp->common, | 479 | &resp->common, |
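[Sketch] The nfs3proc.c change above moves fh_verify() into the readdirplus handler and then refuses the call with nfserr_notsupp whenever the export carries the new NFSEXP_NOREADDIRPLUS flag ("nordirplus" in the export table hunk below), so clients fall back to plain READDIR. A tiny runnable sketch of that gate; the flag value is assumed for the sketch and the export struct is a stand-in:

#include <stdbool.h>

#define NFSEXP_NOREADDIRPLUS 0x00004000	/* assumed value for this sketch */

struct export { unsigned int ex_flags; };

/* Mirrors the new check in nfsd3_proc_readdirplus(). */
static bool readdirplus_allowed(const struct export *exp)
{
	return !(exp->ex_flags & NFSEXP_NOREADDIRPLUS);
}

int main(void)
{
	struct export exp = { .ex_flags = NFSEXP_NOREADDIRPLUS };

	return readdirplus_allowed(&exp) ? 1 : 0;	/* gated: exits 0 */
}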
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index e0be57b0f79b..ed2b1151b171 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -49,12 +49,6 @@ static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason); | |||
49 | 49 | ||
50 | /* Index of predefined Linux callback client operations */ | 50 | /* Index of predefined Linux callback client operations */ |
51 | 51 | ||
52 | enum { | ||
53 | NFSPROC4_CLNT_CB_NULL = 0, | ||
54 | NFSPROC4_CLNT_CB_RECALL, | ||
55 | NFSPROC4_CLNT_CB_SEQUENCE, | ||
56 | }; | ||
57 | |||
58 | struct nfs4_cb_compound_hdr { | 52 | struct nfs4_cb_compound_hdr { |
59 | /* args */ | 53 | /* args */ |
60 | u32 ident; /* minorversion 0 only */ | 54 | u32 ident; /* minorversion 0 only */ |
@@ -494,7 +488,7 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
494 | static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, | 488 | static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, |
495 | const struct nfsd4_callback *cb) | 489 | const struct nfsd4_callback *cb) |
496 | { | 490 | { |
497 | const struct nfs4_delegation *args = cb->cb_op; | 491 | const struct nfs4_delegation *dp = cb_to_delegation(cb); |
498 | struct nfs4_cb_compound_hdr hdr = { | 492 | struct nfs4_cb_compound_hdr hdr = { |
499 | .ident = cb->cb_clp->cl_cb_ident, | 493 | .ident = cb->cb_clp->cl_cb_ident, |
500 | .minorversion = cb->cb_minorversion, | 494 | .minorversion = cb->cb_minorversion, |
@@ -502,7 +496,7 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
502 | 496 | ||
503 | encode_cb_compound4args(xdr, &hdr); | 497 | encode_cb_compound4args(xdr, &hdr); |
504 | encode_cb_sequence4args(xdr, cb, &hdr); | 498 | encode_cb_sequence4args(xdr, cb, &hdr); |
505 | encode_cb_recall4args(xdr, args, &hdr); | 499 | encode_cb_recall4args(xdr, dp, &hdr); |
506 | encode_cb_nops(&hdr); | 500 | encode_cb_nops(&hdr); |
507 | } | 501 | } |
508 | 502 | ||
@@ -746,27 +740,6 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = { | |||
746 | 740 | ||
747 | static struct workqueue_struct *callback_wq; | 741 | static struct workqueue_struct *callback_wq; |
748 | 742 | ||
749 | static void run_nfsd4_cb(struct nfsd4_callback *cb) | ||
750 | { | ||
751 | queue_work(callback_wq, &cb->cb_work); | ||
752 | } | ||
753 | |||
754 | static void do_probe_callback(struct nfs4_client *clp) | ||
755 | { | ||
756 | struct nfsd4_callback *cb = &clp->cl_cb_null; | ||
757 | |||
758 | cb->cb_op = NULL; | ||
759 | cb->cb_clp = clp; | ||
760 | |||
761 | cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL]; | ||
762 | cb->cb_msg.rpc_argp = NULL; | ||
763 | cb->cb_msg.rpc_resp = NULL; | ||
764 | |||
765 | cb->cb_ops = &nfsd4_cb_probe_ops; | ||
766 | |||
767 | run_nfsd4_cb(cb); | ||
768 | } | ||
769 | |||
770 | /* | 743 | /* |
771 | * Poke the callback thread to process any updates to the callback | 744 | * Poke the callback thread to process any updates to the callback |
772 | * parameters, and send a null probe. | 745 | * parameters, and send a null probe. |
@@ -775,7 +748,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp) | |||
775 | { | 748 | { |
776 | clp->cl_cb_state = NFSD4_CB_UNKNOWN; | 749 | clp->cl_cb_state = NFSD4_CB_UNKNOWN; |
777 | set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); | 750 | set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); |
778 | do_probe_callback(clp); | 751 | nfsd4_run_cb(&clp->cl_cb_null); |
779 | } | 752 | } |
780 | 753 | ||
781 | void nfsd4_probe_callback_sync(struct nfs4_client *clp) | 754 | void nfsd4_probe_callback_sync(struct nfs4_client *clp) |
@@ -847,23 +820,9 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) | |||
847 | rpc_wake_up_next(&clp->cl_cb_waitq); | 820 | rpc_wake_up_next(&clp->cl_cb_waitq); |
848 | dprintk("%s: freed slot, new seqid=%d\n", __func__, | 821 | dprintk("%s: freed slot, new seqid=%d\n", __func__, |
849 | clp->cl_cb_session->se_cb_seq_nr); | 822 | clp->cl_cb_session->se_cb_seq_nr); |
850 | |||
851 | /* We're done looking into the sequence information */ | ||
852 | task->tk_msg.rpc_resp = NULL; | ||
853 | } | 823 | } |
854 | } | ||
855 | |||
856 | |||
857 | static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) | ||
858 | { | ||
859 | struct nfsd4_callback *cb = calldata; | ||
860 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); | ||
861 | struct nfs4_client *clp = cb->cb_clp; | ||
862 | struct rpc_clnt *current_rpc_client = clp->cl_cb_client; | ||
863 | |||
864 | nfsd4_cb_done(task, calldata); | ||
865 | 824 | ||
866 | if (current_rpc_client != task->tk_client) { | 825 | if (clp->cl_cb_client != task->tk_client) { |
867 | /* We're shutting down or changing cl_cb_client; leave | 826 | /* We're shutting down or changing cl_cb_client; leave |
868 | * it to nfsd4_process_cb_update to restart the call if | 827 | * it to nfsd4_process_cb_update to restart the call if |
869 | * necessary. */ | 828 | * necessary. */ |
@@ -872,47 +831,42 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) | |||
872 | 831 | ||
873 | if (cb->cb_done) | 832 | if (cb->cb_done) |
874 | return; | 833 | return; |
875 | switch (task->tk_status) { | 834 | |
835 | switch (cb->cb_ops->done(cb, task)) { | ||
876 | case 0: | 836 | case 0: |
877 | cb->cb_done = true; | 837 | task->tk_status = 0; |
838 | rpc_restart_call_prepare(task); | ||
878 | return; | 839 | return; |
879 | case -EBADHANDLE: | 840 | case 1: |
880 | case -NFS4ERR_BAD_STATEID: | ||
881 | /* Race: client probably got cb_recall | ||
882 | * before open reply granting delegation */ | ||
883 | break; | 841 | break; |
884 | default: | 842 | case -1: |
885 | /* Network partition? */ | 843 | /* Network partition? */ |
886 | nfsd4_mark_cb_down(clp, task->tk_status); | 844 | nfsd4_mark_cb_down(clp, task->tk_status); |
845 | break; | ||
846 | default: | ||
847 | BUG(); | ||
887 | } | 848 | } |
888 | if (dp->dl_retries--) { | ||
889 | rpc_delay(task, 2*HZ); | ||
890 | task->tk_status = 0; | ||
891 | rpc_restart_call_prepare(task); | ||
892 | return; | ||
893 | } | ||
894 | nfsd4_mark_cb_down(clp, task->tk_status); | ||
895 | cb->cb_done = true; | 849 | cb->cb_done = true; |
896 | } | 850 | } |
897 | 851 | ||
898 | static void nfsd4_cb_recall_release(void *calldata) | 852 | static void nfsd4_cb_release(void *calldata) |
899 | { | 853 | { |
900 | struct nfsd4_callback *cb = calldata; | 854 | struct nfsd4_callback *cb = calldata; |
901 | struct nfs4_client *clp = cb->cb_clp; | 855 | struct nfs4_client *clp = cb->cb_clp; |
902 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); | ||
903 | 856 | ||
904 | if (cb->cb_done) { | 857 | if (cb->cb_done) { |
905 | spin_lock(&clp->cl_lock); | 858 | spin_lock(&clp->cl_lock); |
906 | list_del(&cb->cb_per_client); | 859 | list_del(&cb->cb_per_client); |
907 | spin_unlock(&clp->cl_lock); | 860 | spin_unlock(&clp->cl_lock); |
908 | nfs4_put_stid(&dp->dl_stid); | 861 | |
862 | cb->cb_ops->release(cb); | ||
909 | } | 863 | } |
910 | } | 864 | } |
911 | 865 | ||
912 | static const struct rpc_call_ops nfsd4_cb_recall_ops = { | 866 | static const struct rpc_call_ops nfsd4_cb_ops = { |
913 | .rpc_call_prepare = nfsd4_cb_prepare, | 867 | .rpc_call_prepare = nfsd4_cb_prepare, |
914 | .rpc_call_done = nfsd4_cb_recall_done, | 868 | .rpc_call_done = nfsd4_cb_done, |
915 | .rpc_release = nfsd4_cb_recall_release, | 869 | .rpc_release = nfsd4_cb_release, |
916 | }; | 870 | }; |
917 | 871 | ||
918 | int nfsd4_create_callback_queue(void) | 872 | int nfsd4_create_callback_queue(void) |
@@ -937,16 +891,10 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp) | |||
937 | * instead, nfsd4_run_cb_null() will detect the killed | 891 | * instead, nfsd4_run_cb_null() will detect the killed |
938 | * client, destroy the rpc client, and stop: | 892 | * client, destroy the rpc client, and stop: |
939 | */ | 893 | */ |
940 | do_probe_callback(clp); | 894 | nfsd4_run_cb(&clp->cl_cb_null); |
941 | flush_workqueue(callback_wq); | 895 | flush_workqueue(callback_wq); |
942 | } | 896 | } |
943 | 897 | ||
944 | static void nfsd4_release_cb(struct nfsd4_callback *cb) | ||
945 | { | ||
946 | if (cb->cb_ops->rpc_release) | ||
947 | cb->cb_ops->rpc_release(cb); | ||
948 | } | ||
949 | |||
950 | /* requires cl_lock: */ | 898 | /* requires cl_lock: */ |
951 | static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) | 899 | static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) |
952 | { | 900 | { |
@@ -1009,63 +957,49 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) | |||
1009 | } | 957 | } |
1010 | /* Yay, the callback channel's back! Restart any callbacks: */ | 958 | /* Yay, the callback channel's back! Restart any callbacks: */ |
1011 | list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client) | 959 | list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client) |
1012 | run_nfsd4_cb(cb); | 960 | queue_work(callback_wq, &cb->cb_work); |
1013 | } | 961 | } |
1014 | 962 | ||
1015 | static void | 963 | static void |
1016 | nfsd4_run_callback_rpc(struct nfsd4_callback *cb) | 964 | nfsd4_run_cb_work(struct work_struct *work) |
1017 | { | 965 | { |
966 | struct nfsd4_callback *cb = | ||
967 | container_of(work, struct nfsd4_callback, cb_work); | ||
1018 | struct nfs4_client *clp = cb->cb_clp; | 968 | struct nfs4_client *clp = cb->cb_clp; |
1019 | struct rpc_clnt *clnt; | 969 | struct rpc_clnt *clnt; |
1020 | 970 | ||
971 | if (cb->cb_ops && cb->cb_ops->prepare) | ||
972 | cb->cb_ops->prepare(cb); | ||
973 | |||
1021 | if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) | 974 | if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) |
1022 | nfsd4_process_cb_update(cb); | 975 | nfsd4_process_cb_update(cb); |
1023 | 976 | ||
1024 | clnt = clp->cl_cb_client; | 977 | clnt = clp->cl_cb_client; |
1025 | if (!clnt) { | 978 | if (!clnt) { |
1026 | /* Callback channel broken, or client killed; give up: */ | 979 | /* Callback channel broken, or client killed; give up: */ |
1027 | nfsd4_release_cb(cb); | 980 | if (cb->cb_ops && cb->cb_ops->release) |
981 | cb->cb_ops->release(cb); | ||
1028 | return; | 982 | return; |
1029 | } | 983 | } |
1030 | cb->cb_msg.rpc_cred = clp->cl_cb_cred; | 984 | cb->cb_msg.rpc_cred = clp->cl_cb_cred; |
1031 | rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, | 985 | rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, |
1032 | cb->cb_ops, cb); | 986 | cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); |
1033 | } | 987 | } |
1034 | 988 | ||
1035 | void | 989 | void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, |
1036 | nfsd4_run_cb_null(struct work_struct *w) | 990 | struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op) |
1037 | { | 991 | { |
1038 | struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, | ||
1039 | cb_work); | ||
1040 | nfsd4_run_callback_rpc(cb); | ||
1041 | } | ||
1042 | |||
1043 | void | ||
1044 | nfsd4_run_cb_recall(struct work_struct *w) | ||
1045 | { | ||
1046 | struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, | ||
1047 | cb_work); | ||
1048 | |||
1049 | nfsd4_prepare_cb_recall(cb->cb_op); | ||
1050 | nfsd4_run_callback_rpc(cb); | ||
1051 | } | ||
1052 | |||
1053 | void nfsd4_cb_recall(struct nfs4_delegation *dp) | ||
1054 | { | ||
1055 | struct nfsd4_callback *cb = &dp->dl_recall; | ||
1056 | struct nfs4_client *clp = dp->dl_stid.sc_client; | ||
1057 | |||
1058 | dp->dl_retries = 1; | ||
1059 | cb->cb_op = dp; | ||
1060 | cb->cb_clp = clp; | 992 | cb->cb_clp = clp; |
1061 | cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; | 993 | cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op]; |
1062 | cb->cb_msg.rpc_argp = cb; | 994 | cb->cb_msg.rpc_argp = cb; |
1063 | cb->cb_msg.rpc_resp = cb; | 995 | cb->cb_msg.rpc_resp = cb; |
1064 | 996 | cb->cb_ops = ops; | |
1065 | cb->cb_ops = &nfsd4_cb_recall_ops; | 997 | INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); |
1066 | |||
1067 | INIT_LIST_HEAD(&cb->cb_per_client); | 998 | INIT_LIST_HEAD(&cb->cb_per_client); |
1068 | cb->cb_done = true; | 999 | cb->cb_done = true; |
1000 | } | ||
1069 | 1001 | ||
1070 | run_nfsd4_cb(&dp->dl_recall); | 1002 | void nfsd4_run_cb(struct nfsd4_callback *cb) |
1003 | { | ||
1004 | queue_work(callback_wq, &cb->cb_work); | ||
1071 | } | 1005 | } |
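
The net effect of this file's changes is to replace the per-callback rpc_call_ops (and the hand-rolled do_probe_callback()/nfsd4_cb_recall() setup) with one generic nfsd4_cb_ops plus a small per-callback vtable. The vtable is declared in state.h rather than in these hunks; reconstructed from the call sites above, it would look roughly like:

    struct nfsd4_callback_ops {
            void (*prepare)(struct nfsd4_callback *cb);
            int (*done)(struct nfsd4_callback *cb, struct rpc_task *task);
            void (*release)(struct nfsd4_callback *cb);
    };

The ->done() return convention is visible in nfsd4_cb_done(): 0 restarts the RPC, 1 means the callback has completed, and -1 marks the backchannel down. A NULL cb_ops (as for the cl_cb_null probe) makes nfsd4_run_cb_work() fall back to nfsd4_cb_probe_ops.
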
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index a0ab0a847d69..e1b3d3d472da 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c | |||
@@ -215,7 +215,8 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) | |||
215 | memset(&ent, 0, sizeof(ent)); | 215 | memset(&ent, 0, sizeof(ent)); |
216 | 216 | ||
217 | /* Authentication name */ | 217 | /* Authentication name */ |
218 | if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) | 218 | len = qword_get(&buf, buf1, PAGE_SIZE); |
219 | if (len <= 0 || len >= IDMAP_NAMESZ) | ||
219 | goto out; | 220 | goto out; |
220 | memcpy(ent.authname, buf1, sizeof(ent.authname)); | 221 | memcpy(ent.authname, buf1, sizeof(ent.authname)); |
221 | 222 | ||
@@ -245,12 +246,10 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) | |||
245 | /* Name */ | 246 | /* Name */ |
246 | error = -EINVAL; | 247 | error = -EINVAL; |
247 | len = qword_get(&buf, buf1, PAGE_SIZE); | 248 | len = qword_get(&buf, buf1, PAGE_SIZE); |
248 | if (len < 0) | 249 | if (len < 0 || len >= IDMAP_NAMESZ) |
249 | goto out; | 250 | goto out; |
250 | if (len == 0) | 251 | if (len == 0) |
251 | set_bit(CACHE_NEGATIVE, &ent.h.flags); | 252 | set_bit(CACHE_NEGATIVE, &ent.h.flags); |
252 | else if (len >= IDMAP_NAMESZ) | ||
253 | goto out; | ||
254 | else | 253 | else |
255 | memcpy(ent.name, buf1, sizeof(ent.name)); | 254 | memcpy(ent.name, buf1, sizeof(ent.name)); |
256 | error = -ENOMEM; | 255 | error = -ENOMEM; |
@@ -259,15 +258,12 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) | |||
259 | goto out; | 258 | goto out; |
260 | 259 | ||
261 | cache_put(&res->h, cd); | 260 | cache_put(&res->h, cd); |
262 | |||
263 | error = 0; | 261 | error = 0; |
264 | out: | 262 | out: |
265 | kfree(buf1); | 263 | kfree(buf1); |
266 | |||
267 | return error; | 264 | return error; |
268 | } | 265 | } |
269 | 266 | ||
270 | |||
271 | static struct ent * | 267 | static struct ent * |
272 | idtoname_lookup(struct cache_detail *cd, struct ent *item) | 268 | idtoname_lookup(struct cache_detail *cd, struct ent *item) |
273 | { | 269 | { |
@@ -368,7 +364,7 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) | |||
368 | { | 364 | { |
369 | struct ent ent, *res; | 365 | struct ent ent, *res; |
370 | char *buf1; | 366 | char *buf1; |
371 | int error = -EINVAL; | 367 | int len, error = -EINVAL; |
372 | 368 | ||
373 | if (buf[buflen - 1] != '\n') | 369 | if (buf[buflen - 1] != '\n') |
374 | return (-EINVAL); | 370 | return (-EINVAL); |
@@ -381,7 +377,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) | |||
381 | memset(&ent, 0, sizeof(ent)); | 377 | memset(&ent, 0, sizeof(ent)); |
382 | 378 | ||
383 | /* Authentication name */ | 379 | /* Authentication name */ |
384 | if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) | 380 | len = qword_get(&buf, buf1, PAGE_SIZE); |
381 | if (len <= 0 || len >= IDMAP_NAMESZ) | ||
385 | goto out; | 382 | goto out; |
386 | memcpy(ent.authname, buf1, sizeof(ent.authname)); | 383 | memcpy(ent.authname, buf1, sizeof(ent.authname)); |
387 | 384 | ||
@@ -392,8 +389,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) | |||
392 | IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; | 389 | IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; |
393 | 390 | ||
394 | /* Name */ | 391 | /* Name */ |
395 | error = qword_get(&buf, buf1, PAGE_SIZE); | 392 | len = qword_get(&buf, buf1, PAGE_SIZE); |
396 | if (error <= 0 || error >= IDMAP_NAMESZ) | 393 | if (len <= 0 || len >= IDMAP_NAMESZ) |
397 | goto out; | 394 | goto out; |
398 | memcpy(ent.name, buf1, sizeof(ent.name)); | 395 | memcpy(ent.name, buf1, sizeof(ent.name)); |
399 | 396 | ||
@@ -421,7 +418,6 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) | |||
421 | error = 0; | 418 | error = 0; |
422 | out: | 419 | out: |
423 | kfree(buf1); | 420 | kfree(buf1); |
424 | |||
425 | return (error); | 421 | return (error); |
426 | } | 422 | } |
427 | 423 | ||
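
All three parse paths now bound the qword_get() result against IDMAP_NAMESZ before copying. The reason is easy to miss: ent.name and ent.authname are fixed char[IDMAP_NAMESZ] arrays and the copies move sizeof() bytes, so a field of IDMAP_NAMESZ bytes or more would lose its terminating NUL. A distillation of the accepted pattern (the idtoname name field additionally allows len == 0 to mark a negative entry):

    len = qword_get(&buf, buf1, PAGE_SIZE);
    if (len <= 0 || len >= IDMAP_NAMESZ)    /* empty, error, or no room for NUL */
            goto out;
    memcpy(ent.name, buf1, sizeof(ent.name));
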
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5e0dc528a0e8..cdeb3cfd6f32 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -1013,6 +1013,49 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1013 | return status; | 1013 | return status; |
1014 | } | 1014 | } |
1015 | 1015 | ||
1016 | static __be32 | ||
1017 | nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | ||
1018 | struct nfsd4_seek *seek) | ||
1019 | { | ||
1020 | int whence; | ||
1021 | __be32 status; | ||
1022 | struct file *file; | ||
1023 | |||
1024 | status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, | ||
1025 | &seek->seek_stateid, | ||
1026 | RD_STATE, &file); | ||
1027 | if (status) { | ||
1028 | dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); | ||
1029 | return status; | ||
1030 | } | ||
1031 | |||
1032 | switch (seek->seek_whence) { | ||
1033 | case NFS4_CONTENT_DATA: | ||
1034 | whence = SEEK_DATA; | ||
1035 | break; | ||
1036 | case NFS4_CONTENT_HOLE: | ||
1037 | whence = SEEK_HOLE; | ||
1038 | break; | ||
1039 | default: | ||
1040 | status = nfserr_union_notsupp; | ||
1041 | goto out; | ||
1042 | } | ||
1043 | |||
1044 | /* | ||
1045 | * Note: This call does change file->f_pos, but nothing in NFSD | ||
1046 | * should ever use file->f_pos. | ||
1047 | */ | ||
1048 | seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence); | ||
1049 | if (seek->seek_pos < 0) | ||
1050 | status = nfserrno(seek->seek_pos); | ||
1051 | else if (seek->seek_pos >= i_size_read(file_inode(file))) | ||
1052 | seek->seek_eof = true; | ||
1053 | |||
1054 | out: | ||
1055 | fput(file); | ||
1056 | return status; | ||
1057 | } | ||
1058 | |||
1016 | /* This routine never returns NFS_OK! If there are no other errors, it | 1059 | /* This routine never returns NFS_OK! If there are no other errors, it |
1017 | * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the | 1060 | * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the |
1018 | * attributes matched. VERIFY is implemented by mapping NFSERR_SAME | 1061 | * attributes matched. VERIFY is implemented by mapping NFSERR_SAME |
@@ -1881,6 +1924,12 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1881 | .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, | 1924 | .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, |
1882 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, | 1925 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, |
1883 | }, | 1926 | }, |
1927 | |||
1928 | /* NFSv4.2 operations */ | ||
1929 | [OP_SEEK] = { | ||
1930 | .op_func = (nfsd4op_func)nfsd4_seek, | ||
1931 | .op_name = "OP_SEEK", | ||
1932 | }, | ||
1884 | }; | 1933 | }; |
1885 | 1934 | ||
1886 | int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) | 1935 | int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) |
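
nfsd4_seek() maps the protocol's NFS4_CONTENT_DATA/NFS4_CONTENT_HOLE onto the VFS SEEK_DATA/SEEK_HOLE whence values, so the operation inherits lseek(2) semantics, including the convention that a file with no holes reports one implicit hole at EOF (hence the seek_pos >= i_size_read() check setting seek_eof). A standalone userspace illustration of those semantics, for reference only:

    #define _GNU_SOURCE /* SEEK_HOLE/SEEK_DATA */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
            off_t hole;
            int fd;

            if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
                    return 1;
            /* the same mapping the server performs: CONTENT_HOLE -> SEEK_HOLE */
            hole = lseek(fd, 0, SEEK_HOLE);
            if (hole == (off_t)-1)
                    perror("lseek");
            else
                    printf("first hole at offset %lld\n", (long long)hole);
            close(fd);
            return 0;
    }
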
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 9c271f42604a..ea95a2bc21b5 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -58,7 +58,7 @@ struct nfsd4_client_tracking_ops { | |||
58 | void (*create)(struct nfs4_client *); | 58 | void (*create)(struct nfs4_client *); |
59 | void (*remove)(struct nfs4_client *); | 59 | void (*remove)(struct nfs4_client *); |
60 | int (*check)(struct nfs4_client *); | 60 | int (*check)(struct nfs4_client *); |
61 | void (*grace_done)(struct nfsd_net *, time_t); | 61 | void (*grace_done)(struct nfsd_net *); |
62 | }; | 62 | }; |
63 | 63 | ||
64 | /* Globals */ | 64 | /* Globals */ |
@@ -188,7 +188,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) | |||
188 | 188 | ||
189 | status = mnt_want_write_file(nn->rec_file); | 189 | status = mnt_want_write_file(nn->rec_file); |
190 | if (status) | 190 | if (status) |
191 | return; | 191 | goto out_creds; |
192 | 192 | ||
193 | dir = nn->rec_file->f_path.dentry; | 193 | dir = nn->rec_file->f_path.dentry; |
194 | /* lock the parent */ | 194 | /* lock the parent */ |
@@ -228,6 +228,7 @@ out_unlock: | |||
228 | user_recovery_dirname); | 228 | user_recovery_dirname); |
229 | } | 229 | } |
230 | mnt_drop_write_file(nn->rec_file); | 230 | mnt_drop_write_file(nn->rec_file); |
231 | out_creds: | ||
231 | nfs4_reset_creds(original_cred); | 232 | nfs4_reset_creds(original_cred); |
232 | } | 233 | } |
233 | 234 | ||
@@ -392,7 +393,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) | |||
392 | } | 393 | } |
393 | 394 | ||
394 | static void | 395 | static void |
395 | nfsd4_recdir_purge_old(struct nfsd_net *nn, time_t boot_time) | 396 | nfsd4_recdir_purge_old(struct nfsd_net *nn) |
396 | { | 397 | { |
397 | int status; | 398 | int status; |
398 | 399 | ||
@@ -479,6 +480,16 @@ nfsd4_init_recdir(struct net *net) | |||
479 | return status; | 480 | return status; |
480 | } | 481 | } |
481 | 482 | ||
483 | static void | ||
484 | nfsd4_shutdown_recdir(struct net *net) | ||
485 | { | ||
486 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); | ||
487 | |||
488 | if (!nn->rec_file) | ||
489 | return; | ||
490 | fput(nn->rec_file); | ||
491 | nn->rec_file = NULL; | ||
492 | } | ||
482 | 493 | ||
483 | static int | 494 | static int |
484 | nfs4_legacy_state_init(struct net *net) | 495 | nfs4_legacy_state_init(struct net *net) |
@@ -512,10 +523,13 @@ nfsd4_load_reboot_recovery_data(struct net *net) | |||
512 | int status; | 523 | int status; |
513 | 524 | ||
514 | status = nfsd4_init_recdir(net); | 525 | status = nfsd4_init_recdir(net); |
515 | if (!status) | ||
516 | status = nfsd4_recdir_load(net); | ||
517 | if (status) | 526 | if (status) |
518 | printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n"); | 527 | return status; |
528 | |||
529 | status = nfsd4_recdir_load(net); | ||
530 | if (status) | ||
531 | nfsd4_shutdown_recdir(net); | ||
532 | |||
519 | return status; | 533 | return status; |
520 | } | 534 | } |
521 | 535 | ||
@@ -546,21 +560,12 @@ err: | |||
546 | } | 560 | } |
547 | 561 | ||
548 | static void | 562 | static void |
549 | nfsd4_shutdown_recdir(struct nfsd_net *nn) | ||
550 | { | ||
551 | if (!nn->rec_file) | ||
552 | return; | ||
553 | fput(nn->rec_file); | ||
554 | nn->rec_file = NULL; | ||
555 | } | ||
556 | |||
557 | static void | ||
558 | nfsd4_legacy_tracking_exit(struct net *net) | 563 | nfsd4_legacy_tracking_exit(struct net *net) |
559 | { | 564 | { |
560 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); | 565 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); |
561 | 566 | ||
562 | nfs4_release_reclaim(nn); | 567 | nfs4_release_reclaim(nn); |
563 | nfsd4_shutdown_recdir(nn); | 568 | nfsd4_shutdown_recdir(net); |
564 | nfs4_legacy_state_shutdown(net); | 569 | nfs4_legacy_state_shutdown(net); |
565 | } | 570 | } |
566 | 571 | ||
@@ -1016,7 +1021,7 @@ nfsd4_cld_check(struct nfs4_client *clp) | |||
1016 | } | 1021 | } |
1017 | 1022 | ||
1018 | static void | 1023 | static void |
1019 | nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time) | 1024 | nfsd4_cld_grace_done(struct nfsd_net *nn) |
1020 | { | 1025 | { |
1021 | int ret; | 1026 | int ret; |
1022 | struct cld_upcall *cup; | 1027 | struct cld_upcall *cup; |
@@ -1029,7 +1034,7 @@ nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time) | |||
1029 | } | 1034 | } |
1030 | 1035 | ||
1031 | cup->cu_msg.cm_cmd = Cld_GraceDone; | 1036 | cup->cu_msg.cm_cmd = Cld_GraceDone; |
1032 | cup->cu_msg.cm_u.cm_gracetime = (int64_t)boot_time; | 1037 | cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time; |
1033 | ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); | 1038 | ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); |
1034 | if (!ret) | 1039 | if (!ret) |
1035 | ret = cup->cu_msg.cm_status; | 1040 | ret = cup->cu_msg.cm_status; |
@@ -1062,6 +1067,8 @@ MODULE_PARM_DESC(cltrack_legacy_disable, | |||
1062 | 1067 | ||
1063 | #define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR=" | 1068 | #define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR=" |
1064 | #define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR=" | 1069 | #define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR=" |
1070 | #define HAS_SESSION_ENV_PREFIX "NFSDCLTRACK_CLIENT_HAS_SESSION=" | ||
1071 | #define GRACE_START_ENV_PREFIX "NFSDCLTRACK_GRACE_START=" | ||
1065 | 1072 | ||
1066 | static char * | 1073 | static char * |
1067 | nfsd4_cltrack_legacy_topdir(void) | 1074 | nfsd4_cltrack_legacy_topdir(void) |
@@ -1126,10 +1133,60 @@ nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name) | |||
1126 | return result; | 1133 | return result; |
1127 | } | 1134 | } |
1128 | 1135 | ||
1136 | static char * | ||
1137 | nfsd4_cltrack_client_has_session(struct nfs4_client *clp) | ||
1138 | { | ||
1139 | int copied; | ||
1140 | size_t len; | ||
1141 | char *result; | ||
1142 | |||
1143 | /* prefix + Y/N character + terminating NULL */ | ||
1144 | len = strlen(HAS_SESSION_ENV_PREFIX) + 1 + 1; | ||
1145 | |||
1146 | result = kmalloc(len, GFP_KERNEL); | ||
1147 | if (!result) | ||
1148 | return result; | ||
1149 | |||
1150 | copied = snprintf(result, len, HAS_SESSION_ENV_PREFIX "%c", | ||
1151 | clp->cl_minorversion ? 'Y' : 'N'); | ||
1152 | if (copied >= len) { | ||
1153 | /* just return nothing if output was truncated */ | ||
1154 | kfree(result); | ||
1155 | return NULL; | ||
1156 | } | ||
1157 | |||
1158 | return result; | ||
1159 | } | ||
1160 | |||
1161 | static char * | ||
1162 | nfsd4_cltrack_grace_start(time_t grace_start) | ||
1163 | { | ||
1164 | int copied; | ||
1165 | size_t len; | ||
1166 | char *result; | ||
1167 | |||
1168 | /* prefix + max width of int64_t string + terminating NULL */ | ||
1169 | len = strlen(GRACE_START_ENV_PREFIX) + 22 + 1; | ||
1170 | |||
1171 | result = kmalloc(len, GFP_KERNEL); | ||
1172 | if (!result) | ||
1173 | return result; | ||
1174 | |||
1175 | copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%ld", | ||
1176 | grace_start); | ||
1177 | if (copied >= len) { | ||
1178 | /* just return nothing if output was truncated */ | ||
1179 | kfree(result); | ||
1180 | return NULL; | ||
1181 | } | ||
1182 | |||
1183 | return result; | ||
1184 | } | ||
1185 | |||
1129 | static int | 1186 | static int |
1130 | nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy) | 1187 | nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *env0, char *env1) |
1131 | { | 1188 | { |
1132 | char *envp[2]; | 1189 | char *envp[3]; |
1133 | char *argv[4]; | 1190 | char *argv[4]; |
1134 | int ret; | 1191 | int ret; |
1135 | 1192 | ||
@@ -1140,10 +1197,12 @@ nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy) | |||
1140 | 1197 | ||
1141 | dprintk("%s: cmd: %s\n", __func__, cmd); | 1198 | dprintk("%s: cmd: %s\n", __func__, cmd); |
1142 | dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)"); | 1199 | dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)"); |
1143 | dprintk("%s: legacy: %s\n", __func__, legacy ? legacy : "(null)"); | 1200 | dprintk("%s: env0: %s\n", __func__, env0 ? env0 : "(null)"); |
1201 | dprintk("%s: env1: %s\n", __func__, env1 ? env1 : "(null)"); | ||
1144 | 1202 | ||
1145 | envp[0] = legacy; | 1203 | envp[0] = env0; |
1146 | envp[1] = NULL; | 1204 | envp[1] = env1; |
1205 | envp[2] = NULL; | ||
1147 | 1206 | ||
1148 | argv[0] = (char *)cltrack_prog; | 1207 | argv[0] = (char *)cltrack_prog; |
1149 | argv[1] = cmd; | 1208 | argv[1] = cmd; |
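
The extra env0/env1 strings end up in the helper's environment. The execution step falls outside these hunks; judging by the surrounding code it presumably still ends in a call_usermodehelper() invocation along these lines (a sketch, not the verbatim tail of the function):

    argv[0] = (char *)cltrack_prog;
    argv[1] = cmd;
    argv[2] = arg;
    argv[3] = NULL;

    ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);

so nfsdcltrack can pick up NFSDCLTRACK_CLIENT_HAS_SESSION and NFSDCLTRACK_GRACE_START with getenv(3).
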
@@ -1187,28 +1246,78 @@ bin_to_hex_dup(const unsigned char *src, int srclen) | |||
1187 | } | 1246 | } |
1188 | 1247 | ||
1189 | static int | 1248 | static int |
1190 | nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net) | 1249 | nfsd4_umh_cltrack_init(struct net *net) |
1191 | { | 1250 | { |
1251 | int ret; | ||
1252 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); | ||
1253 | char *grace_start = nfsd4_cltrack_grace_start(nn->boot_time); | ||
1254 | |||
1192 | /* XXX: The usermode helper is not working in a container yet. */ | 1255 | /* XXX: The usermode helper is not working in a container yet. */ |
1193 | if (net != &init_net) { | 1256 | if (net != &init_net) { |
1194 | WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " | 1257 | WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " |
1195 | "tracking in a container!\n"); | 1258 | "tracking in a container!\n"); |
1196 | return -EINVAL; | 1259 | return -EINVAL; |
1197 | } | 1260 | } |
1198 | return nfsd4_umh_cltrack_upcall("init", NULL, NULL); | 1261 | |
1262 | ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL); | ||
1263 | kfree(grace_start); | ||
1264 | return ret; | ||
1265 | } | ||
1266 | |||
1267 | static void | ||
1268 | nfsd4_cltrack_upcall_lock(struct nfs4_client *clp) | ||
1269 | { | ||
1270 | wait_on_bit_lock(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK, | ||
1271 | TASK_UNINTERRUPTIBLE); | ||
1272 | } | ||
1273 | |||
1274 | static void | ||
1275 | nfsd4_cltrack_upcall_unlock(struct nfs4_client *clp) | ||
1276 | { | ||
1277 | smp_mb__before_atomic(); | ||
1278 | clear_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags); | ||
1279 | smp_mb__after_atomic(); | ||
1280 | wake_up_bit(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK); | ||
1199 | } | 1281 | } |
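
This pair is the kernel's open-coded bit-mutex idiom: wait_on_bit_lock() sleeps until it can atomically set the bit, and because clear_bit() implies no memory barrier, the unlock side fences on both sides before wake_up_bit(). The callers below use it to make the test-bit-then-upcall sequence atomic, condensed here from nfsd4_umh_cltrack_create() further down:

    nfsd4_cltrack_upcall_lock(clp);
    if (!nfsd4_umh_cltrack_upcall("create", hexid, has_session, grace_start))
            set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
    nfsd4_cltrack_upcall_unlock(clp);
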
1200 | 1282 | ||
1201 | static void | 1283 | static void |
1202 | nfsd4_umh_cltrack_create(struct nfs4_client *clp) | 1284 | nfsd4_umh_cltrack_create(struct nfs4_client *clp) |
1203 | { | 1285 | { |
1204 | char *hexid; | 1286 | char *hexid, *has_session, *grace_start; |
1287 | struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); | ||
1288 | |||
1289 | /* | ||
1290 | * With v4.0 clients, there's little difference in outcome between a | ||
1291 | * create and check operation, and we can end up calling into this | ||
1292 | * function multiple times per client (once for each openowner). So, | ||
1293 | * for v4.0 clients skip upcalling once the client has been recorded | ||
1294 | * on stable storage. | ||
1295 | * | ||
1296 | * For v4.1+ clients, the outcome of the two operations is different, | ||
1297 | * so we must ensure that we upcall for the create operation. v4.1+ | ||
1298 | * clients call this on RECLAIM_COMPLETE though, so we should only end | ||
1299 | * up doing a single create upcall per client. | ||
1300 | */ | ||
1301 | if (clp->cl_minorversion == 0 && | ||
1302 | test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) | ||
1303 | return; | ||
1205 | 1304 | ||
1206 | hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); | 1305 | hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); |
1207 | if (!hexid) { | 1306 | if (!hexid) { |
1208 | dprintk("%s: can't allocate memory for upcall!\n", __func__); | 1307 | dprintk("%s: can't allocate memory for upcall!\n", __func__); |
1209 | return; | 1308 | return; |
1210 | } | 1309 | } |
1211 | nfsd4_umh_cltrack_upcall("create", hexid, NULL); | 1310 | |
1311 | has_session = nfsd4_cltrack_client_has_session(clp); | ||
1312 | grace_start = nfsd4_cltrack_grace_start(nn->boot_time); | ||
1313 | |||
1314 | nfsd4_cltrack_upcall_lock(clp); | ||
1315 | if (!nfsd4_umh_cltrack_upcall("create", hexid, has_session, grace_start)) | ||
1316 | set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); | ||
1317 | nfsd4_cltrack_upcall_unlock(clp); | ||
1318 | |||
1319 | kfree(has_session); | ||
1320 | kfree(grace_start); | ||
1212 | kfree(hexid); | 1321 | kfree(hexid); |
1213 | } | 1322 | } |
1214 | 1323 | ||
@@ -1217,12 +1326,21 @@ nfsd4_umh_cltrack_remove(struct nfs4_client *clp) | |||
1217 | { | 1326 | { |
1218 | char *hexid; | 1327 | char *hexid; |
1219 | 1328 | ||
1329 | if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) | ||
1330 | return; | ||
1331 | |||
1220 | hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); | 1332 | hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); |
1221 | if (!hexid) { | 1333 | if (!hexid) { |
1222 | dprintk("%s: can't allocate memory for upcall!\n", __func__); | 1334 | dprintk("%s: can't allocate memory for upcall!\n", __func__); |
1223 | return; | 1335 | return; |
1224 | } | 1336 | } |
1225 | nfsd4_umh_cltrack_upcall("remove", hexid, NULL); | 1337 | |
1338 | nfsd4_cltrack_upcall_lock(clp); | ||
1339 | if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags) && | ||
1340 | nfsd4_umh_cltrack_upcall("remove", hexid, NULL, NULL) == 0) | ||
1341 | clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); | ||
1342 | nfsd4_cltrack_upcall_unlock(clp); | ||
1343 | |||
1226 | kfree(hexid); | 1344 | kfree(hexid); |
1227 | } | 1345 | } |
1228 | 1346 | ||
@@ -1230,30 +1348,45 @@ static int | |||
1230 | nfsd4_umh_cltrack_check(struct nfs4_client *clp) | 1348 | nfsd4_umh_cltrack_check(struct nfs4_client *clp) |
1231 | { | 1349 | { |
1232 | int ret; | 1350 | int ret; |
1233 | char *hexid, *legacy; | 1351 | char *hexid, *has_session, *legacy; |
1352 | |||
1353 | if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) | ||
1354 | return 0; | ||
1234 | 1355 | ||
1235 | hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); | 1356 | hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); |
1236 | if (!hexid) { | 1357 | if (!hexid) { |
1237 | dprintk("%s: can't allocate memory for upcall!\n", __func__); | 1358 | dprintk("%s: can't allocate memory for upcall!\n", __func__); |
1238 | return -ENOMEM; | 1359 | return -ENOMEM; |
1239 | } | 1360 | } |
1361 | |||
1362 | has_session = nfsd4_cltrack_client_has_session(clp); | ||
1240 | legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name); | 1363 | legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name); |
1241 | ret = nfsd4_umh_cltrack_upcall("check", hexid, legacy); | 1364 | |
1365 | nfsd4_cltrack_upcall_lock(clp); | ||
1366 | if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) { | ||
1367 | ret = 0; | ||
1368 | } else { | ||
1369 | ret = nfsd4_umh_cltrack_upcall("check", hexid, has_session, legacy); | ||
1370 | if (ret == 0) | ||
1371 | set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); | ||
1372 | } | ||
1373 | nfsd4_cltrack_upcall_unlock(clp); | ||
1374 | kfree(has_session); | ||
1242 | kfree(legacy); | 1375 | kfree(legacy); |
1243 | kfree(hexid); | 1376 | kfree(hexid); |
1377 | |||
1244 | return ret; | 1378 | return ret; |
1245 | } | 1379 | } |
1246 | 1380 | ||
1247 | static void | 1381 | static void |
1248 | nfsd4_umh_cltrack_grace_done(struct nfsd_net __attribute__((unused)) *nn, | 1382 | nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn) |
1249 | time_t boot_time) | ||
1250 | { | 1383 | { |
1251 | char *legacy; | 1384 | char *legacy; |
1252 | char timestr[22]; /* FIXME: better way to determine max size? */ | 1385 | char timestr[22]; /* FIXME: better way to determine max size? */ |
1253 | 1386 | ||
1254 | sprintf(timestr, "%ld", boot_time); | 1387 | sprintf(timestr, "%ld", nn->boot_time); |
1255 | legacy = nfsd4_cltrack_legacy_topdir(); | 1388 | legacy = nfsd4_cltrack_legacy_topdir(); |
1256 | nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy); | 1389 | nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL); |
1257 | kfree(legacy); | 1390 | kfree(legacy); |
1258 | } | 1391 | } |
1259 | 1392 | ||
@@ -1356,10 +1489,10 @@ nfsd4_client_record_check(struct nfs4_client *clp) | |||
1356 | } | 1489 | } |
1357 | 1490 | ||
1358 | void | 1491 | void |
1359 | nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time) | 1492 | nfsd4_record_grace_done(struct nfsd_net *nn) |
1360 | { | 1493 | { |
1361 | if (nn->client_tracking_ops) | 1494 | if (nn->client_tracking_ops) |
1362 | nn->client_tracking_ops->grace_done(nn, boot_time); | 1495 | nn->client_tracking_ops->grace_done(nn); |
1363 | } | 1496 | } |
1364 | 1497 | ||
1365 | static int | 1498 | static int |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2e80a59e7e91..5c0cac173068 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -96,6 +96,8 @@ static struct kmem_cache *deleg_slab; | |||
96 | 96 | ||
97 | static void free_session(struct nfsd4_session *); | 97 | static void free_session(struct nfsd4_session *); |
98 | 98 | ||
99 | static struct nfsd4_callback_ops nfsd4_cb_recall_ops; | ||
100 | |||
99 | static bool is_session_dead(struct nfsd4_session *ses) | 101 | static bool is_session_dead(struct nfsd4_session *ses) |
100 | { | 102 | { |
101 | return ses->se_flags & NFS4_SESSION_DEAD; | 103 | return ses->se_flags & NFS4_SESSION_DEAD; |
@@ -645,7 +647,9 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh) | |||
645 | INIT_LIST_HEAD(&dp->dl_perclnt); | 647 | INIT_LIST_HEAD(&dp->dl_perclnt); |
646 | INIT_LIST_HEAD(&dp->dl_recall_lru); | 648 | INIT_LIST_HEAD(&dp->dl_recall_lru); |
647 | dp->dl_type = NFS4_OPEN_DELEGATE_READ; | 649 | dp->dl_type = NFS4_OPEN_DELEGATE_READ; |
648 | INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall); | 650 | dp->dl_retries = 1; |
651 | nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, | ||
652 | &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL); | ||
649 | return dp; | 653 | return dp; |
650 | out_dec: | 654 | out_dec: |
651 | atomic_long_dec(&num_delegations); | 655 | atomic_long_dec(&num_delegations); |
@@ -673,15 +677,20 @@ nfs4_put_stid(struct nfs4_stid *s) | |||
673 | 677 | ||
674 | static void nfs4_put_deleg_lease(struct nfs4_file *fp) | 678 | static void nfs4_put_deleg_lease(struct nfs4_file *fp) |
675 | { | 679 | { |
676 | lockdep_assert_held(&state_lock); | 680 | struct file *filp = NULL; |
681 | struct file_lock *fl; | ||
677 | 682 | ||
678 | if (!fp->fi_lease) | 683 | spin_lock(&fp->fi_lock); |
679 | return; | 684 | if (fp->fi_lease && atomic_dec_and_test(&fp->fi_delegees)) { |
680 | if (atomic_dec_and_test(&fp->fi_delegees)) { | 685 | swap(filp, fp->fi_deleg_file); |
681 | vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); | 686 | fl = fp->fi_lease; |
682 | fp->fi_lease = NULL; | 687 | fp->fi_lease = NULL; |
683 | fput(fp->fi_deleg_file); | 688 | } |
684 | fp->fi_deleg_file = NULL; | 689 | spin_unlock(&fp->fi_lock); |
690 | |||
691 | if (filp) { | ||
692 | vfs_setlease(filp, F_UNLCK, &fl); | ||
693 | fput(filp); | ||
685 | } | 694 | } |
686 | } | 695 | } |
687 | 696 | ||
@@ -717,8 +726,6 @@ unhash_delegation_locked(struct nfs4_delegation *dp) | |||
717 | list_del_init(&dp->dl_recall_lru); | 726 | list_del_init(&dp->dl_recall_lru); |
718 | list_del_init(&dp->dl_perfile); | 727 | list_del_init(&dp->dl_perfile); |
719 | spin_unlock(&fp->fi_lock); | 728 | spin_unlock(&fp->fi_lock); |
720 | if (fp) | ||
721 | nfs4_put_deleg_lease(fp); | ||
722 | } | 729 | } |
723 | 730 | ||
724 | static void destroy_delegation(struct nfs4_delegation *dp) | 731 | static void destroy_delegation(struct nfs4_delegation *dp) |
@@ -726,6 +733,7 @@ static void destroy_delegation(struct nfs4_delegation *dp) | |||
726 | spin_lock(&state_lock); | 733 | spin_lock(&state_lock); |
727 | unhash_delegation_locked(dp); | 734 | unhash_delegation_locked(dp); |
728 | spin_unlock(&state_lock); | 735 | spin_unlock(&state_lock); |
736 | nfs4_put_deleg_lease(dp->dl_stid.sc_file); | ||
729 | nfs4_put_stid(&dp->dl_stid); | 737 | nfs4_put_stid(&dp->dl_stid); |
730 | } | 738 | } |
731 | 739 | ||
@@ -735,6 +743,8 @@ static void revoke_delegation(struct nfs4_delegation *dp) | |||
735 | 743 | ||
736 | WARN_ON(!list_empty(&dp->dl_recall_lru)); | 744 | WARN_ON(!list_empty(&dp->dl_recall_lru)); |
737 | 745 | ||
746 | nfs4_put_deleg_lease(dp->dl_stid.sc_file); | ||
747 | |||
738 | if (clp->cl_minorversion == 0) | 748 | if (clp->cl_minorversion == 0) |
739 | nfs4_put_stid(&dp->dl_stid); | 749 | nfs4_put_stid(&dp->dl_stid); |
740 | else { | 750 | else { |
@@ -1635,6 +1645,7 @@ __destroy_client(struct nfs4_client *clp) | |||
1635 | while (!list_empty(&reaplist)) { | 1645 | while (!list_empty(&reaplist)) { |
1636 | dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); | 1646 | dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); |
1637 | list_del_init(&dp->dl_recall_lru); | 1647 | list_del_init(&dp->dl_recall_lru); |
1648 | nfs4_put_deleg_lease(dp->dl_stid.sc_file); | ||
1638 | nfs4_put_stid(&dp->dl_stid); | 1649 | nfs4_put_stid(&dp->dl_stid); |
1639 | } | 1650 | } |
1640 | while (!list_empty(&clp->cl_revoked)) { | 1651 | while (!list_empty(&clp->cl_revoked)) { |
@@ -1862,7 +1873,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, | |||
1862 | free_client(clp); | 1873 | free_client(clp); |
1863 | return NULL; | 1874 | return NULL; |
1864 | } | 1875 | } |
1865 | INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_run_cb_null); | 1876 | nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL); |
1866 | clp->cl_time = get_seconds(); | 1877 | clp->cl_time = get_seconds(); |
1867 | clear_bit(0, &clp->cl_cb_slot_busy); | 1878 | clear_bit(0, &clp->cl_cb_slot_busy); |
1868 | copy_verf(clp, verf); | 1879 | copy_verf(clp, verf); |
@@ -3349,8 +3360,9 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) | |||
3349 | return ret; | 3360 | return ret; |
3350 | } | 3361 | } |
3351 | 3362 | ||
3352 | void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) | 3363 | static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) |
3353 | { | 3364 | { |
3365 | struct nfs4_delegation *dp = cb_to_delegation(cb); | ||
3354 | struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net, | 3366 | struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net, |
3355 | nfsd_net_id); | 3367 | nfsd_net_id); |
3356 | 3368 | ||
@@ -3371,6 +3383,43 @@ void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) | |||
3371 | spin_unlock(&state_lock); | 3383 | spin_unlock(&state_lock); |
3372 | } | 3384 | } |
3373 | 3385 | ||
3386 | static int nfsd4_cb_recall_done(struct nfsd4_callback *cb, | ||
3387 | struct rpc_task *task) | ||
3388 | { | ||
3389 | struct nfs4_delegation *dp = cb_to_delegation(cb); | ||
3390 | |||
3391 | switch (task->tk_status) { | ||
3392 | case 0: | ||
3393 | return 1; | ||
3394 | case -EBADHANDLE: | ||
3395 | case -NFS4ERR_BAD_STATEID: | ||
3396 | /* | ||
3397 | * Race: client probably got cb_recall before open reply | ||
3398 | * granting delegation. | ||
3399 | */ | ||
3400 | if (dp->dl_retries--) { | ||
3401 | rpc_delay(task, 2 * HZ); | ||
3402 | return 0; | ||
3403 | } | ||
3404 | /*FALLTHRU*/ | ||
3405 | default: | ||
3406 | return -1; | ||
3407 | } | ||
3408 | } | ||
3409 | |||
3410 | static void nfsd4_cb_recall_release(struct nfsd4_callback *cb) | ||
3411 | { | ||
3412 | struct nfs4_delegation *dp = cb_to_delegation(cb); | ||
3413 | |||
3414 | nfs4_put_stid(&dp->dl_stid); | ||
3415 | } | ||
3416 | |||
3417 | static struct nfsd4_callback_ops nfsd4_cb_recall_ops = { | ||
3418 | .prepare = nfsd4_cb_recall_prepare, | ||
3419 | .done = nfsd4_cb_recall_done, | ||
3420 | .release = nfsd4_cb_recall_release, | ||
3421 | }; | ||
3422 | |||
3374 | static void nfsd_break_one_deleg(struct nfs4_delegation *dp) | 3423 | static void nfsd_break_one_deleg(struct nfs4_delegation *dp) |
3375 | { | 3424 | { |
3376 | /* | 3425 | /* |
@@ -3381,7 +3430,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) | |||
3381 | * it's safe to take a reference. | 3430 | * it's safe to take a reference. |
3382 | */ | 3431 | */ |
3383 | atomic_inc(&dp->dl_stid.sc_count); | 3432 | atomic_inc(&dp->dl_stid.sc_count); |
3384 | nfsd4_cb_recall(dp); | 3433 | nfsd4_run_cb(&dp->dl_recall); |
3385 | } | 3434 | } |
3386 | 3435 | ||
3387 | /* Called from break_lease() with i_lock held. */ | 3436 | /* Called from break_lease() with i_lock held. */ |
@@ -3759,7 +3808,6 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) | |||
3759 | fl = locks_alloc_lock(); | 3808 | fl = locks_alloc_lock(); |
3760 | if (!fl) | 3809 | if (!fl) |
3761 | return NULL; | 3810 | return NULL; |
3762 | locks_init_lock(fl); | ||
3763 | fl->fl_lmops = &nfsd_lease_mng_ops; | 3811 | fl->fl_lmops = &nfsd_lease_mng_ops; |
3764 | fl->fl_flags = FL_DELEG; | 3812 | fl->fl_flags = FL_DELEG; |
3765 | fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; | 3813 | fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; |
@@ -4107,7 +4155,7 @@ out: | |||
4107 | return status; | 4155 | return status; |
4108 | } | 4156 | } |
4109 | 4157 | ||
4110 | static void | 4158 | void |
4111 | nfsd4_end_grace(struct nfsd_net *nn) | 4159 | nfsd4_end_grace(struct nfsd_net *nn) |
4112 | { | 4160 | { |
4113 | /* do nothing if grace period already ended */ | 4161 | /* do nothing if grace period already ended */ |
@@ -4116,14 +4164,28 @@ nfsd4_end_grace(struct nfsd_net *nn) | |||
4116 | 4164 | ||
4117 | dprintk("NFSD: end of grace period\n"); | 4165 | dprintk("NFSD: end of grace period\n"); |
4118 | nn->grace_ended = true; | 4166 | nn->grace_ended = true; |
4119 | nfsd4_record_grace_done(nn, nn->boot_time); | 4167 | /* |
4168 | * If the server goes down again right now, an NFSv4 | ||
4169 | * client will still be allowed to reclaim after it comes back up, | ||
4170 | * even if it hasn't yet had a chance to reclaim state this time. | ||
4171 | * | ||
4172 | */ | ||
4173 | nfsd4_record_grace_done(nn); | ||
4174 | /* | ||
4175 | * At this point, NFSv4 clients can still reclaim. But if the | ||
4176 | * server crashes, any that have not yet reclaimed will be out | ||
4177 | * of luck on the next boot. | ||
4178 | * | ||
4179 | * (NFSv4.1+ clients are considered to have reclaimed once they | ||
4180 | * call RECLAIM_COMPLETE. NFSv4.0 clients are considered to | ||
4181 | * have reclaimed after their first OPEN.) | ||
4182 | */ | ||
4120 | locks_end_grace(&nn->nfsd4_manager); | 4183 | locks_end_grace(&nn->nfsd4_manager); |
4121 | /* | 4184 | /* |
4122 | * Now that every NFSv4 client has had the chance to recover and | 4185 | * At this point, and once lockd and/or any other containers |
4123 | * to see the (possibly new, possibly shorter) lease time, we | 4186 | * exit their grace period, further reclaims will fail and |
4124 | * can safely set the next grace time to the current lease time: | 4187 | * regular locking can resume. |
4125 | */ | 4188 | */ |
4126 | nn->nfsd4_grace = nn->nfsd4_lease; | ||
4127 | } | 4189 | } |
4128 | 4190 | ||
4129 | static time_t | 4191 | static time_t |
@@ -5210,7 +5272,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
5210 | } | 5272 | } |
5211 | 5273 | ||
5212 | fp = lock_stp->st_stid.sc_file; | 5274 | fp = lock_stp->st_stid.sc_file; |
5213 | locks_init_lock(file_lock); | ||
5214 | switch (lock->lk_type) { | 5275 | switch (lock->lk_type) { |
5215 | case NFS4_READ_LT: | 5276 | case NFS4_READ_LT: |
5216 | case NFS4_READW_LT: | 5277 | case NFS4_READW_LT: |
@@ -5354,7 +5415,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
5354 | status = nfserr_jukebox; | 5415 | status = nfserr_jukebox; |
5355 | goto out; | 5416 | goto out; |
5356 | } | 5417 | } |
5357 | locks_init_lock(file_lock); | 5418 | |
5358 | switch (lockt->lt_type) { | 5419 | switch (lockt->lt_type) { |
5359 | case NFS4_READ_LT: | 5420 | case NFS4_READ_LT: |
5360 | case NFS4_READW_LT: | 5421 | case NFS4_READW_LT: |
@@ -5432,7 +5493,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
5432 | status = nfserr_jukebox; | 5493 | status = nfserr_jukebox; |
5433 | goto fput; | 5494 | goto fput; |
5434 | } | 5495 | } |
5435 | locks_init_lock(file_lock); | 5496 | |
5436 | file_lock->fl_type = F_UNLCK; | 5497 | file_lock->fl_type = F_UNLCK; |
5437 | file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); | 5498 | file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); |
5438 | file_lock->fl_pid = current->tgid; | 5499 | file_lock->fl_pid = current->tgid; |
@@ -5645,6 +5706,9 @@ nfs4_check_open_reclaim(clientid_t *clid, | |||
5645 | if (status) | 5706 | if (status) |
5646 | return nfserr_reclaim_bad; | 5707 | return nfserr_reclaim_bad; |
5647 | 5708 | ||
5709 | if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags)) | ||
5710 | return nfserr_no_grace; | ||
5711 | |||
5648 | if (nfsd4_client_record_check(cstate->clp)) | 5712 | if (nfsd4_client_record_check(cstate->clp)) |
5649 | return nfserr_reclaim_bad; | 5713 | return nfserr_reclaim_bad; |
5650 | 5714 | ||
@@ -6342,10 +6406,10 @@ nfs4_state_start_net(struct net *net) | |||
6342 | ret = nfs4_state_create_net(net); | 6406 | ret = nfs4_state_create_net(net); |
6343 | if (ret) | 6407 | if (ret) |
6344 | return ret; | 6408 | return ret; |
6345 | nfsd4_client_tracking_init(net); | ||
6346 | nn->boot_time = get_seconds(); | 6409 | nn->boot_time = get_seconds(); |
6347 | locks_start_grace(net, &nn->nfsd4_manager); | ||
6348 | nn->grace_ended = false; | 6410 | nn->grace_ended = false; |
6411 | locks_start_grace(net, &nn->nfsd4_manager); | ||
6412 | nfsd4_client_tracking_init(net); | ||
6349 | printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", | 6413 | printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", |
6350 | nn->nfsd4_grace, net); | 6414 | nn->nfsd4_grace, net); |
6351 | queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); | 6415 | queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); |
@@ -6402,6 +6466,7 @@ nfs4_state_shutdown_net(struct net *net) | |||
6402 | list_for_each_safe(pos, next, &reaplist) { | 6466 | list_for_each_safe(pos, next, &reaplist) { |
6403 | dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); | 6467 | dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); |
6404 | list_del_init(&dp->dl_recall_lru); | 6468 | list_del_init(&dp->dl_recall_lru); |
6469 | nfs4_put_deleg_lease(dp->dl_stid.sc_file); | ||
6405 | nfs4_put_stid(&dp->dl_stid); | 6470 | nfs4_put_stid(&dp->dl_stid); |
6406 | } | 6471 | } |
6407 | 6472 | ||
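
A recurring pattern in this file's changes: nfs4_put_deleg_lease() now detaches fi_deleg_file and fi_lease under fi_lock via swap(), and only calls vfs_setlease() and fput() after dropping it, since both can sleep. Callers that unhash delegations under state_lock (destroy_delegation(), __destroy_client(), nfs4_state_shutdown_net()) correspondingly drop the lease only after unlocking. The shape of the idiom, isolated:

    struct file *filp = NULL;

    spin_lock(&fp->fi_lock);
    if (last_reference_dropped(fp))          /* assumed predicate */
            swap(filp, fp->fi_deleg_file);   /* detach under the lock */
    spin_unlock(&fp->fi_lock);

    if (filp)
            fput(filp);                      /* sleepable work, lock dropped */
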
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b01f6e100ee8..eeea7a90eb87 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -31,13 +31,6 @@ | |||
31 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | 31 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
32 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | 32 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
33 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 33 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
34 | * | ||
35 | * TODO: Neil Brown made the following observation: We currently | ||
36 | * initially reserve NFSD_BUFSIZE space on the transmit queue and | ||
37 | * never release any of that until the request is complete. | ||
38 | * It would be good to calculate a new maximum response size while | ||
39 | * decoding the COMPOUND, and call svc_reserve with this number | ||
40 | * at the end of nfs4svc_decode_compoundargs. | ||
41 | */ | 34 | */ |
42 | 35 | ||
43 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
@@ -1521,6 +1514,22 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str | |||
1521 | } | 1514 | } |
1522 | 1515 | ||
1523 | static __be32 | 1516 | static __be32 |
1517 | nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) | ||
1518 | { | ||
1519 | DECODE_HEAD; | ||
1520 | |||
1521 | status = nfsd4_decode_stateid(argp, &seek->seek_stateid); | ||
1522 | if (status) | ||
1523 | return status; | ||
1524 | |||
1525 | READ_BUF(8 + 4); | ||
1526 | p = xdr_decode_hyper(p, &seek->seek_offset); | ||
1527 | seek->seek_whence = be32_to_cpup(p); | ||
1528 | |||
1529 | DECODE_TAIL; | ||
1530 | } | ||
1531 | |||
1532 | static __be32 | ||
1524 | nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) | 1533 | nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) |
1525 | { | 1534 | { |
1526 | return nfs_ok; | 1535 | return nfs_ok; |
@@ -1593,6 +1602,20 @@ static nfsd4_dec nfsd4_dec_ops[] = { | |||
1593 | [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, | 1602 | [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, |
1594 | [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, | 1603 | [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, |
1595 | [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, | 1604 | [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, |
1605 | |||
1606 | /* new operations for NFSv4.2 */ | ||
1607 | [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1608 | [OP_COPY] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1609 | [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1610 | [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1611 | [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1612 | [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1613 | [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1614 | [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1615 | [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1616 | [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1617 | [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, | ||
1618 | [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1596 | }; | 1619 | }; |
1597 | 1620 | ||
1598 | static inline bool | 1621 | static inline bool |
@@ -1670,6 +1693,14 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) | |||
1670 | readbytes += nfsd4_max_reply(argp->rqstp, op); | 1693 | readbytes += nfsd4_max_reply(argp->rqstp, op); |
1671 | } else | 1694 | } else |
1672 | max_reply += nfsd4_max_reply(argp->rqstp, op); | 1695 | max_reply += nfsd4_max_reply(argp->rqstp, op); |
1696 | /* | ||
1697 | * OP_LOCK may return a conflicting lock. (Special case | ||
1698 | * because it will just skip encoding this if it runs | ||
1699 | * out of xdr buffer space, and it is the only operation | ||
1700 | * that behaves this way.) | ||
1701 | */ | ||
1702 | if (op->opnum == OP_LOCK) | ||
1703 | max_reply += NFS4_OPAQUE_LIMIT; | ||
1673 | 1704 | ||
1674 | if (op->status) { | 1705 | if (op->status) { |
1675 | argp->opcnt = i+1; | 1706 | argp->opcnt = i+1; |
@@ -3764,6 +3795,22 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, | |||
3764 | } | 3795 | } |
3765 | 3796 | ||
3766 | static __be32 | 3797 | static __be32 |
3798 | nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, | ||
3799 | struct nfsd4_seek *seek) | ||
3800 | { | ||
3801 | __be32 *p; | ||
3802 | |||
3803 | if (nfserr) | ||
3804 | return nfserr; | ||
3805 | |||
3806 | p = xdr_reserve_space(&resp->xdr, 4 + 8); | ||
3807 | *p++ = cpu_to_be32(seek->seek_eof); | ||
3808 | p = xdr_encode_hyper(p, seek->seek_pos); | ||
3809 | |||
3810 | return nfserr; | ||
3811 | } | ||
3812 | |||
3813 | static __be32 | ||
3767 | nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) | 3814 | nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) |
3768 | { | 3815 | { |
3769 | return nfserr; | 3816 | return nfserr; |
@@ -3835,6 +3882,20 @@ static nfsd4_enc nfsd4_enc_ops[] = { | |||
3835 | [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, | 3882 | [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, |
3836 | [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, | 3883 | [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, |
3837 | [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, | 3884 | [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, |
3885 | |||
3886 | /* NFSv4.2 operations */ | ||
3887 | [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, | ||
3888 | [OP_COPY] = (nfsd4_enc)nfsd4_encode_noop, | ||
3889 | [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop, | ||
3890 | [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, | ||
3891 | [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, | ||
3892 | [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop, | ||
3893 | [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, | ||
3894 | [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, | ||
3895 | [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_noop, | ||
3896 | [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop, | ||
3897 | [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, | ||
3898 | [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, | ||
3838 | }; | 3899 | }; |
3839 | 3900 | ||
3840 | /* | 3901 | /* |
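
The SEEK coders follow the NFSv4.2 XDR layout: the arguments are a stateid followed by an 8-byte offset and a 4-byte whence (hence READ_BUF(8 + 4) after the stateid), and a successful reply carries a 4-byte eof flag followed by the 8-byte resulting offset (hence xdr_reserve_space(4 + 8)). In XDR terms, roughly:

    SEEK4args:  sa_stateid (stateid4), sa_offset (offset4, 8 bytes),
                sa_what (data_content4, 4 bytes)
    SEEK4res:   sr_status, then on success sr_eof (bool, 4 bytes),
                sr_offset (offset4, 8 bytes)

One gap worth noting: unlike most encoders in this file, nfsd4_encode_seek() does not check the xdr_reserve_space() result for NULL before dereferencing it.
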
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index ff9567633245..122f69185ef5 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c | |||
@@ -27,8 +27,12 @@ | |||
27 | */ | 27 | */ |
28 | #define TARGET_BUCKET_SIZE 64 | 28 | #define TARGET_BUCKET_SIZE 64 |
29 | 29 | ||
30 | static struct hlist_head * cache_hash; | 30 | struct nfsd_drc_bucket { |
31 | static struct list_head lru_head; | 31 | struct list_head lru_head; |
32 | spinlock_t cache_lock; | ||
33 | }; | ||
34 | |||
35 | static struct nfsd_drc_bucket *drc_hashtbl; | ||
32 | static struct kmem_cache *drc_slab; | 36 | static struct kmem_cache *drc_slab; |
33 | 37 | ||
34 | /* max number of entries allowed in the cache */ | 38 | /* max number of entries allowed in the cache */ |
@@ -36,6 +40,7 @@ static unsigned int max_drc_entries; | |||
36 | 40 | ||
37 | /* number of significant bits in the hash value */ | 41 | /* number of significant bits in the hash value */ |
38 | static unsigned int maskbits; | 42 | static unsigned int maskbits; |
43 | static unsigned int drc_hashsize; | ||
39 | 44 | ||
40 | /* | 45 | /* |
41 | * Stats and other tracking of the duplicate reply cache. All of these and | 46 | * Stats and other tracking of the duplicate reply cache. All of these and |
@@ -43,7 +48,7 @@ static unsigned int maskbits; | |||
43 | */ | 48 | */ |
44 | 49 | ||
45 | /* total number of entries */ | 50 | /* total number of entries */ |
46 | static unsigned int num_drc_entries; | 51 | static atomic_t num_drc_entries; |
47 | 52 | ||
48 | /* cache misses due only to checksum comparison failures */ | 53 | /* cache misses due only to checksum comparison failures */ |
49 | static unsigned int payload_misses; | 54 | static unsigned int payload_misses; |
@@ -75,7 +80,6 @@ static struct shrinker nfsd_reply_cache_shrinker = { | |||
75 | * A cache entry is "single use" if c_state == RC_INPROG | 80 | * A cache entry is "single use" if c_state == RC_INPROG |
76 | * Otherwise, when accessing _prev or _next, the lock must be held. | 81 | * Otherwise, when accessing _prev or _next, the lock must be held. |
77 | */ | 82 | */ |
78 | static DEFINE_SPINLOCK(cache_lock); | ||
79 | static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); | 83 | static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); |
80 | 84 | ||
81 | /* | 85 | /* |
@@ -116,6 +120,12 @@ nfsd_hashsize(unsigned int limit) | |||
116 | return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); | 120 | return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); |
117 | } | 121 | } |
118 | 122 | ||
123 | static u32 | ||
124 | nfsd_cache_hash(__be32 xid) | ||
125 | { | ||
126 | return hash_32(be32_to_cpu(xid), maskbits); | ||
127 | } | ||
128 | |||
119 | static struct svc_cacherep * | 129 | static struct svc_cacherep * |
120 | nfsd_reply_cache_alloc(void) | 130 | nfsd_reply_cache_alloc(void) |
121 | { | 131 | { |
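nfsd_cache_hash() converts the XID to host order and hands it to hash_32(), a multiplicative hash that keeps only the top maskbits bits (the old code hashed the raw big-endian value, which picked a bucket just as well). A sketch of the scheme; 0x9e370001 is assumed to be the 32-bit golden-ratio prime from the era's <linux/hash.h>, worth verifying:

```c
/*
 * hash_32()-style multiplicative hash: multiply by a large odd constant
 * and keep the top 'bits' bits, which are the best mixed.  Requires
 * 1 <= bits <= 31.
 */
#include <stdint.h>

static uint32_t hash_32_sketch(uint32_t val, unsigned int bits)
{
    return (val * 0x9e370001u) >> (32 - bits);
}

/* Bucket index for an RPC XID, as nfsd_cache_hash() computes it. */
static uint32_t xid_to_bucket(uint32_t xid_host_order, unsigned int maskbits)
{
    return hash_32_sketch(xid_host_order, maskbits);
}
```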
@@ -126,7 +136,6 @@ nfsd_reply_cache_alloc(void) | |||
126 | rp->c_state = RC_UNUSED; | 136 | rp->c_state = RC_UNUSED; |
127 | rp->c_type = RC_NOCACHE; | 137 | rp->c_type = RC_NOCACHE; |
128 | INIT_LIST_HEAD(&rp->c_lru); | 138 | INIT_LIST_HEAD(&rp->c_lru); |
129 | INIT_HLIST_NODE(&rp->c_hash); | ||
130 | } | 139 | } |
131 | return rp; | 140 | return rp; |
132 | } | 141 | } |
@@ -138,29 +147,27 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp) | |||
138 | drc_mem_usage -= rp->c_replvec.iov_len; | 147 | drc_mem_usage -= rp->c_replvec.iov_len; |
139 | kfree(rp->c_replvec.iov_base); | 148 | kfree(rp->c_replvec.iov_base); |
140 | } | 149 | } |
141 | if (!hlist_unhashed(&rp->c_hash)) | ||
142 | hlist_del(&rp->c_hash); | ||
143 | list_del(&rp->c_lru); | 150 | list_del(&rp->c_lru); |
144 | --num_drc_entries; | 151 | atomic_dec(&num_drc_entries); |
145 | drc_mem_usage -= sizeof(*rp); | 152 | drc_mem_usage -= sizeof(*rp); |
146 | kmem_cache_free(drc_slab, rp); | 153 | kmem_cache_free(drc_slab, rp); |
147 | } | 154 | } |
148 | 155 | ||
149 | static void | 156 | static void |
150 | nfsd_reply_cache_free(struct svc_cacherep *rp) | 157 | nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) |
151 | { | 158 | { |
152 | spin_lock(&cache_lock); | 159 | spin_lock(&b->cache_lock); |
153 | nfsd_reply_cache_free_locked(rp); | 160 | nfsd_reply_cache_free_locked(rp); |
154 | spin_unlock(&cache_lock); | 161 | spin_unlock(&b->cache_lock); |
155 | } | 162 | } |
156 | 163 | ||
157 | int nfsd_reply_cache_init(void) | 164 | int nfsd_reply_cache_init(void) |
158 | { | 165 | { |
159 | unsigned int hashsize; | 166 | unsigned int hashsize; |
167 | unsigned int i; | ||
160 | 168 | ||
161 | INIT_LIST_HEAD(&lru_head); | ||
162 | max_drc_entries = nfsd_cache_size_limit(); | 169 | max_drc_entries = nfsd_cache_size_limit(); |
163 | num_drc_entries = 0; | 170 | atomic_set(&num_drc_entries, 0); |
164 | hashsize = nfsd_hashsize(max_drc_entries); | 171 | hashsize = nfsd_hashsize(max_drc_entries); |
165 | maskbits = ilog2(hashsize); | 172 | maskbits = ilog2(hashsize); |
166 | 173 | ||
@@ -170,9 +177,14 @@ int nfsd_reply_cache_init(void) | |||
170 | if (!drc_slab) | 177 | if (!drc_slab) |
171 | goto out_nomem; | 178 | goto out_nomem; |
172 | 179 | ||
173 | cache_hash = kcalloc(hashsize, sizeof(struct hlist_head), GFP_KERNEL); | 180 | drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL); |
174 | if (!cache_hash) | 181 | if (!drc_hashtbl) |
175 | goto out_nomem; | 182 | goto out_nomem; |
183 | for (i = 0; i < hashsize; i++) { | ||
184 | INIT_LIST_HEAD(&drc_hashtbl[i].lru_head); | ||
185 | spin_lock_init(&drc_hashtbl[i].cache_lock); | ||
186 | } | ||
187 | drc_hashsize = hashsize; | ||
176 | 188 | ||
177 | return 0; | 189 | return 0; |
178 | out_nomem: | 190 | out_nomem: |
@@ -184,17 +196,22 @@ out_nomem: | |||
184 | void nfsd_reply_cache_shutdown(void) | 196 | void nfsd_reply_cache_shutdown(void) |
185 | { | 197 | { |
186 | struct svc_cacherep *rp; | 198 | struct svc_cacherep *rp; |
199 | unsigned int i; | ||
187 | 200 | ||
188 | unregister_shrinker(&nfsd_reply_cache_shrinker); | 201 | unregister_shrinker(&nfsd_reply_cache_shrinker); |
189 | cancel_delayed_work_sync(&cache_cleaner); | 202 | cancel_delayed_work_sync(&cache_cleaner); |
190 | 203 | ||
191 | while (!list_empty(&lru_head)) { | 204 | for (i = 0; i < drc_hashsize; i++) { |
192 | rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); | 205 | struct list_head *head = &drc_hashtbl[i].lru_head; |
193 | nfsd_reply_cache_free_locked(rp); | 206 | while (!list_empty(head)) { |
207 | rp = list_first_entry(head, struct svc_cacherep, c_lru); | ||
208 | nfsd_reply_cache_free_locked(rp); | ||
209 | } | ||
194 | } | 210 | } |
195 | 211 | ||
196 | kfree (cache_hash); | 212 | kfree (drc_hashtbl); |
197 | cache_hash = NULL; | 213 | drc_hashtbl = NULL; |
214 | drc_hashsize = 0; | ||
198 | 215 | ||
199 | if (drc_slab) { | 216 | if (drc_slab) { |
200 | kmem_cache_destroy(drc_slab); | 217 | kmem_cache_destroy(drc_slab); |
@@ -207,61 +224,63 @@ void nfsd_reply_cache_shutdown(void) | |||
207 | * not already scheduled. | 224 | * not already scheduled. |
208 | */ | 225 | */ |
209 | static void | 226 | static void |
210 | lru_put_end(struct svc_cacherep *rp) | 227 | lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) |
211 | { | 228 | { |
212 | rp->c_timestamp = jiffies; | 229 | rp->c_timestamp = jiffies; |
213 | list_move_tail(&rp->c_lru, &lru_head); | 230 | list_move_tail(&rp->c_lru, &b->lru_head); |
214 | schedule_delayed_work(&cache_cleaner, RC_EXPIRE); | 231 | schedule_delayed_work(&cache_cleaner, RC_EXPIRE); |
215 | } | 232 | } |
216 | 233 | ||
217 | /* | ||
218 | * Move a cache entry from one hash list to another | ||
219 | */ | ||
220 | static void | ||
221 | hash_refile(struct svc_cacherep *rp) | ||
222 | { | ||
223 | hlist_del_init(&rp->c_hash); | ||
224 | /* | ||
225 | * No point in byte swapping c_xid since we're just using it to pick | ||
226 | * a hash bucket. | ||
227 | */ | ||
228 | hlist_add_head(&rp->c_hash, cache_hash + | ||
229 | hash_32((__force u32)rp->c_xid, maskbits)); | ||
230 | } | ||
231 | |||
232 | /* | ||
233 | * Walk the LRU list and prune off entries that are older than RC_EXPIRE. | ||
234 | * Also prune the oldest ones when the total exceeds the max number of entries. | ||
235 | */ | ||
236 | static long | 234 | static long |
237 | prune_cache_entries(void) | 235 | prune_bucket(struct nfsd_drc_bucket *b) |
238 | { | 236 | { |
239 | struct svc_cacherep *rp, *tmp; | 237 | struct svc_cacherep *rp, *tmp; |
240 | long freed = 0; | 238 | long freed = 0; |
241 | 239 | ||
242 | list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { | 240 | list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) { |
243 | /* | 241 | /* |
244 | * Don't free entries attached to calls that are still | 242 | * Don't free entries attached to calls that are still |
245 | * in-progress, but do keep scanning the list. | 243 | * in-progress, but do keep scanning the list. |
246 | */ | 244 | */ |
247 | if (rp->c_state == RC_INPROG) | 245 | if (rp->c_state == RC_INPROG) |
248 | continue; | 246 | continue; |
249 | if (num_drc_entries <= max_drc_entries && | 247 | if (atomic_read(&num_drc_entries) <= max_drc_entries && |
250 | time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) | 248 | time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) |
251 | break; | 249 | break; |
252 | nfsd_reply_cache_free_locked(rp); | 250 | nfsd_reply_cache_free_locked(rp); |
253 | freed++; | 251 | freed++; |
254 | } | 252 | } |
253 | return freed; | ||
254 | } | ||
255 | |||
256 | /* | ||
257 | * Walk the LRU list and prune off entries that are older than RC_EXPIRE. | ||
258 | * Also prune the oldest ones when the total exceeds the max number of entries. | ||
259 | */ | ||
260 | static long | ||
261 | prune_cache_entries(void) | ||
262 | { | ||
263 | unsigned int i; | ||
264 | long freed = 0; | ||
265 | bool cancel = true; | ||
266 | |||
267 | for (i = 0; i < drc_hashsize; i++) { | ||
268 | struct nfsd_drc_bucket *b = &drc_hashtbl[i]; | ||
269 | |||
270 | if (list_empty(&b->lru_head)) | ||
271 | continue; | ||
272 | spin_lock(&b->cache_lock); | ||
273 | freed += prune_bucket(b); | ||
274 | if (!list_empty(&b->lru_head)) | ||
275 | cancel = false; | ||
276 | spin_unlock(&b->cache_lock); | ||
277 | } | ||
255 | 278 | ||
256 | /* | 279 | /* |
257 | * Conditionally rearm the job. If we cleaned out the list, then | 280 | * Conditionally rearm the job to run in RC_EXPIRE since we just |
258 | * cancel any pending run (since there won't be any work to do). | 281 | * ran the pruner. |
259 | * Otherwise, we rearm the job or modify the existing one to run in | ||
260 | * RC_EXPIRE since we just ran the pruner. | ||
261 | */ | 282 | */ |
262 | if (list_empty(&lru_head)) | 283 | if (!cancel) |
263 | cancel_delayed_work(&cache_cleaner); | ||
264 | else | ||
265 | mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); | 284 | mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); |
266 | return freed; | 285 | return freed; |
267 | } | 286 | } |
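prune_bucket() walks the LRU oldest-to-newest and stops at the first entry that is both under the global limit and younger than RC_EXPIRE, so a scan is cheap when nothing has expired. The age test leans on time_before(), whose signed-subtraction trick stays correct across jiffies wraparound; a one-function sketch:

```c
/*
 * Wraparound-safe "a happens before b", in the style of the kernel's
 * time_before(): valid as long as the two tick values are within half
 * the counter range of each other.
 */
#include <stdint.h>
#include <stdbool.h>

static bool time_before_sketch(uint32_t a, uint32_t b)
{
    return (int32_t)(a - b) < 0;
}
```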
@@ -269,32 +288,19 @@ prune_cache_entries(void) | |||
269 | static void | 288 | static void |
270 | cache_cleaner_func(struct work_struct *unused) | 289 | cache_cleaner_func(struct work_struct *unused) |
271 | { | 290 | { |
272 | spin_lock(&cache_lock); | ||
273 | prune_cache_entries(); | 291 | prune_cache_entries(); |
274 | spin_unlock(&cache_lock); | ||
275 | } | 292 | } |
276 | 293 | ||
277 | static unsigned long | 294 | static unsigned long |
278 | nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) | 295 | nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) |
279 | { | 296 | { |
280 | unsigned long num; | 297 | return atomic_read(&num_drc_entries); |
281 | |||
282 | spin_lock(&cache_lock); | ||
283 | num = num_drc_entries; | ||
284 | spin_unlock(&cache_lock); | ||
285 | |||
286 | return num; | ||
287 | } | 298 | } |
288 | 299 | ||
289 | static unsigned long | 300 | static unsigned long |
290 | nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) | 301 | nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) |
291 | { | 302 | { |
292 | unsigned long freed; | 303 | return prune_cache_entries(); |
293 | |||
294 | spin_lock(&cache_lock); | ||
295 | freed = prune_cache_entries(); | ||
296 | spin_unlock(&cache_lock); | ||
297 | return freed; | ||
298 | } | 304 | } |
299 | /* | 305 | /* |
300 | * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes | 306 | * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes |
@@ -332,20 +338,24 @@ nfsd_cache_csum(struct svc_rqst *rqstp) | |||
332 | static bool | 338 | static bool |
333 | nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) | 339 | nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) |
334 | { | 340 | { |
335 | /* Check RPC header info first */ | 341 | /* Check RPC XID first */ |
336 | if (rqstp->rq_xid != rp->c_xid || rqstp->rq_proc != rp->c_proc || | 342 | if (rqstp->rq_xid != rp->c_xid) |
337 | rqstp->rq_prot != rp->c_prot || rqstp->rq_vers != rp->c_vers || | ||
338 | rqstp->rq_arg.len != rp->c_len || | ||
339 | !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) || | ||
340 | rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr)) | ||
341 | return false; | 343 | return false; |
342 | |||
343 | /* compare checksum of NFS data */ | 344 | /* compare checksum of NFS data */ |
344 | if (csum != rp->c_csum) { | 345 | if (csum != rp->c_csum) { |
345 | ++payload_misses; | 346 | ++payload_misses; |
346 | return false; | 347 | return false; |
347 | } | 348 | } |
348 | 349 | ||
350 | /* Other discriminators */ | ||
351 | if (rqstp->rq_proc != rp->c_proc || | ||
352 | rqstp->rq_prot != rp->c_prot || | ||
353 | rqstp->rq_vers != rp->c_vers || | ||
354 | rqstp->rq_arg.len != rp->c_len || | ||
355 | !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) || | ||
356 | rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr)) | ||
357 | return false; | ||
358 | |||
349 | return true; | 359 | return true; |
350 | } | 360 | } |
351 | 361 | ||
@@ -355,18 +365,14 @@ nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) | |||
355 | * NULL on failure. | 365 | * NULL on failure. |
356 | */ | 366 | */ |
357 | static struct svc_cacherep * | 367 | static struct svc_cacherep * |
358 | nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) | 368 | nfsd_cache_search(struct nfsd_drc_bucket *b, struct svc_rqst *rqstp, |
369 | __wsum csum) | ||
359 | { | 370 | { |
360 | struct svc_cacherep *rp, *ret = NULL; | 371 | struct svc_cacherep *rp, *ret = NULL; |
361 | struct hlist_head *rh; | 372 | struct list_head *rh = &b->lru_head; |
362 | unsigned int entries = 0; | 373 | unsigned int entries = 0; |
363 | 374 | ||
364 | /* | 375 | list_for_each_entry(rp, rh, c_lru) { |
365 | * No point in byte swapping rq_xid since we're just using it to pick | ||
366 | * a hash bucket. | ||
367 | */ | ||
368 | rh = &cache_hash[hash_32((__force u32)rqstp->rq_xid, maskbits)]; | ||
369 | hlist_for_each_entry(rp, rh, c_hash) { | ||
370 | ++entries; | 376 | ++entries; |
371 | if (nfsd_cache_match(rqstp, csum, rp)) { | 377 | if (nfsd_cache_match(rqstp, csum, rp)) { |
372 | ret = rp; | 378 | ret = rp; |
@@ -377,11 +383,12 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) | |||
377 | /* tally hash chain length stats */ | 383 | /* tally hash chain length stats */ |
378 | if (entries > longest_chain) { | 384 | if (entries > longest_chain) { |
379 | longest_chain = entries; | 385 | longest_chain = entries; |
380 | longest_chain_cachesize = num_drc_entries; | 386 | longest_chain_cachesize = atomic_read(&num_drc_entries); |
381 | } else if (entries == longest_chain) { | 387 | } else if (entries == longest_chain) { |
382 | /* prefer to keep the smallest cachesize possible here */ | 388 | /* prefer to keep the smallest cachesize possible here */ |
383 | longest_chain_cachesize = min(longest_chain_cachesize, | 389 | longest_chain_cachesize = min_t(unsigned int, |
384 | num_drc_entries); | 390 | longest_chain_cachesize, |
391 | atomic_read(&num_drc_entries)); | ||
385 | } | 392 | } |
386 | 393 | ||
387 | return ret; | 394 | return ret; |
@@ -403,6 +410,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) | |||
403 | vers = rqstp->rq_vers, | 410 | vers = rqstp->rq_vers, |
404 | proc = rqstp->rq_proc; | 411 | proc = rqstp->rq_proc; |
405 | __wsum csum; | 412 | __wsum csum; |
413 | u32 hash = nfsd_cache_hash(xid); | ||
414 | struct nfsd_drc_bucket *b = &drc_hashtbl[hash]; | ||
406 | unsigned long age; | 415 | unsigned long age; |
407 | int type = rqstp->rq_cachetype; | 416 | int type = rqstp->rq_cachetype; |
408 | int rtn = RC_DOIT; | 417 | int rtn = RC_DOIT; |
@@ -420,16 +429,16 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) | |||
420 | * preallocate an entry. | 429 | * preallocate an entry. |
421 | */ | 430 | */ |
422 | rp = nfsd_reply_cache_alloc(); | 431 | rp = nfsd_reply_cache_alloc(); |
423 | spin_lock(&cache_lock); | 432 | spin_lock(&b->cache_lock); |
424 | if (likely(rp)) { | 433 | if (likely(rp)) { |
425 | ++num_drc_entries; | 434 | atomic_inc(&num_drc_entries); |
426 | drc_mem_usage += sizeof(*rp); | 435 | drc_mem_usage += sizeof(*rp); |
427 | } | 436 | } |
428 | 437 | ||
429 | /* go ahead and prune the cache */ | 438 | /* go ahead and prune the cache */ |
430 | prune_cache_entries(); | 439 | prune_bucket(b); |
431 | 440 | ||
432 | found = nfsd_cache_search(rqstp, csum); | 441 | found = nfsd_cache_search(b, rqstp, csum); |
433 | if (found) { | 442 | if (found) { |
434 | if (likely(rp)) | 443 | if (likely(rp)) |
435 | nfsd_reply_cache_free_locked(rp); | 444 | nfsd_reply_cache_free_locked(rp); |
@@ -454,8 +463,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) | |||
454 | rp->c_len = rqstp->rq_arg.len; | 463 | rp->c_len = rqstp->rq_arg.len; |
455 | rp->c_csum = csum; | 464 | rp->c_csum = csum; |
456 | 465 | ||
457 | hash_refile(rp); | 466 | lru_put_end(b, rp); |
458 | lru_put_end(rp); | ||
459 | 467 | ||
460 | /* release any buffer */ | 468 | /* release any buffer */ |
461 | if (rp->c_type == RC_REPLBUFF) { | 469 | if (rp->c_type == RC_REPLBUFF) { |
@@ -465,14 +473,14 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) | |||
465 | } | 473 | } |
466 | rp->c_type = RC_NOCACHE; | 474 | rp->c_type = RC_NOCACHE; |
467 | out: | 475 | out: |
468 | spin_unlock(&cache_lock); | 476 | spin_unlock(&b->cache_lock); |
469 | return rtn; | 477 | return rtn; |
470 | 478 | ||
471 | found_entry: | 479 | found_entry: |
472 | nfsdstats.rchits++; | 480 | nfsdstats.rchits++; |
473 | /* We found a matching entry which is either in progress or done. */ | 481 | /* We found a matching entry which is either in progress or done. */ |
474 | age = jiffies - rp->c_timestamp; | 482 | age = jiffies - rp->c_timestamp; |
475 | lru_put_end(rp); | 483 | lru_put_end(b, rp); |
476 | 484 | ||
477 | rtn = RC_DROPIT; | 485 | rtn = RC_DROPIT; |
478 | /* Request being processed or excessive rexmits */ | 486 | /* Request being processed or excessive rexmits */ |
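nfsd_cache_lookup() allocates its candidate entry before taking the bucket lock, then throws the preallocation away if the search finds the request already cached; that keeps the slab allocation out of the critical section. A pthread-based sketch of the same shape, with a stubbed-out lookup:

```c
/*
 * Optimistic allocation outside the lock, discarded on a lookup hit.
 * A pthread mutex stands in for the bucket spinlock; names invented.
 */
#include <pthread.h>
#include <stdlib.h>

struct entry_sketch { int key; };

static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER;

static struct entry_sketch *lookup_locked(int key)
{
    (void)key;
    return NULL;                /* stub: the bucket scan would go here */
}

static struct entry_sketch *get_entry(int key)
{
    struct entry_sketch *fresh = malloc(sizeof(*fresh)); /* unlocked */
    struct entry_sketch *found;

    pthread_mutex_lock(&bucket_lock);
    found = lookup_locked(key);
    if (found) {
        free(fresh);            /* lost the race; drop the prealloc */
    } else if (fresh) {
        fresh->key = key;       /* insertion would go here */
        found = fresh;
    }
    pthread_mutex_unlock(&bucket_lock);
    return found;
}
```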
@@ -527,18 +535,23 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) | |||
527 | { | 535 | { |
528 | struct svc_cacherep *rp = rqstp->rq_cacherep; | 536 | struct svc_cacherep *rp = rqstp->rq_cacherep; |
529 | struct kvec *resv = &rqstp->rq_res.head[0], *cachv; | 537 | struct kvec *resv = &rqstp->rq_res.head[0], *cachv; |
538 | u32 hash; | ||
539 | struct nfsd_drc_bucket *b; | ||
530 | int len; | 540 | int len; |
531 | size_t bufsize = 0; | 541 | size_t bufsize = 0; |
532 | 542 | ||
533 | if (!rp) | 543 | if (!rp) |
534 | return; | 544 | return; |
535 | 545 | ||
546 | hash = nfsd_cache_hash(rp->c_xid); | ||
547 | b = &drc_hashtbl[hash]; | ||
548 | |||
536 | len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); | 549 | len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); |
537 | len >>= 2; | 550 | len >>= 2; |
538 | 551 | ||
539 | /* Don't cache excessive amounts of data and XDR failures */ | 552 | /* Don't cache excessive amounts of data and XDR failures */ |
540 | if (!statp || len > (256 >> 2)) { | 553 | if (!statp || len > (256 >> 2)) { |
541 | nfsd_reply_cache_free(rp); | 554 | nfsd_reply_cache_free(b, rp); |
542 | return; | 555 | return; |
543 | } | 556 | } |
544 | 557 | ||
@@ -553,23 +566,23 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) | |||
553 | bufsize = len << 2; | 566 | bufsize = len << 2; |
554 | cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); | 567 | cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); |
555 | if (!cachv->iov_base) { | 568 | if (!cachv->iov_base) { |
556 | nfsd_reply_cache_free(rp); | 569 | nfsd_reply_cache_free(b, rp); |
557 | return; | 570 | return; |
558 | } | 571 | } |
559 | cachv->iov_len = bufsize; | 572 | cachv->iov_len = bufsize; |
560 | memcpy(cachv->iov_base, statp, bufsize); | 573 | memcpy(cachv->iov_base, statp, bufsize); |
561 | break; | 574 | break; |
562 | case RC_NOCACHE: | 575 | case RC_NOCACHE: |
563 | nfsd_reply_cache_free(rp); | 576 | nfsd_reply_cache_free(b, rp); |
564 | return; | 577 | return; |
565 | } | 578 | } |
566 | spin_lock(&cache_lock); | 579 | spin_lock(&b->cache_lock); |
567 | drc_mem_usage += bufsize; | 580 | drc_mem_usage += bufsize; |
568 | lru_put_end(rp); | 581 | lru_put_end(b, rp); |
569 | rp->c_secure = rqstp->rq_secure; | 582 | rp->c_secure = rqstp->rq_secure; |
570 | rp->c_type = cachetype; | 583 | rp->c_type = cachetype; |
571 | rp->c_state = RC_DONE; | 584 | rp->c_state = RC_DONE; |
572 | spin_unlock(&cache_lock); | 585 | spin_unlock(&b->cache_lock); |
573 | return; | 586 | return; |
574 | } | 587 | } |
575 | 588 | ||
@@ -600,9 +613,9 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) | |||
600 | */ | 613 | */ |
601 | static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) | 614 | static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) |
602 | { | 615 | { |
603 | spin_lock(&cache_lock); | ||
604 | seq_printf(m, "max entries: %u\n", max_drc_entries); | 616 | seq_printf(m, "max entries: %u\n", max_drc_entries); |
605 | seq_printf(m, "num entries: %u\n", num_drc_entries); | 617 | seq_printf(m, "num entries: %u\n", |
618 | atomic_read(&num_drc_entries)); | ||
606 | seq_printf(m, "hash buckets: %u\n", 1 << maskbits); | 619 | seq_printf(m, "hash buckets: %u\n", 1 << maskbits); |
607 | seq_printf(m, "mem usage: %u\n", drc_mem_usage); | 620 | seq_printf(m, "mem usage: %u\n", drc_mem_usage); |
608 | seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); | 621 | seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); |
@@ -611,7 +624,6 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) | |||
611 | seq_printf(m, "payload misses: %u\n", payload_misses); | 624 | seq_printf(m, "payload misses: %u\n", payload_misses); |
612 | seq_printf(m, "longest chain len: %u\n", longest_chain); | 625 | seq_printf(m, "longest chain len: %u\n", longest_chain); |
613 | seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); | 626 | seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); |
614 | spin_unlock(&cache_lock); | ||
615 | return 0; | 627 | return 0; |
616 | } | 628 | } |
617 | 629 | ||
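Taken together, the nfscache.c changes replace the single global cache_lock with lock striping: each bucket carries its own LRU list and spinlock, and the one cross-bucket counter becomes an atomic. A userspace analogue of the resulting structure, with pthread mutexes standing in for spinlocks and all names invented:

```c
/*
 * Per-bucket layout: one lock and one LRU head per bucket, plus an
 * atomic entry count shared by all buckets.  Lookups in different
 * buckets never contend.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct node {
    unsigned int key;
    struct node *next;
};

struct bucket_sketch {
    pthread_mutex_t lock;
    struct node *lru;           /* oldest entry first */
};

static struct bucket_sketch *table;
static unsigned int nbuckets;   /* power of two */
static atomic_uint nentries;

static int table_init(unsigned int n)
{
    table = calloc(n, sizeof(*table));
    if (!table)
        return -1;
    for (unsigned int i = 0; i < n; i++)
        pthread_mutex_init(&table[i].lock, NULL);
    nbuckets = n;
    return 0;
}

static struct node *table_lookup(unsigned int key)
{
    struct bucket_sketch *b = &table[key & (nbuckets - 1)];
    struct node *n;

    pthread_mutex_lock(&b->lock);   /* contends only within this bucket */
    for (n = b->lru; n; n = n->next)
        if (n->key == key)
            break;
    pthread_mutex_unlock(&b->lock);
    return n;   /* safe here only because this sketch never frees nodes */
}
```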
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 4e042105fb6e..ca73ca79a0ee 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -49,6 +49,7 @@ enum { | |||
49 | NFSD_Leasetime, | 49 | NFSD_Leasetime, |
50 | NFSD_Gracetime, | 50 | NFSD_Gracetime, |
51 | NFSD_RecoveryDir, | 51 | NFSD_RecoveryDir, |
52 | NFSD_V4EndGrace, | ||
52 | #endif | 53 | #endif |
53 | }; | 54 | }; |
54 | 55 | ||
@@ -68,6 +69,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size); | |||
68 | static ssize_t write_leasetime(struct file *file, char *buf, size_t size); | 69 | static ssize_t write_leasetime(struct file *file, char *buf, size_t size); |
69 | static ssize_t write_gracetime(struct file *file, char *buf, size_t size); | 70 | static ssize_t write_gracetime(struct file *file, char *buf, size_t size); |
70 | static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); | 71 | static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); |
72 | static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size); | ||
71 | #endif | 73 | #endif |
72 | 74 | ||
73 | static ssize_t (*write_op[])(struct file *, char *, size_t) = { | 75 | static ssize_t (*write_op[])(struct file *, char *, size_t) = { |
@@ -84,6 +86,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { | |||
84 | [NFSD_Leasetime] = write_leasetime, | 86 | [NFSD_Leasetime] = write_leasetime, |
85 | [NFSD_Gracetime] = write_gracetime, | 87 | [NFSD_Gracetime] = write_gracetime, |
86 | [NFSD_RecoveryDir] = write_recoverydir, | 88 | [NFSD_RecoveryDir] = write_recoverydir, |
89 | [NFSD_V4EndGrace] = write_v4_end_grace, | ||
87 | #endif | 90 | #endif |
88 | }; | 91 | }; |
89 | 92 | ||
@@ -1077,6 +1080,47 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) | |||
1077 | return rv; | 1080 | return rv; |
1078 | } | 1081 | } |
1079 | 1082 | ||
1083 | /** | ||
1084 | * write_v4_end_grace - release grace period for nfsd's v4.x lock manager | ||
1085 | * | ||
1086 | * Input: | ||
1087 | * buf: ignored | ||
1088 | * size: zero | ||
1089 | * OR | ||
1090 | * | ||
1091 | * Input: | ||
1092 | * buf: any value | ||
1093 | * size: non-zero length of C string in @buf | ||
1094 | * Output: | ||
1095 | * passed-in buffer filled with "Y" or "N" followed by a newline, | ||
1096 | * as a NUL-terminated C string. This indicates whether | ||
1097 | * the grace period has ended in the current net | ||
1098 | * namespace. Return code is the size in bytes of the | ||
1099 | * string. Writing a string that starts with 'Y', 'y', or | ||
1100 | * '1' to the file will end the grace period for nfsd's v4 | ||
1101 | * lock manager. | ||
1102 | */ | ||
1103 | static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) | ||
1104 | { | ||
1105 | struct net *net = file->f_dentry->d_sb->s_fs_info; | ||
1106 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); | ||
1107 | |||
1108 | if (size > 0) { | ||
1109 | switch (buf[0]) { | ||
1110 | case 'Y': | ||
1111 | case 'y': | ||
1112 | case '1': | ||
1113 | nfsd4_end_grace(nn); | ||
1114 | break; | ||
1115 | default: | ||
1116 | return -EINVAL; | ||
1117 | } | ||
1118 | } | ||
1119 | |||
1120 | return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%c\n", | ||
1121 | nn->grace_ended ? 'Y' : 'N'); | ||
1122 | } | ||
1123 | |||
1080 | #endif | 1124 | #endif |
1081 | 1125 | ||
1082 | /*----------------------------------------------------------------------------*/ | 1126 | /*----------------------------------------------------------------------------*/ |
@@ -1110,6 +1154,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) | |||
1110 | [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, | 1154 | [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, |
1111 | [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, | 1155 | [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, |
1112 | [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, | 1156 | [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, |
1157 | [NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO}, | ||
1113 | #endif | 1158 | #endif |
1114 | /* last one */ {""} | 1159 | /* last one */ {""} |
1115 | }; | 1160 | }; |
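From userspace the new file behaves like the other nfsd transaction files: write a string starting with 'Y', 'y', or '1' to end the grace period, then read back "Y" or "N" on the same descriptor. A minimal sketch, assuming the nfsd filesystem is mounted at the usual /proc/fs/nfsd:

```c
/* Minimal driver for the v4_end_grace transaction file; the mount
 * point is an assumption. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    char reply[8] = "";
    int fd = open("/proc/fs/nfsd/v4_end_grace", O_RDWR);

    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (write(fd, "Y\n", 2) < 0)        /* end the v4 grace period */
        perror("write");
    if (pread(fd, reply, sizeof(reply) - 1, 0) > 0)
        printf("grace ended: %s", reply);   /* "Y\n" or "N\n" */
    close(fd);
    return 0;
}
```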
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 847daf37e566..747f3b95bd11 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h | |||
@@ -251,7 +251,7 @@ void nfsd_lockd_shutdown(void); | |||
251 | #define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) | 251 | #define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) |
252 | #define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP) | 252 | #define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP) |
253 | #define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH) | 253 | #define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH) |
254 | #define nfserr_metadata_notsupp cpu_to_be32(NFS4ERR_METADATA_NOTSUPP) | 254 | #define nfserr_union_notsupp cpu_to_be32(NFS4ERR_UNION_NOTSUPP) |
255 | #define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED) | 255 | #define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED) |
256 | #define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) | 256 | #define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) |
257 | #define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) | 257 | #define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) |
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index e883a5868be6..88026fc6a981 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c | |||
@@ -209,8 +209,10 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) | |||
209 | * fix that case easily. | 209 | * fix that case easily. |
210 | */ | 210 | */ |
211 | struct cred *new = prepare_creds(); | 211 | struct cred *new = prepare_creds(); |
212 | if (!new) | 212 | if (!new) { |
213 | return nfserrno(-ENOMEM); | 213 | error = nfserrno(-ENOMEM); |
214 | goto out; | ||
215 | } | ||
214 | new->cap_effective = | 216 | new->cap_effective = |
215 | cap_raise_nfsd_set(new->cap_effective, | 217 | cap_raise_nfsd_set(new->cap_effective, |
216 | new->cap_permitted); | 218 | new->cap_permitted); |
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 4a89e00d7461..0a47c6a6b301 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
@@ -62,16 +62,21 @@ typedef struct { | |||
62 | (s)->si_generation | 62 | (s)->si_generation |
63 | 63 | ||
64 | struct nfsd4_callback { | 64 | struct nfsd4_callback { |
65 | void *cb_op; | ||
66 | struct nfs4_client *cb_clp; | 65 | struct nfs4_client *cb_clp; |
67 | struct list_head cb_per_client; | 66 | struct list_head cb_per_client; |
68 | u32 cb_minorversion; | 67 | u32 cb_minorversion; |
69 | struct rpc_message cb_msg; | 68 | struct rpc_message cb_msg; |
70 | const struct rpc_call_ops *cb_ops; | 69 | struct nfsd4_callback_ops *cb_ops; |
71 | struct work_struct cb_work; | 70 | struct work_struct cb_work; |
72 | bool cb_done; | 71 | bool cb_done; |
73 | }; | 72 | }; |
74 | 73 | ||
74 | struct nfsd4_callback_ops { | ||
75 | void (*prepare)(struct nfsd4_callback *); | ||
76 | int (*done)(struct nfsd4_callback *, struct rpc_task *); | ||
77 | void (*release)(struct nfsd4_callback *); | ||
78 | }; | ||
79 | |||
75 | /* | 80 | /* |
76 | * A core object that represents a "common" stateid. These are generally | 81 | * A core object that represents a "common" stateid. These are generally |
77 | * embedded within the different (more specific) stateid objects and contain | 82 | * embedded within the different (more specific) stateid objects and contain |
@@ -127,6 +132,9 @@ struct nfs4_delegation { | |||
127 | struct nfsd4_callback dl_recall; | 132 | struct nfsd4_callback dl_recall; |
128 | }; | 133 | }; |
129 | 134 | ||
135 | #define cb_to_delegation(cb) \ | ||
136 | container_of(cb, struct nfs4_delegation, dl_recall) | ||
137 | |||
130 | /* client delegation callback info */ | 138 | /* client delegation callback info */ |
131 | struct nfs4_cb_conn { | 139 | struct nfs4_cb_conn { |
132 | /* SETCLIENTID info */ | 140 | /* SETCLIENTID info */ |
@@ -306,6 +314,7 @@ struct nfs4_client { | |||
306 | #define NFSD4_CLIENT_STABLE (2) /* client on stable storage */ | 314 | #define NFSD4_CLIENT_STABLE (2) /* client on stable storage */ |
307 | #define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */ | 315 | #define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */ |
308 | #define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */ | 316 | #define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */ |
317 | #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */ | ||
309 | #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ | 318 | #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ |
310 | 1 << NFSD4_CLIENT_CB_KILL) | 319 | 1 << NFSD4_CLIENT_CB_KILL) |
311 | unsigned long cl_flags; | 320 | unsigned long cl_flags; |
@@ -517,6 +526,13 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) | |||
517 | #define RD_STATE 0x00000010 | 526 | #define RD_STATE 0x00000010 |
518 | #define WR_STATE 0x00000020 | 527 | #define WR_STATE 0x00000020 |
519 | 528 | ||
529 | enum nfsd4_cb_op { | ||
530 | NFSPROC4_CLNT_CB_NULL = 0, | ||
531 | NFSPROC4_CLNT_CB_RECALL, | ||
532 | NFSPROC4_CLNT_CB_SEQUENCE, | ||
533 | }; | ||
534 | |||
535 | |||
520 | struct nfsd4_compound_state; | 536 | struct nfsd4_compound_state; |
521 | struct nfsd_net; | 537 | struct nfsd_net; |
522 | 538 | ||
@@ -531,12 +547,12 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, | |||
531 | extern __be32 nfs4_check_open_reclaim(clientid_t *clid, | 547 | extern __be32 nfs4_check_open_reclaim(clientid_t *clid, |
532 | struct nfsd4_compound_state *cstate, struct nfsd_net *nn); | 548 | struct nfsd4_compound_state *cstate, struct nfsd_net *nn); |
533 | extern int set_callback_cred(void); | 549 | extern int set_callback_cred(void); |
534 | void nfsd4_run_cb_null(struct work_struct *w); | ||
535 | void nfsd4_run_cb_recall(struct work_struct *w); | ||
536 | extern void nfsd4_probe_callback(struct nfs4_client *clp); | 550 | extern void nfsd4_probe_callback(struct nfs4_client *clp); |
537 | extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); | 551 | extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); |
538 | extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); | 552 | extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); |
539 | extern void nfsd4_cb_recall(struct nfs4_delegation *dp); | 553 | extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, |
554 | struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op); | ||
555 | extern void nfsd4_run_cb(struct nfsd4_callback *cb); | ||
540 | extern int nfsd4_create_callback_queue(void); | 556 | extern int nfsd4_create_callback_queue(void); |
541 | extern void nfsd4_destroy_callback_queue(void); | 557 | extern void nfsd4_destroy_callback_queue(void); |
542 | extern void nfsd4_shutdown_callback(struct nfs4_client *); | 558 | extern void nfsd4_shutdown_callback(struct nfs4_client *); |
@@ -545,13 +561,16 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, | |||
545 | struct nfsd_net *nn); | 561 | struct nfsd_net *nn); |
546 | extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); | 562 | extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); |
547 | 563 | ||
564 | /* grace period management */ | ||
565 | void nfsd4_end_grace(struct nfsd_net *nn); | ||
566 | |||
548 | /* nfs4recover operations */ | 567 | /* nfs4recover operations */ |
549 | extern int nfsd4_client_tracking_init(struct net *net); | 568 | extern int nfsd4_client_tracking_init(struct net *net); |
550 | extern void nfsd4_client_tracking_exit(struct net *net); | 569 | extern void nfsd4_client_tracking_exit(struct net *net); |
551 | extern void nfsd4_client_record_create(struct nfs4_client *clp); | 570 | extern void nfsd4_client_record_create(struct nfs4_client *clp); |
552 | extern void nfsd4_client_record_remove(struct nfs4_client *clp); | 571 | extern void nfsd4_client_record_remove(struct nfs4_client *clp); |
553 | extern int nfsd4_client_record_check(struct nfs4_client *clp); | 572 | extern int nfsd4_client_record_check(struct nfs4_client *clp); |
554 | extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time); | 573 | extern void nfsd4_record_grace_done(struct nfsd_net *nn); |
555 | 574 | ||
556 | /* nfs fault injection functions */ | 575 | /* nfs fault injection functions */ |
557 | #ifdef CONFIG_NFSD_FAULT_INJECTION | 576 | #ifdef CONFIG_NFSD_FAULT_INJECTION |
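The state.h changes turn the callback into a small object-oriented setup: a table of function pointers (nfsd4_callback_ops) attached to an embedded nfsd4_callback, with the cb_to_delegation() macro using container_of() to climb back to the enclosing delegation. The pattern in miniature, with invented names:

```c
/*
 * Ops table on an embedded callback object, recovered with
 * container_of(), mirroring cb_to_delegation().  Names invented.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct cb;

struct cb_ops {
    void (*release)(struct cb *cb);
};

struct cb {
    const struct cb_ops *ops;
};

struct delegation {
    int id;
    struct cb recall;           /* embedded, like dl_recall */
};

static void deleg_release(struct cb *cb)
{
    struct delegation *dp = container_of(cb, struct delegation, recall);

    printf("releasing delegation %d\n", dp->id);
}

static const struct cb_ops deleg_ops = { .release = deleg_release };

int main(void)
{
    struct delegation d = { .id = 7, .recall.ops = &deleg_ops };

    d.recall.ops->release(&d.recall);
    return 0;
}
```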
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f501a9b5c9df..965cffd17a0c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -445,6 +445,16 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, | |||
445 | if (err) | 445 | if (err) |
446 | goto out; | 446 | goto out; |
447 | size_change = 1; | 447 | size_change = 1; |
448 | |||
449 | /* | ||
450 | * RFC5661, Section 18.30.4: | ||
451 | * Changing the size of a file with SETATTR indirectly | ||
452 | * changes the time_modify and change attributes. | ||
453 | * | ||
454 | * (and similar for the older RFCs) | ||
455 | */ | ||
456 | if (iap->ia_size != i_size_read(inode)) | ||
457 | iap->ia_valid |= ATTR_MTIME; | ||
448 | } | 458 | } |
449 | 459 | ||
450 | iap->ia_valid |= ATTR_CTIME; | 460 | iap->ia_valid |= ATTR_CTIME; |
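The hunk above makes a SETATTR that changes size also carry ATTR_MTIME, since the RFCs treat a size change as an implicit modification. A quick userspace probe of the same expectation against a local filesystem; the test path is an assumption and the file is assumed non-empty (truncating to an unchanged size may legitimately leave mtime alone):

```c
/* Probe: does changing a file's size bump mtime?  "testfile" is an
 * assumed, pre-existing, non-empty path. */
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
    struct stat before, after;

    if (stat("testfile", &before) < 0)
        return 1;
    sleep(1);                   /* let the timestamp tick */
    if (truncate("testfile", 0) < 0)
        return 1;
    if (stat("testfile", &after) < 0)
        return 1;
    printf("mtime %s\n", after.st_mtime != before.st_mtime
           ? "updated" : "unchanged");
    return 0;
}
```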
@@ -649,6 +659,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, | |||
649 | { | 659 | { |
650 | struct path path; | 660 | struct path path; |
651 | struct inode *inode; | 661 | struct inode *inode; |
662 | struct file *file; | ||
652 | int flags = O_RDONLY|O_LARGEFILE; | 663 | int flags = O_RDONLY|O_LARGEFILE; |
653 | __be32 err; | 664 | __be32 err; |
654 | int host_err = 0; | 665 | int host_err = 0; |
@@ -703,19 +714,25 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, | |||
703 | else | 714 | else |
704 | flags = O_WRONLY|O_LARGEFILE; | 715 | flags = O_WRONLY|O_LARGEFILE; |
705 | } | 716 | } |
706 | *filp = dentry_open(&path, flags, current_cred()); | ||
707 | if (IS_ERR(*filp)) { | ||
708 | host_err = PTR_ERR(*filp); | ||
709 | *filp = NULL; | ||
710 | } else { | ||
711 | host_err = ima_file_check(*filp, may_flags); | ||
712 | 717 | ||
713 | if (may_flags & NFSD_MAY_64BIT_COOKIE) | 718 | file = dentry_open(&path, flags, current_cred()); |
714 | (*filp)->f_mode |= FMODE_64BITHASH; | 719 | if (IS_ERR(file)) { |
715 | else | 720 | host_err = PTR_ERR(file); |
716 | (*filp)->f_mode |= FMODE_32BITHASH; | 721 | goto out_nfserr; |
717 | } | 722 | } |
718 | 723 | ||
724 | host_err = ima_file_check(file, may_flags); | ||
725 | if (host_err) { | ||
726 | nfsd_close(file); | ||
727 | goto out_nfserr; | ||
728 | } | ||
729 | |||
730 | if (may_flags & NFSD_MAY_64BIT_COOKIE) | ||
731 | file->f_mode |= FMODE_64BITHASH; | ||
732 | else | ||
733 | file->f_mode |= FMODE_32BITHASH; | ||
734 | |||
735 | *filp = file; | ||
719 | out_nfserr: | 736 | out_nfserr: |
720 | err = nfserrno(host_err); | 737 | err = nfserrno(host_err); |
721 | out: | 738 | out: |
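The nfsd_open() rework keeps the opened file in a local variable and stores it through *filp only after ima_file_check() succeeds, so callers never see a half-initialized pointer, and a post-open check failure now closes the file instead of being ignored. The shape of that fix in a userspace sketch, with fcntl() standing in for the integrity check:

```c
/* Publish the handle through the out-parameter only once every step
 * has succeeded. */
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

static int open_checked(const char *path, int flags, int *fdp)
{
    int fd = open(path, flags);

    if (fd < 0)
        return -errno;

    if (fcntl(fd, F_GETFL) < 0) {   /* post-open check failed */
        int err = -errno;

        close(fd);                  /* clean up before reporting */
        return err;
    }

    *fdp = fd;                      /* publish only on success */
    return 0;
}
```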
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 465e7799742a..5720e9457f33 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h | |||
@@ -428,6 +428,17 @@ struct nfsd4_reclaim_complete { | |||
428 | u32 rca_one_fs; | 428 | u32 rca_one_fs; |
429 | }; | 429 | }; |
430 | 430 | ||
431 | struct nfsd4_seek { | ||
432 | /* request */ | ||
433 | stateid_t seek_stateid; | ||
434 | loff_t seek_offset; | ||
435 | u32 seek_whence; | ||
436 | |||
437 | /* response */ | ||
438 | u32 seek_eof; | ||
439 | loff_t seek_pos; | ||
440 | }; | ||
441 | |||
431 | struct nfsd4_op { | 442 | struct nfsd4_op { |
432 | int opnum; | 443 | int opnum; |
433 | __be32 status; | 444 | __be32 status; |
@@ -473,6 +484,9 @@ struct nfsd4_op { | |||
473 | struct nfsd4_reclaim_complete reclaim_complete; | 484 | struct nfsd4_reclaim_complete reclaim_complete; |
474 | struct nfsd4_test_stateid test_stateid; | 485 | struct nfsd4_test_stateid test_stateid; |
475 | struct nfsd4_free_stateid free_stateid; | 486 | struct nfsd4_free_stateid free_stateid; |
487 | |||
488 | /* NFSv4.2 */ | ||
489 | struct nfsd4_seek seek; | ||
476 | } u; | 490 | } u; |
477 | struct nfs4_replay * replay; | 491 | struct nfs4_replay * replay; |
478 | }; | 492 | }; |
diff --git a/fs/stack.c b/fs/stack.c index 5b5388250e29..a54e33ed10f1 100644 --- a/fs/stack.c +++ b/fs/stack.c | |||
@@ -44,7 +44,7 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src) | |||
44 | * include/linux/fs.h). We don't necessarily hold i_mutex when this | 44 | * include/linux/fs.h). We don't necessarily hold i_mutex when this |
45 | * is called, so take i_lock for that case. | 45 | * is called, so take i_lock for that case. |
46 | * | 46 | * |
47 | * And if CONFIG_LBADF (on 32-bit), continue our effort to keep the | 47 | * And if CONFIG_LBDAF (on 32-bit), continue our effort to keep the |
48 | * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock | 48 | * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock |
49 | * for that case too, and do both at once by combining the tests. | 49 | * for that case too, and do both at once by combining the tests. |
50 | * | 50 | * |
diff --git a/fs/timerfd.c b/fs/timerfd.c index 80c350216ea8..b46ffa94372a 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c | |||
@@ -333,8 +333,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg | |||
333 | spin_lock_irq(&ctx->wqh.lock); | 333 | spin_lock_irq(&ctx->wqh.lock); |
334 | if (!timerfd_canceled(ctx)) { | 334 | if (!timerfd_canceled(ctx)) { |
335 | ctx->ticks = ticks; | 335 | ctx->ticks = ticks; |
336 | if (ticks) | 336 | wake_up_locked(&ctx->wqh); |
337 | wake_up_locked(&ctx->wqh); | ||
338 | } else | 337 | } else |
339 | ret = -ECANCELED; | 338 | ret = -ECANCELED; |
340 | spin_unlock_irq(&ctx->wqh.lock); | 339 | spin_unlock_irq(&ctx->wqh.lock); |
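The timerfd change makes TFD_IOC_SET_TICKS wake any waiters even when the new tick count is zero. That ioctl is the checkpoint/restore interface for overwriting a timerfd's expiry count; a sketch of invoking it, assuming a kernel built with CONFIG_CHECKPOINT_RESTORE (it typically also requires privilege, an assumption to verify):

```c
/* Overwrite a timerfd's expiry count via TFD_IOC_SET_TICKS. */
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/timerfd.h>
#include <linux/ioctl.h>        /* _IOW */

#ifndef TFD_IOC_SET_TICKS
#define TFD_IOC_SET_TICKS _IOW('T', 0, uint64_t)
#endif

int main(void)
{
    uint64_t ticks = 0;         /* zero now wakes waiters too */
    int fd = timerfd_create(CLOCK_MONOTONIC, 0);

    if (fd < 0) {
        perror("timerfd_create");
        return 1;
    }
    if (ioctl(fd, TFD_IOC_SET_TICKS, &ticks) < 0)
        perror("ioctl(TFD_IOC_SET_TICKS)");
    close(fd);
    return 0;
}
```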