diff options
author | Jaegeuk Kim <jaegeuk@kernel.org> | 2014-09-21 01:06:39 -0400 |
---|---|---|
committer | Jaegeuk Kim <jaegeuk@kernel.org> | 2014-09-30 18:06:09 -0400 |
commit | 4b2fecc84655055a6a1fe9151786992ac04b56ce (patch) | |
tree | 05f5f041260756216c0b3f1a0da2360be31580b6 | |
parent | 75ab4cb8301adb3a02a96c5c03c837ed941f1bc5 (diff) |
f2fs: introduce FITRIM in f2fs_ioctl
This patch introduces FITRIM in f2fs_ioctl.
In this case, f2fs will issue as many small discards and prefree discards as
possible for the given area.
Reviewed-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
-rw-r--r-- | fs/f2fs/checkpoint.c | 4 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 9 | ||||
-rw-r--r-- | fs/f2fs/file.c | 29 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 104 | ||||
-rw-r--r-- | fs/f2fs/super.c | 1 | ||||
-rw-r--r-- | include/trace/events/f2fs.h | 3 |
6 files changed, 136 insertions, 14 deletions
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index efc530cb74a9..4abf0ba01525 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c | |||
@@ -997,7 +997,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
997 | 997 | ||
998 | mutex_lock(&sbi->cp_mutex); | 998 | mutex_lock(&sbi->cp_mutex); |
999 | 999 | ||
1000 | if (!sbi->s_dirty) | 1000 | if (!sbi->s_dirty && cpc->reason != CP_DISCARD) |
1001 | goto out; | 1001 | goto out; |
1002 | if (unlikely(f2fs_cp_error(sbi))) | 1002 | if (unlikely(f2fs_cp_error(sbi))) |
1003 | goto out; | 1003 | goto out; |
@@ -1020,7 +1020,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
1020 | 1020 | ||
1021 | /* write cached NAT/SIT entries to NAT/SIT area */ | 1021 | /* write cached NAT/SIT entries to NAT/SIT area */ |
1022 | flush_nat_entries(sbi); | 1022 | flush_nat_entries(sbi); |
1023 | flush_sit_entries(sbi); | 1023 | flush_sit_entries(sbi, cpc); |
1024 | 1024 | ||
1025 | /* unlock all the fs_lock[] in do_checkpoint() */ | 1025 | /* unlock all the fs_lock[] in do_checkpoint() */ |
1026 | do_checkpoint(sbi, cpc); | 1026 | do_checkpoint(sbi, cpc); |
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 529892418862..7b1e1d20a9c1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h | |||
@@ -99,10 +99,15 @@ enum { | |||
99 | enum { | 99 | enum { |
100 | CP_UMOUNT, | 100 | CP_UMOUNT, |
101 | CP_SYNC, | 101 | CP_SYNC, |
102 | CP_DISCARD, | ||
102 | }; | 103 | }; |
103 | 104 | ||
104 | struct cp_control { | 105 | struct cp_control { |
105 | int reason; | 106 | int reason; |
107 | __u64 trim_start; | ||
108 | __u64 trim_end; | ||
109 | __u64 trim_minlen; | ||
110 | __u64 trimmed; | ||
106 | }; | 111 | }; |
107 | 112 | ||
108 | /* | 113 | /* |
@@ -1276,9 +1281,11 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *); | |||
1276 | void invalidate_blocks(struct f2fs_sb_info *, block_t); | 1281 | void invalidate_blocks(struct f2fs_sb_info *, block_t); |
1277 | void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); | 1282 | void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); |
1278 | void clear_prefree_segments(struct f2fs_sb_info *); | 1283 | void clear_prefree_segments(struct f2fs_sb_info *); |
1284 | void release_discard_addrs(struct f2fs_sb_info *); | ||
1279 | void discard_next_dnode(struct f2fs_sb_info *, block_t); | 1285 | void discard_next_dnode(struct f2fs_sb_info *, block_t); |
1280 | int npages_for_summary_flush(struct f2fs_sb_info *); | 1286 | int npages_for_summary_flush(struct f2fs_sb_info *); |
1281 | void allocate_new_segments(struct f2fs_sb_info *); | 1287 | void allocate_new_segments(struct f2fs_sb_info *); |
1288 | int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); | ||
1282 | struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); | 1289 | struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); |
1283 | void write_meta_page(struct f2fs_sb_info *, struct page *); | 1290 | void write_meta_page(struct f2fs_sb_info *, struct page *); |
1284 | void write_node_page(struct f2fs_sb_info *, struct page *, | 1291 | void write_node_page(struct f2fs_sb_info *, struct page *, |
@@ -1295,7 +1302,7 @@ void write_data_summaries(struct f2fs_sb_info *, block_t); | |||
1295 | void write_node_summaries(struct f2fs_sb_info *, block_t); | 1302 | void write_node_summaries(struct f2fs_sb_info *, block_t); |
1296 | int lookup_journal_in_cursum(struct f2fs_summary_block *, | 1303 | int lookup_journal_in_cursum(struct f2fs_summary_block *, |
1297 | int, unsigned int, int); | 1304 | int, unsigned int, int); |
1298 | void flush_sit_entries(struct f2fs_sb_info *); | 1305 | void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *); |
1299 | int build_segment_manager(struct f2fs_sb_info *); | 1306 | int build_segment_manager(struct f2fs_sb_info *); |
1300 | void destroy_segment_manager(struct f2fs_sb_info *); | 1307 | void destroy_segment_manager(struct f2fs_sb_info *); |
1301 | int __init create_segment_manager_caches(void); | 1308 | int __init create_segment_manager_caches(void); |
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ac8c6804097f..11842076d960 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c | |||
@@ -860,6 +860,35 @@ out: | |||
860 | mnt_drop_write_file(filp); | 860 | mnt_drop_write_file(filp); |
861 | return ret; | 861 | return ret; |
862 | } | 862 | } |
863 | case FITRIM: | ||
864 | { | ||
865 | struct super_block *sb = inode->i_sb; | ||
866 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | ||
867 | struct fstrim_range range; | ||
868 | int ret = 0; | ||
869 | |||
870 | if (!capable(CAP_SYS_ADMIN)) | ||
871 | return -EPERM; | ||
872 | |||
873 | if (!blk_queue_discard(q)) | ||
874 | return -EOPNOTSUPP; | ||
875 | |||
876 | if (copy_from_user(&range, (struct fstrim_range __user *)arg, | ||
877 | sizeof(range))) | ||
878 | return -EFAULT; | ||
879 | |||
880 | range.minlen = max((unsigned int)range.minlen, | ||
881 | q->limits.discard_granularity); | ||
882 | ret = f2fs_trim_fs(F2FS_SB(sb), &range); | ||
883 | if (ret < 0) | ||
884 | return ret; | ||
885 | |||
886 | if (copy_to_user((struct fstrim_range __user *)arg, &range, | ||
887 | sizeof(range))) | ||
888 | return -EFAULT; | ||
889 | |||
890 | return 0; | ||
891 | } | ||
863 | default: | 892 | default: |
864 | return -ENOTTY; | 893 | return -ENOTTY; |
865 | } | 894 | } |
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3125a3d35245..fc87da189884 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c | |||
@@ -386,22 +386,48 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) | |||
386 | } | 386 | } |
387 | } | 387 | } |
388 | 388 | ||
389 | static void add_discard_addrs(struct f2fs_sb_info *sbi, | 389 | static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) |
390 | unsigned int segno, struct seg_entry *se) | ||
391 | { | 390 | { |
392 | struct list_head *head = &SM_I(sbi)->discard_list; | 391 | struct list_head *head = &SM_I(sbi)->discard_list; |
393 | struct discard_entry *new; | 392 | struct discard_entry *new; |
394 | int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); | 393 | int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); |
395 | int max_blocks = sbi->blocks_per_seg; | 394 | int max_blocks = sbi->blocks_per_seg; |
395 | struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); | ||
396 | unsigned long *cur_map = (unsigned long *)se->cur_valid_map; | 396 | unsigned long *cur_map = (unsigned long *)se->cur_valid_map; |
397 | unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; | 397 | unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; |
398 | unsigned long dmap[entries]; | 398 | unsigned long dmap[entries]; |
399 | unsigned int start = 0, end = -1; | 399 | unsigned int start = 0, end = -1; |
400 | bool force = (cpc->reason == CP_DISCARD); | ||
400 | int i; | 401 | int i; |
401 | 402 | ||
402 | if (!test_opt(sbi, DISCARD)) | 403 | if (!force && !test_opt(sbi, DISCARD)) |
403 | return; | 404 | return; |
404 | 405 | ||
406 | if (force && !se->valid_blocks) { | ||
407 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | ||
408 | /* | ||
409 | * if this segment is registered in the prefree list, then | ||
410 | * we should skip adding a discard candidate, and let the | ||
411 | * checkpoint do that later. | ||
412 | */ | ||
413 | mutex_lock(&dirty_i->seglist_lock); | ||
414 | if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) { | ||
415 | mutex_unlock(&dirty_i->seglist_lock); | ||
416 | cpc->trimmed += sbi->blocks_per_seg; | ||
417 | return; | ||
418 | } | ||
419 | mutex_unlock(&dirty_i->seglist_lock); | ||
420 | |||
421 | new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); | ||
422 | INIT_LIST_HEAD(&new->list); | ||
423 | new->blkaddr = START_BLOCK(sbi, cpc->trim_start); | ||
424 | new->len = sbi->blocks_per_seg; | ||
425 | list_add_tail(&new->list, head); | ||
426 | SM_I(sbi)->nr_discards += sbi->blocks_per_seg; | ||
427 | cpc->trimmed += sbi->blocks_per_seg; | ||
428 | return; | ||
429 | } | ||
430 | |||
405 | /* zero block will be discarded through the prefree list */ | 431 | /* zero block will be discarded through the prefree list */ |
406 | if (!se->valid_blocks || se->valid_blocks == max_blocks) | 432 | if (!se->valid_blocks || se->valid_blocks == max_blocks) |
407 | return; | 433 | return; |
@@ -410,23 +436,39 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, | |||
410 | for (i = 0; i < entries; i++) | 436 | for (i = 0; i < entries; i++) |
411 | dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; | 437 | dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; |
412 | 438 | ||
413 | while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { | 439 | while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { |
414 | start = __find_rev_next_bit(dmap, max_blocks, end + 1); | 440 | start = __find_rev_next_bit(dmap, max_blocks, end + 1); |
415 | if (start >= max_blocks) | 441 | if (start >= max_blocks) |
416 | break; | 442 | break; |
417 | 443 | ||
418 | end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); | 444 | end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); |
419 | 445 | ||
446 | if (end - start < cpc->trim_minlen) | ||
447 | continue; | ||
448 | |||
420 | new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); | 449 | new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); |
421 | INIT_LIST_HEAD(&new->list); | 450 | INIT_LIST_HEAD(&new->list); |
422 | new->blkaddr = START_BLOCK(sbi, segno) + start; | 451 | new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; |
423 | new->len = end - start; | 452 | new->len = end - start; |
453 | cpc->trimmed += end - start; | ||
424 | 454 | ||
425 | list_add_tail(&new->list, head); | 455 | list_add_tail(&new->list, head); |
426 | SM_I(sbi)->nr_discards += end - start; | 456 | SM_I(sbi)->nr_discards += end - start; |
427 | } | 457 | } |
428 | } | 458 | } |
429 | 459 | ||
460 | void release_discard_addrs(struct f2fs_sb_info *sbi) | ||
461 | { | ||
462 | struct list_head *head = &(SM_I(sbi)->discard_list); | ||
463 | struct discard_entry *entry, *this; | ||
464 | |||
465 | /* drop caches */ | ||
466 | list_for_each_entry_safe(entry, this, head, list) { | ||
467 | list_del(&entry->list); | ||
468 | kmem_cache_free(discard_entry_slab, entry); | ||
469 | } | ||
470 | } | ||
471 | |||
430 | /* | 472 | /* |
431 | * Should call clear_prefree_segments after checkpoint is done. | 473 | * Should call clear_prefree_segments after checkpoint is done. |
432 | */ | 474 | */ |
@@ -897,6 +939,41 @@ static const struct segment_allocation default_salloc_ops = { | |||
897 | .allocate_segment = allocate_segment_by_default, | 939 | .allocate_segment = allocate_segment_by_default, |
898 | }; | 940 | }; |
899 | 941 | ||
942 | int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) | ||
943 | { | ||
944 | block_t start_addr = SM_I(sbi)->main_blkaddr; | ||
945 | __u64 start = range->start >> sbi->log_blocksize; | ||
946 | __u64 end = start + (range->len >> sbi->log_blocksize) - 1; | ||
947 | __u64 segment = 1 << (sbi->log_blocksize + sbi->log_blocks_per_seg); | ||
948 | unsigned int start_segno, end_segno; | ||
949 | struct cp_control cpc; | ||
950 | |||
951 | if (range->minlen > segment || | ||
952 | start >= SM_I(sbi)->seg0_blkaddr + TOTAL_BLKS(sbi) || | ||
953 | range->len < sbi->blocksize) | ||
954 | return -EINVAL; | ||
955 | |||
956 | if (end <= start_addr) | ||
957 | goto out; | ||
958 | |||
959 | /* start/end segment number in main_area */ | ||
960 | start_segno = (start <= start_addr) ? 0 : GET_SEGNO(sbi, start); | ||
961 | end_segno = (end >= SM_I(sbi)->seg0_blkaddr + TOTAL_BLKS(sbi)) ? | ||
962 | TOTAL_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); | ||
963 | |||
964 | cpc.reason = CP_DISCARD; | ||
965 | cpc.trim_start = start_segno; | ||
966 | cpc.trim_end = end_segno; | ||
967 | cpc.trim_minlen = range->minlen >> sbi->log_blocksize; | ||
968 | cpc.trimmed = 0; | ||
969 | |||
970 | /* do checkpoint to issue discard commands safely */ | ||
971 | write_checkpoint(sbi, &cpc); | ||
972 | out: | ||
973 | range->len = cpc.trimmed << sbi->log_blocksize; | ||
974 | return 0; | ||
975 | } | ||
976 | |||
900 | static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) | 977 | static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) |
901 | { | 978 | { |
902 | struct curseg_info *curseg = CURSEG_I(sbi, type); | 979 | struct curseg_info *curseg = CURSEG_I(sbi, type); |
@@ -1524,7 +1601,7 @@ static void remove_sits_in_journal(struct f2fs_sb_info *sbi) | |||
1524 | * CP calls this function, which flushes SIT entries including sit_journal, | 1601 | * CP calls this function, which flushes SIT entries including sit_journal, |
1525 | * and moves prefree segs to free segs. | 1602 | * and moves prefree segs to free segs. |
1526 | */ | 1603 | */ |
1527 | void flush_sit_entries(struct f2fs_sb_info *sbi) | 1604 | void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) |
1528 | { | 1605 | { |
1529 | struct sit_info *sit_i = SIT_I(sbi); | 1606 | struct sit_info *sit_i = SIT_I(sbi); |
1530 | unsigned long *bitmap = sit_i->dirty_sentries_bitmap; | 1607 | unsigned long *bitmap = sit_i->dirty_sentries_bitmap; |
@@ -1534,6 +1611,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi) | |||
1534 | struct list_head *head = &SM_I(sbi)->sit_entry_set; | 1611 | struct list_head *head = &SM_I(sbi)->sit_entry_set; |
1535 | unsigned long nsegs = TOTAL_SEGS(sbi); | 1612 | unsigned long nsegs = TOTAL_SEGS(sbi); |
1536 | bool to_journal = true; | 1613 | bool to_journal = true; |
1614 | struct seg_entry *se; | ||
1537 | 1615 | ||
1538 | mutex_lock(&curseg->curseg_mutex); | 1616 | mutex_lock(&curseg->curseg_mutex); |
1539 | mutex_lock(&sit_i->sentry_lock); | 1617 | mutex_lock(&sit_i->sentry_lock); |
@@ -1580,11 +1658,14 @@ void flush_sit_entries(struct f2fs_sb_info *sbi) | |||
1580 | /* flush dirty sit entries in region of current sit set */ | 1658 | /* flush dirty sit entries in region of current sit set */ |
1581 | for_each_set_bit_from(segno, bitmap, end) { | 1659 | for_each_set_bit_from(segno, bitmap, end) { |
1582 | int offset, sit_offset; | 1660 | int offset, sit_offset; |
1583 | struct seg_entry *se = get_seg_entry(sbi, segno); | 1661 | |
1662 | se = get_seg_entry(sbi, segno); | ||
1584 | 1663 | ||
1585 | /* add discard candidates */ | 1664 | /* add discard candidates */ |
1586 | if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) | 1665 | if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) { |
1587 | add_discard_addrs(sbi, segno, se); | 1666 | cpc->trim_start = segno; |
1667 | add_discard_addrs(sbi, cpc); | ||
1668 | } | ||
1588 | 1669 | ||
1589 | if (to_journal) { | 1670 | if (to_journal) { |
1590 | offset = lookup_journal_in_cursum(sum, | 1671 | offset = lookup_journal_in_cursum(sum, |
@@ -1614,8 +1695,11 @@ void flush_sit_entries(struct f2fs_sb_info *sbi) | |||
1614 | 1695 | ||
1615 | f2fs_bug_on(sbi, !list_empty(head)); | 1696 | f2fs_bug_on(sbi, !list_empty(head)); |
1616 | f2fs_bug_on(sbi, sit_i->dirty_sentries); | 1697 | f2fs_bug_on(sbi, sit_i->dirty_sentries); |
1617 | |||
1618 | out: | 1698 | out: |
1699 | if (cpc->reason == CP_DISCARD) { | ||
1700 | for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) | ||
1701 | add_discard_addrs(sbi, cpc); | ||
1702 | } | ||
1619 | mutex_unlock(&sit_i->sentry_lock); | 1703 | mutex_unlock(&sit_i->sentry_lock); |
1620 | mutex_unlock(&curseg->curseg_mutex); | 1704 | mutex_unlock(&curseg->curseg_mutex); |
1621 | 1705 | ||
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 128c42000fa3..bb6b568d6ad4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c | |||
@@ -446,6 +446,7 @@ static void f2fs_put_super(struct super_block *sb) | |||
446 | * In addition, EIO will skip do checkpoint, we need this as well. | 446 | * In addition, EIO will skip do checkpoint, we need this as well. |
447 | */ | 447 | */ |
448 | release_dirty_inode(sbi); | 448 | release_dirty_inode(sbi); |
449 | release_discard_addrs(sbi); | ||
449 | 450 | ||
450 | iput(sbi->node_inode); | 451 | iput(sbi->node_inode); |
451 | iput(sbi->meta_inode); | 452 | iput(sbi->meta_inode); |
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 66eaace9c07e..bbc4de9baef7 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h | |||
@@ -72,7 +72,8 @@ | |||
72 | #define show_cpreason(type) \ | 72 | #define show_cpreason(type) \ |
73 | __print_symbolic(type, \ | 73 | __print_symbolic(type, \ |
74 | { CP_UMOUNT, "Umount" }, \ | 74 | { CP_UMOUNT, "Umount" }, \ |
75 | { CP_SYNC, "Sync" }) | 75 | { CP_SYNC, "Sync" }, \ |
76 | { CP_DISCARD, "Discard" }) | ||
76 | 77 | ||
77 | struct victim_sel_policy; | 78 | struct victim_sel_policy; |
78 | 79 | ||