diff options
author | Kent Overstreet <kmo@daterainc.com> | 2013-09-11 01:53:34 -0400 |
---|---|---|
committer | Kent Overstreet <kmo@daterainc.com> | 2014-01-08 16:05:12 -0500 |
commit | 67539e85289c14a76a1c4162613d14a5f05a0027 (patch) | |
tree | 7650b78775bf7b9f2b92113606d92a4a838a6753 /drivers/md | |
parent | 911c9610099f26e9e6ea3d1962ce24f53890b163 (diff) |
bcache: Add struct bset_sort_state
More disentangling bset.c from the rest of the bcache code - soon, the
sorting routines won't have any dependencies on any outside structs.
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bcache/bcache.h | 5 | ||||
-rw-r--r-- | drivers/md/bcache/bset.c | 68 | ||||
-rw-r--r-- | drivers/md/bcache/bset.h | 38 | ||||
-rw-r--r-- | drivers/md/bcache/btree.c | 14 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 9 | ||||
-rw-r--r-- | drivers/md/bcache/sysfs.c | 2 |
6 files changed, 87 insertions, 49 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 2b46c86ac440..7bd4c93475e7 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h | |||
@@ -187,6 +187,7 @@ | |||
187 | #include <linux/types.h> | 187 | #include <linux/types.h> |
188 | #include <linux/workqueue.h> | 188 | #include <linux/workqueue.h> |
189 | 189 | ||
190 | #include "bset.h" | ||
190 | #include "util.h" | 191 | #include "util.h" |
191 | #include "closure.h" | 192 | #include "closure.h" |
192 | 193 | ||
@@ -645,8 +646,7 @@ struct cache_set { | |||
645 | */ | 646 | */ |
646 | mempool_t *fill_iter; | 647 | mempool_t *fill_iter; |
647 | 648 | ||
648 | mempool_t *sort_pool; | 649 | struct bset_sort_state sort; |
649 | unsigned sort_crit_factor; | ||
650 | 650 | ||
651 | /* List of buckets we're currently writing data to */ | 651 | /* List of buckets we're currently writing data to */ |
652 | struct list_head data_buckets; | 652 | struct list_head data_buckets; |
@@ -662,7 +662,6 @@ struct cache_set { | |||
662 | unsigned congested_read_threshold_us; | 662 | unsigned congested_read_threshold_us; |
663 | unsigned congested_write_threshold_us; | 663 | unsigned congested_write_threshold_us; |
664 | 664 | ||
665 | struct time_stats sort_time; | ||
666 | struct time_stats btree_gc_time; | 665 | struct time_stats btree_gc_time; |
667 | struct time_stats btree_split_time; | 666 | struct time_stats btree_split_time; |
668 | struct time_stats btree_read_time; | 667 | struct time_stats btree_read_time; |
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 9e3a53d87de0..9d9c2edda760 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c | |||
@@ -952,6 +952,26 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, | |||
952 | 952 | ||
953 | /* Mergesort */ | 953 | /* Mergesort */ |
954 | 954 | ||
955 | void bch_bset_sort_state_free(struct bset_sort_state *state) | ||
956 | { | ||
957 | if (state->pool) | ||
958 | mempool_destroy(state->pool); | ||
959 | } | ||
960 | |||
961 | int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order) | ||
962 | { | ||
963 | spin_lock_init(&state->time.lock); | ||
964 | |||
965 | state->page_order = page_order; | ||
966 | state->crit_factor = int_sqrt(1 << page_order); | ||
967 | |||
968 | state->pool = mempool_create_page_pool(1, page_order); | ||
969 | if (!state->pool) | ||
970 | return -ENOMEM; | ||
971 | |||
972 | return 0; | ||
973 | } | ||
974 | |||
955 | static void sort_key_next(struct btree_iter *iter, | 975 | static void sort_key_next(struct btree_iter *iter, |
956 | struct btree_iter_set *i) | 976 | struct btree_iter_set *i) |
957 | { | 977 | { |
@@ -1077,22 +1097,24 @@ static void btree_mergesort(struct btree *b, struct bset *out, | |||
1077 | } | 1097 | } |
1078 | 1098 | ||
1079 | static void __btree_sort(struct btree *b, struct btree_iter *iter, | 1099 | static void __btree_sort(struct btree *b, struct btree_iter *iter, |
1080 | unsigned start, unsigned order, bool fixup) | 1100 | unsigned start, unsigned order, bool fixup, |
1101 | struct bset_sort_state *state) | ||
1081 | { | 1102 | { |
1082 | uint64_t start_time; | 1103 | uint64_t start_time; |
1083 | bool remove_stale = !b->written; | ||
1084 | bool used_mempool = false; | 1104 | bool used_mempool = false; |
1085 | struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO, | 1105 | struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO, |
1086 | order); | 1106 | order); |
1087 | if (!out) { | 1107 | if (!out) { |
1088 | out = page_address(mempool_alloc(b->c->sort_pool, GFP_NOIO)); | 1108 | BUG_ON(order > state->page_order); |
1109 | |||
1110 | out = page_address(mempool_alloc(state->pool, GFP_NOIO)); | ||
1089 | used_mempool = true; | 1111 | used_mempool = true; |
1090 | order = ilog2(bucket_pages(b->c)); | 1112 | order = ilog2(bucket_pages(b->c)); |
1091 | } | 1113 | } |
1092 | 1114 | ||
1093 | start_time = local_clock(); | 1115 | start_time = local_clock(); |
1094 | 1116 | ||
1095 | btree_mergesort(b, out, iter, fixup, remove_stale); | 1117 | btree_mergesort(b, out, iter, fixup, false); |
1096 | b->nsets = start; | 1118 | b->nsets = start; |
1097 | 1119 | ||
1098 | if (!start && order == b->page_order) { | 1120 | if (!start && order == b->page_order) { |
@@ -1113,18 +1135,18 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, | |||
1113 | } | 1135 | } |
1114 | 1136 | ||
1115 | if (used_mempool) | 1137 | if (used_mempool) |
1116 | mempool_free(virt_to_page(out), b->c->sort_pool); | 1138 | mempool_free(virt_to_page(out), state->pool); |
1117 | else | 1139 | else |
1118 | free_pages((unsigned long) out, order); | 1140 | free_pages((unsigned long) out, order); |
1119 | 1141 | ||
1120 | if (b->written) | 1142 | bset_build_written_tree(b); |
1121 | bset_build_written_tree(b); | ||
1122 | 1143 | ||
1123 | if (!start) | 1144 | if (!start) |
1124 | bch_time_stats_update(&b->c->sort_time, start_time); | 1145 | bch_time_stats_update(&state->time, start_time); |
1125 | } | 1146 | } |
1126 | 1147 | ||
1127 | void bch_btree_sort_partial(struct btree *b, unsigned start) | 1148 | void bch_btree_sort_partial(struct btree *b, unsigned start, |
1149 | struct bset_sort_state *state) | ||
1128 | { | 1150 | { |
1129 | size_t order = b->page_order, keys = 0; | 1151 | size_t order = b->page_order, keys = 0; |
1130 | struct btree_iter iter; | 1152 | struct btree_iter iter; |
@@ -1148,18 +1170,19 @@ void bch_btree_sort_partial(struct btree *b, unsigned start) | |||
1148 | order = ilog2(order); | 1170 | order = ilog2(order); |
1149 | } | 1171 | } |
1150 | 1172 | ||
1151 | __btree_sort(b, &iter, start, order, false); | 1173 | __btree_sort(b, &iter, start, order, false, state); |
1152 | 1174 | ||
1153 | EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize); | 1175 | EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize); |
1154 | } | 1176 | } |
1155 | 1177 | ||
1156 | void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter) | 1178 | void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter, |
1179 | struct bset_sort_state *state) | ||
1157 | { | 1180 | { |
1158 | BUG_ON(!b->written); | 1181 | __btree_sort(b, iter, 0, b->page_order, true, state); |
1159 | __btree_sort(b, iter, 0, b->page_order, true); | ||
1160 | } | 1182 | } |
1161 | 1183 | ||
1162 | void bch_btree_sort_into(struct btree *b, struct btree *new) | 1184 | void bch_btree_sort_into(struct btree *b, struct btree *new, |
1185 | struct bset_sort_state *state) | ||
1163 | { | 1186 | { |
1164 | uint64_t start_time = local_clock(); | 1187 | uint64_t start_time = local_clock(); |
1165 | 1188 | ||
@@ -1168,15 +1191,14 @@ void bch_btree_sort_into(struct btree *b, struct btree *new) | |||
1168 | 1191 | ||
1169 | btree_mergesort(b, new->sets->data, &iter, false, true); | 1192 | btree_mergesort(b, new->sets->data, &iter, false, true); |
1170 | 1193 | ||
1171 | bch_time_stats_update(&b->c->sort_time, start_time); | 1194 | bch_time_stats_update(&state->time, start_time); |
1172 | 1195 | ||
1173 | bkey_copy_key(&new->key, &b->key); | ||
1174 | new->sets->size = 0; | 1196 | new->sets->size = 0; |
1175 | } | 1197 | } |
1176 | 1198 | ||
1177 | #define SORT_CRIT (4096 / sizeof(uint64_t)) | 1199 | #define SORT_CRIT (4096 / sizeof(uint64_t)) |
1178 | 1200 | ||
1179 | void bch_btree_sort_lazy(struct btree *b) | 1201 | void bch_btree_sort_lazy(struct btree *b, struct bset_sort_state *state) |
1180 | { | 1202 | { |
1181 | unsigned crit = SORT_CRIT; | 1203 | unsigned crit = SORT_CRIT; |
1182 | int i; | 1204 | int i; |
@@ -1185,24 +1207,18 @@ void bch_btree_sort_lazy(struct btree *b) | |||
1185 | if (!b->nsets) | 1207 | if (!b->nsets) |
1186 | goto out; | 1208 | goto out; |
1187 | 1209 | ||
1188 | /* If not a leaf node, always sort */ | ||
1189 | if (b->level) { | ||
1190 | bch_btree_sort(b); | ||
1191 | return; | ||
1192 | } | ||
1193 | |||
1194 | for (i = b->nsets - 1; i >= 0; --i) { | 1210 | for (i = b->nsets - 1; i >= 0; --i) { |
1195 | crit *= b->c->sort_crit_factor; | 1211 | crit *= state->crit_factor; |
1196 | 1212 | ||
1197 | if (b->sets[i].data->keys < crit) { | 1213 | if (b->sets[i].data->keys < crit) { |
1198 | bch_btree_sort_partial(b, i); | 1214 | bch_btree_sort_partial(b, i, state); |
1199 | return; | 1215 | return; |
1200 | } | 1216 | } |
1201 | } | 1217 | } |
1202 | 1218 | ||
1203 | /* Sort if we'd overflow */ | 1219 | /* Sort if we'd overflow */ |
1204 | if (b->nsets + 1 == MAX_BSETS) { | 1220 | if (b->nsets + 1 == MAX_BSETS) { |
1205 | bch_btree_sort(b); | 1221 | bch_btree_sort(b, state); |
1206 | return; | 1222 | return; |
1207 | } | 1223 | } |
1208 | 1224 | ||
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 91bcbdb04085..4f60c21c7a38 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h | |||
@@ -3,6 +3,8 @@ | |||
3 | 3 | ||
4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
5 | 5 | ||
6 | #include "util.h" /* for time_stats */ | ||
7 | |||
6 | /* | 8 | /* |
7 | * BKEYS: | 9 | * BKEYS: |
8 | * | 10 | * |
@@ -190,6 +192,33 @@ struct bset_tree { | |||
190 | struct bset *data; | 192 | struct bset *data; |
191 | }; | 193 | }; |
192 | 194 | ||
195 | /* Sorting */ | ||
196 | |||
197 | struct bset_sort_state { | ||
198 | mempool_t *pool; | ||
199 | |||
200 | unsigned page_order; | ||
201 | unsigned crit_factor; | ||
202 | |||
203 | struct time_stats time; | ||
204 | }; | ||
205 | |||
206 | void bch_bset_sort_state_free(struct bset_sort_state *); | ||
207 | int bch_bset_sort_state_init(struct bset_sort_state *, unsigned); | ||
208 | void bch_btree_sort_lazy(struct btree *, struct bset_sort_state *); | ||
209 | void bch_btree_sort_into(struct btree *, struct btree *, | ||
210 | struct bset_sort_state *); | ||
211 | void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *, | ||
212 | struct bset_sort_state *); | ||
213 | void bch_btree_sort_partial(struct btree *, unsigned, | ||
214 | struct bset_sort_state *); | ||
215 | |||
216 | static inline void bch_btree_sort(struct btree *b, | ||
217 | struct bset_sort_state *state) | ||
218 | { | ||
219 | bch_btree_sort_partial(b, 0, state); | ||
220 | } | ||
221 | |||
193 | /* Keylists */ | 222 | /* Keylists */ |
194 | 223 | ||
195 | struct keylist { | 224 | struct keylist { |
@@ -374,15 +403,6 @@ static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t, | |||
374 | }) | 403 | }) |
375 | 404 | ||
376 | bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *); | 405 | bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *); |
377 | void bch_btree_sort_lazy(struct btree *); | ||
378 | void bch_btree_sort_into(struct btree *, struct btree *); | ||
379 | void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *); | ||
380 | void bch_btree_sort_partial(struct btree *, unsigned); | ||
381 | |||
382 | static inline void bch_btree_sort(struct btree *b) | ||
383 | { | ||
384 | bch_btree_sort_partial(b, 0); | ||
385 | } | ||
386 | 406 | ||
387 | int bch_bset_print_stats(struct cache_set *, char *); | 407 | int bch_bset_print_stats(struct cache_set *, char *); |
388 | 408 | ||
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index e1e36e761724..78ba0b67ac16 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c | |||
@@ -263,7 +263,7 @@ void bch_btree_node_read_done(struct btree *b) | |||
263 | if (i->seq == b->sets[0].data->seq) | 263 | if (i->seq == b->sets[0].data->seq) |
264 | goto err; | 264 | goto err; |
265 | 265 | ||
266 | bch_btree_sort_and_fix_extents(b, iter); | 266 | bch_btree_sort_and_fix_extents(b, iter, &b->c->sort); |
267 | 267 | ||
268 | i = b->sets[0].data; | 268 | i = b->sets[0].data; |
269 | err = "short btree key"; | 269 | err = "short btree key"; |
@@ -476,7 +476,11 @@ void bch_btree_node_write(struct btree *b, struct closure *parent) | |||
476 | atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size, | 476 | atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size, |
477 | &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); | 477 | &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); |
478 | 478 | ||
479 | bch_btree_sort_lazy(b); | 479 | /* If not a leaf node, always sort */ |
480 | if (b->level && b->nsets) | ||
481 | bch_btree_sort(b, &b->c->sort); | ||
482 | else | ||
483 | bch_btree_sort_lazy(b, &b->c->sort); | ||
480 | 484 | ||
481 | /* | 485 | /* |
482 | * do verify if there was more than one set initially (i.e. we did a | 486 | * do verify if there was more than one set initially (i.e. we did a |
@@ -1125,8 +1129,10 @@ err: | |||
1125 | static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait) | 1129 | static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait) |
1126 | { | 1130 | { |
1127 | struct btree *n = bch_btree_node_alloc(b->c, b->level, wait); | 1131 | struct btree *n = bch_btree_node_alloc(b->c, b->level, wait); |
1128 | if (!IS_ERR_OR_NULL(n)) | 1132 | if (!IS_ERR_OR_NULL(n)) { |
1129 | bch_btree_sort_into(b, n); | 1133 | bch_btree_sort_into(b, n, &b->c->sort); |
1134 | bkey_copy_key(&n->key, &b->key); | ||
1135 | } | ||
1130 | 1136 | ||
1131 | return n; | 1137 | return n; |
1132 | } | 1138 | } |
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index d05e75627714..1fc8165a5c01 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c | |||
@@ -1351,6 +1351,7 @@ static void cache_set_free(struct closure *cl) | |||
1351 | if (ca) | 1351 | if (ca) |
1352 | kobject_put(&ca->kobj); | 1352 | kobject_put(&ca->kobj); |
1353 | 1353 | ||
1354 | bch_bset_sort_state_free(&c->sort); | ||
1354 | free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); | 1355 | free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); |
1355 | 1356 | ||
1356 | if (c->bio_split) | 1357 | if (c->bio_split) |
@@ -1481,15 +1482,12 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) | |||
1481 | c->btree_pages = max_t(int, c->btree_pages / 4, | 1482 | c->btree_pages = max_t(int, c->btree_pages / 4, |
1482 | BTREE_MAX_PAGES); | 1483 | BTREE_MAX_PAGES); |
1483 | 1484 | ||
1484 | c->sort_crit_factor = int_sqrt(c->btree_pages); | ||
1485 | |||
1486 | sema_init(&c->sb_write_mutex, 1); | 1485 | sema_init(&c->sb_write_mutex, 1); |
1487 | mutex_init(&c->bucket_lock); | 1486 | mutex_init(&c->bucket_lock); |
1488 | init_waitqueue_head(&c->try_wait); | 1487 | init_waitqueue_head(&c->try_wait); |
1489 | init_waitqueue_head(&c->bucket_wait); | 1488 | init_waitqueue_head(&c->bucket_wait); |
1490 | sema_init(&c->uuid_write_mutex, 1); | 1489 | sema_init(&c->uuid_write_mutex, 1); |
1491 | 1490 | ||
1492 | spin_lock_init(&c->sort_time.lock); | ||
1493 | spin_lock_init(&c->btree_gc_time.lock); | 1491 | spin_lock_init(&c->btree_gc_time.lock); |
1494 | spin_lock_init(&c->btree_split_time.lock); | 1492 | spin_lock_init(&c->btree_split_time.lock); |
1495 | spin_lock_init(&c->btree_read_time.lock); | 1493 | spin_lock_init(&c->btree_read_time.lock); |
@@ -1517,12 +1515,11 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) | |||
1517 | bucket_pages(c))) || | 1515 | bucket_pages(c))) || |
1518 | !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || | 1516 | !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || |
1519 | !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || | 1517 | !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || |
1520 | !(c->sort_pool = mempool_create_page_pool(1, | ||
1521 | ilog2(bucket_pages(c)))) || | ||
1522 | !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || | 1518 | !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || |
1523 | bch_journal_alloc(c) || | 1519 | bch_journal_alloc(c) || |
1524 | bch_btree_cache_alloc(c) || | 1520 | bch_btree_cache_alloc(c) || |
1525 | bch_open_buckets_alloc(c)) | 1521 | bch_open_buckets_alloc(c) || |
1522 | bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages))) | ||
1526 | goto err; | 1523 | goto err; |
1527 | 1524 | ||
1528 | c->congested_read_threshold_us = 2000; | 1525 | c->congested_read_threshold_us = 2000; |
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index d5dd282b176f..206c80fb27c1 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c | |||
@@ -490,7 +490,7 @@ lock_root: | |||
490 | 490 | ||
491 | sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms); | 491 | sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms); |
492 | sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us); | 492 | sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us); |
493 | sysfs_print_time_stats(&c->sort_time, btree_sort, ms, us); | 493 | sysfs_print_time_stats(&c->sort.time, btree_sort, ms, us); |
494 | sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us); | 494 | sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us); |
495 | sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us); | 495 | sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us); |
496 | 496 | ||