diff options
| author | Kent Overstreet <kmo@daterainc.com> | 2013-09-11 01:53:34 -0400 |
|---|---|---|
| committer | Kent Overstreet <kmo@daterainc.com> | 2014-01-08 16:05:12 -0500 |
| commit | 67539e85289c14a76a1c4162613d14a5f05a0027 (patch) | |
| tree | 7650b78775bf7b9f2b92113606d92a4a838a6753 | |
| parent | 911c9610099f26e9e6ea3d1962ce24f53890b163 (diff) | |
bcache: Add struct bset_sort_state
More disentangling bset.c from the rest of the bcache code - soon, the
sorting routines won't have any dependencies on any outside structs.
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
| -rw-r--r-- | drivers/md/bcache/bcache.h | 5 | ||||
| -rw-r--r-- | drivers/md/bcache/bset.c | 68 | ||||
| -rw-r--r-- | drivers/md/bcache/bset.h | 38 | ||||
| -rw-r--r-- | drivers/md/bcache/btree.c | 14 | ||||
| -rw-r--r-- | drivers/md/bcache/super.c | 9 | ||||
| -rw-r--r-- | drivers/md/bcache/sysfs.c | 2 |
6 files changed, 87 insertions, 49 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 2b46c86ac440..7bd4c93475e7 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h | |||
| @@ -187,6 +187,7 @@ | |||
| 187 | #include <linux/types.h> | 187 | #include <linux/types.h> |
| 188 | #include <linux/workqueue.h> | 188 | #include <linux/workqueue.h> |
| 189 | 189 | ||
| 190 | #include "bset.h" | ||
| 190 | #include "util.h" | 191 | #include "util.h" |
| 191 | #include "closure.h" | 192 | #include "closure.h" |
| 192 | 193 | ||
| @@ -645,8 +646,7 @@ struct cache_set { | |||
| 645 | */ | 646 | */ |
| 646 | mempool_t *fill_iter; | 647 | mempool_t *fill_iter; |
| 647 | 648 | ||
| 648 | mempool_t *sort_pool; | 649 | struct bset_sort_state sort; |
| 649 | unsigned sort_crit_factor; | ||
| 650 | 650 | ||
| 651 | /* List of buckets we're currently writing data to */ | 651 | /* List of buckets we're currently writing data to */ |
| 652 | struct list_head data_buckets; | 652 | struct list_head data_buckets; |
| @@ -662,7 +662,6 @@ struct cache_set { | |||
| 662 | unsigned congested_read_threshold_us; | 662 | unsigned congested_read_threshold_us; |
| 663 | unsigned congested_write_threshold_us; | 663 | unsigned congested_write_threshold_us; |
| 664 | 664 | ||
| 665 | struct time_stats sort_time; | ||
| 666 | struct time_stats btree_gc_time; | 665 | struct time_stats btree_gc_time; |
| 667 | struct time_stats btree_split_time; | 666 | struct time_stats btree_split_time; |
| 668 | struct time_stats btree_read_time; | 667 | struct time_stats btree_read_time; |
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 9e3a53d87de0..9d9c2edda760 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c | |||
| @@ -952,6 +952,26 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, | |||
| 952 | 952 | ||
| 953 | /* Mergesort */ | 953 | /* Mergesort */ |
| 954 | 954 | ||
| 955 | void bch_bset_sort_state_free(struct bset_sort_state *state) | ||
| 956 | { | ||
| 957 | if (state->pool) | ||
| 958 | mempool_destroy(state->pool); | ||
| 959 | } | ||
| 960 | |||
| 961 | int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order) | ||
| 962 | { | ||
| 963 | spin_lock_init(&state->time.lock); | ||
| 964 | |||
| 965 | state->page_order = page_order; | ||
| 966 | state->crit_factor = int_sqrt(1 << page_order); | ||
| 967 | |||
| 968 | state->pool = mempool_create_page_pool(1, page_order); | ||
| 969 | if (!state->pool) | ||
| 970 | return -ENOMEM; | ||
| 971 | |||
| 972 | return 0; | ||
| 973 | } | ||
| 974 | |||
| 955 | static void sort_key_next(struct btree_iter *iter, | 975 | static void sort_key_next(struct btree_iter *iter, |
| 956 | struct btree_iter_set *i) | 976 | struct btree_iter_set *i) |
| 957 | { | 977 | { |
| @@ -1077,22 +1097,24 @@ static void btree_mergesort(struct btree *b, struct bset *out, | |||
| 1077 | } | 1097 | } |
| 1078 | 1098 | ||
| 1079 | static void __btree_sort(struct btree *b, struct btree_iter *iter, | 1099 | static void __btree_sort(struct btree *b, struct btree_iter *iter, |
| 1080 | unsigned start, unsigned order, bool fixup) | 1100 | unsigned start, unsigned order, bool fixup, |
| 1101 | struct bset_sort_state *state) | ||
| 1081 | { | 1102 | { |
| 1082 | uint64_t start_time; | 1103 | uint64_t start_time; |
| 1083 | bool remove_stale = !b->written; | ||
| 1084 | bool used_mempool = false; | 1104 | bool used_mempool = false; |
| 1085 | struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO, | 1105 | struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO, |
| 1086 | order); | 1106 | order); |
| 1087 | if (!out) { | 1107 | if (!out) { |
| 1088 | out = page_address(mempool_alloc(b->c->sort_pool, GFP_NOIO)); | 1108 | BUG_ON(order > state->page_order); |
| 1109 | |||
| 1110 | out = page_address(mempool_alloc(state->pool, GFP_NOIO)); | ||
| 1089 | used_mempool = true; | 1111 | used_mempool = true; |
| 1090 | order = ilog2(bucket_pages(b->c)); | 1112 | order = ilog2(bucket_pages(b->c)); |
| 1091 | } | 1113 | } |
| 1092 | 1114 | ||
| 1093 | start_time = local_clock(); | 1115 | start_time = local_clock(); |
| 1094 | 1116 | ||
| 1095 | btree_mergesort(b, out, iter, fixup, remove_stale); | 1117 | btree_mergesort(b, out, iter, fixup, false); |
| 1096 | b->nsets = start; | 1118 | b->nsets = start; |
| 1097 | 1119 | ||
| 1098 | if (!start && order == b->page_order) { | 1120 | if (!start && order == b->page_order) { |
| @@ -1113,18 +1135,18 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, | |||
| 1113 | } | 1135 | } |
| 1114 | 1136 | ||
| 1115 | if (used_mempool) | 1137 | if (used_mempool) |
| 1116 | mempool_free(virt_to_page(out), b->c->sort_pool); | 1138 | mempool_free(virt_to_page(out), state->pool); |
| 1117 | else | 1139 | else |
| 1118 | free_pages((unsigned long) out, order); | 1140 | free_pages((unsigned long) out, order); |
| 1119 | 1141 | ||
| 1120 | if (b->written) | 1142 | bset_build_written_tree(b); |
| 1121 | bset_build_written_tree(b); | ||
| 1122 | 1143 | ||
| 1123 | if (!start) | 1144 | if (!start) |
| 1124 | bch_time_stats_update(&b->c->sort_time, start_time); | 1145 | bch_time_stats_update(&state->time, start_time); |
| 1125 | } | 1146 | } |
| 1126 | 1147 | ||
| 1127 | void bch_btree_sort_partial(struct btree *b, unsigned start) | 1148 | void bch_btree_sort_partial(struct btree *b, unsigned start, |
| 1149 | struct bset_sort_state *state) | ||
| 1128 | { | 1150 | { |
| 1129 | size_t order = b->page_order, keys = 0; | 1151 | size_t order = b->page_order, keys = 0; |
| 1130 | struct btree_iter iter; | 1152 | struct btree_iter iter; |
| @@ -1148,18 +1170,19 @@ void bch_btree_sort_partial(struct btree *b, unsigned start) | |||
| 1148 | order = ilog2(order); | 1170 | order = ilog2(order); |
| 1149 | } | 1171 | } |
| 1150 | 1172 | ||
| 1151 | __btree_sort(b, &iter, start, order, false); | 1173 | __btree_sort(b, &iter, start, order, false, state); |
| 1152 | 1174 | ||
| 1153 | EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize); | 1175 | EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize); |
| 1154 | } | 1176 | } |
| 1155 | 1177 | ||
| 1156 | void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter) | 1178 | void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter, |
| 1179 | struct bset_sort_state *state) | ||
| 1157 | { | 1180 | { |
| 1158 | BUG_ON(!b->written); | 1181 | __btree_sort(b, iter, 0, b->page_order, true, state); |
| 1159 | __btree_sort(b, iter, 0, b->page_order, true); | ||
| 1160 | } | 1182 | } |
| 1161 | 1183 | ||
| 1162 | void bch_btree_sort_into(struct btree *b, struct btree *new) | 1184 | void bch_btree_sort_into(struct btree *b, struct btree *new, |
| 1185 | struct bset_sort_state *state) | ||
| 1163 | { | 1186 | { |
| 1164 | uint64_t start_time = local_clock(); | 1187 | uint64_t start_time = local_clock(); |
| 1165 | 1188 | ||
| @@ -1168,15 +1191,14 @@ void bch_btree_sort_into(struct btree *b, struct btree *new) | |||
| 1168 | 1191 | ||
| 1169 | btree_mergesort(b, new->sets->data, &iter, false, true); | 1192 | btree_mergesort(b, new->sets->data, &iter, false, true); |
| 1170 | 1193 | ||
| 1171 | bch_time_stats_update(&b->c->sort_time, start_time); | 1194 | bch_time_stats_update(&state->time, start_time); |
| 1172 | 1195 | ||
| 1173 | bkey_copy_key(&new->key, &b->key); | ||
| 1174 | new->sets->size = 0; | 1196 | new->sets->size = 0; |
| 1175 | } | 1197 | } |
| 1176 | 1198 | ||
| 1177 | #define SORT_CRIT (4096 / sizeof(uint64_t)) | 1199 | #define SORT_CRIT (4096 / sizeof(uint64_t)) |
| 1178 | 1200 | ||
| 1179 | void bch_btree_sort_lazy(struct btree *b) | 1201 | void bch_btree_sort_lazy(struct btree *b, struct bset_sort_state *state) |
| 1180 | { | 1202 | { |
| 1181 | unsigned crit = SORT_CRIT; | 1203 | unsigned crit = SORT_CRIT; |
| 1182 | int i; | 1204 | int i; |
| @@ -1185,24 +1207,18 @@ void bch_btree_sort_lazy(struct btree *b) | |||
| 1185 | if (!b->nsets) | 1207 | if (!b->nsets) |
| 1186 | goto out; | 1208 | goto out; |
| 1187 | 1209 | ||
| 1188 | /* If not a leaf node, always sort */ | ||
| 1189 | if (b->level) { | ||
| 1190 | bch_btree_sort(b); | ||
| 1191 | return; | ||
| 1192 | } | ||
| 1193 | |||
| 1194 | for (i = b->nsets - 1; i >= 0; --i) { | 1210 | for (i = b->nsets - 1; i >= 0; --i) { |
| 1195 | crit *= b->c->sort_crit_factor; | 1211 | crit *= state->crit_factor; |
| 1196 | 1212 | ||
| 1197 | if (b->sets[i].data->keys < crit) { | 1213 | if (b->sets[i].data->keys < crit) { |
| 1198 | bch_btree_sort_partial(b, i); | 1214 | bch_btree_sort_partial(b, i, state); |
| 1199 | return; | 1215 | return; |
| 1200 | } | 1216 | } |
| 1201 | } | 1217 | } |
| 1202 | 1218 | ||
| 1203 | /* Sort if we'd overflow */ | 1219 | /* Sort if we'd overflow */ |
| 1204 | if (b->nsets + 1 == MAX_BSETS) { | 1220 | if (b->nsets + 1 == MAX_BSETS) { |
| 1205 | bch_btree_sort(b); | 1221 | bch_btree_sort(b, state); |
| 1206 | return; | 1222 | return; |
| 1207 | } | 1223 | } |
| 1208 | 1224 | ||
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 91bcbdb04085..4f60c21c7a38 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h | |||
| @@ -3,6 +3,8 @@ | |||
| 3 | 3 | ||
| 4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
| 5 | 5 | ||
| 6 | #include "util.h" /* for time_stats */ | ||
| 7 | |||
| 6 | /* | 8 | /* |
| 7 | * BKEYS: | 9 | * BKEYS: |
| 8 | * | 10 | * |
| @@ -190,6 +192,33 @@ struct bset_tree { | |||
| 190 | struct bset *data; | 192 | struct bset *data; |
| 191 | }; | 193 | }; |
| 192 | 194 | ||
| 195 | /* Sorting */ | ||
| 196 | |||
| 197 | struct bset_sort_state { | ||
| 198 | mempool_t *pool; | ||
| 199 | |||
| 200 | unsigned page_order; | ||
| 201 | unsigned crit_factor; | ||
| 202 | |||
| 203 | struct time_stats time; | ||
| 204 | }; | ||
| 205 | |||
| 206 | void bch_bset_sort_state_free(struct bset_sort_state *); | ||
| 207 | int bch_bset_sort_state_init(struct bset_sort_state *, unsigned); | ||
| 208 | void bch_btree_sort_lazy(struct btree *, struct bset_sort_state *); | ||
| 209 | void bch_btree_sort_into(struct btree *, struct btree *, | ||
| 210 | struct bset_sort_state *); | ||
| 211 | void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *, | ||
| 212 | struct bset_sort_state *); | ||
| 213 | void bch_btree_sort_partial(struct btree *, unsigned, | ||
| 214 | struct bset_sort_state *); | ||
| 215 | |||
| 216 | static inline void bch_btree_sort(struct btree *b, | ||
| 217 | struct bset_sort_state *state) | ||
| 218 | { | ||
| 219 | bch_btree_sort_partial(b, 0, state); | ||
| 220 | } | ||
| 221 | |||
| 193 | /* Keylists */ | 222 | /* Keylists */ |
| 194 | 223 | ||
| 195 | struct keylist { | 224 | struct keylist { |
| @@ -374,15 +403,6 @@ static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t, | |||
| 374 | }) | 403 | }) |
| 375 | 404 | ||
| 376 | bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *); | 405 | bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *); |
| 377 | void bch_btree_sort_lazy(struct btree *); | ||
| 378 | void bch_btree_sort_into(struct btree *, struct btree *); | ||
| 379 | void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *); | ||
| 380 | void bch_btree_sort_partial(struct btree *, unsigned); | ||
| 381 | |||
| 382 | static inline void bch_btree_sort(struct btree *b) | ||
| 383 | { | ||
| 384 | bch_btree_sort_partial(b, 0); | ||
| 385 | } | ||
| 386 | 406 | ||
| 387 | int bch_bset_print_stats(struct cache_set *, char *); | 407 | int bch_bset_print_stats(struct cache_set *, char *); |
| 388 | 408 | ||
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index e1e36e761724..78ba0b67ac16 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c | |||
| @@ -263,7 +263,7 @@ void bch_btree_node_read_done(struct btree *b) | |||
| 263 | if (i->seq == b->sets[0].data->seq) | 263 | if (i->seq == b->sets[0].data->seq) |
| 264 | goto err; | 264 | goto err; |
| 265 | 265 | ||
| 266 | bch_btree_sort_and_fix_extents(b, iter); | 266 | bch_btree_sort_and_fix_extents(b, iter, &b->c->sort); |
| 267 | 267 | ||
| 268 | i = b->sets[0].data; | 268 | i = b->sets[0].data; |
| 269 | err = "short btree key"; | 269 | err = "short btree key"; |
| @@ -476,7 +476,11 @@ void bch_btree_node_write(struct btree *b, struct closure *parent) | |||
| 476 | atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size, | 476 | atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size, |
| 477 | &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); | 477 | &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); |
| 478 | 478 | ||
| 479 | bch_btree_sort_lazy(b); | 479 | /* If not a leaf node, always sort */ |
| 480 | if (b->level && b->nsets) | ||
| 481 | bch_btree_sort(b, &b->c->sort); | ||
| 482 | else | ||
| 483 | bch_btree_sort_lazy(b, &b->c->sort); | ||
| 480 | 484 | ||
| 481 | /* | 485 | /* |
| 482 | * do verify if there was more than one set initially (i.e. we did a | 486 | * do verify if there was more than one set initially (i.e. we did a |
| @@ -1125,8 +1129,10 @@ err: | |||
| 1125 | static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait) | 1129 | static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait) |
| 1126 | { | 1130 | { |
| 1127 | struct btree *n = bch_btree_node_alloc(b->c, b->level, wait); | 1131 | struct btree *n = bch_btree_node_alloc(b->c, b->level, wait); |
| 1128 | if (!IS_ERR_OR_NULL(n)) | 1132 | if (!IS_ERR_OR_NULL(n)) { |
| 1129 | bch_btree_sort_into(b, n); | 1133 | bch_btree_sort_into(b, n, &b->c->sort); |
| 1134 | bkey_copy_key(&n->key, &b->key); | ||
| 1135 | } | ||
| 1130 | 1136 | ||
| 1131 | return n; | 1137 | return n; |
| 1132 | } | 1138 | } |
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index d05e75627714..1fc8165a5c01 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c | |||
| @@ -1351,6 +1351,7 @@ static void cache_set_free(struct closure *cl) | |||
| 1351 | if (ca) | 1351 | if (ca) |
| 1352 | kobject_put(&ca->kobj); | 1352 | kobject_put(&ca->kobj); |
| 1353 | 1353 | ||
| 1354 | bch_bset_sort_state_free(&c->sort); | ||
| 1354 | free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); | 1355 | free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); |
| 1355 | 1356 | ||
| 1356 | if (c->bio_split) | 1357 | if (c->bio_split) |
| @@ -1481,15 +1482,12 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) | |||
| 1481 | c->btree_pages = max_t(int, c->btree_pages / 4, | 1482 | c->btree_pages = max_t(int, c->btree_pages / 4, |
| 1482 | BTREE_MAX_PAGES); | 1483 | BTREE_MAX_PAGES); |
| 1483 | 1484 | ||
| 1484 | c->sort_crit_factor = int_sqrt(c->btree_pages); | ||
| 1485 | |||
| 1486 | sema_init(&c->sb_write_mutex, 1); | 1485 | sema_init(&c->sb_write_mutex, 1); |
| 1487 | mutex_init(&c->bucket_lock); | 1486 | mutex_init(&c->bucket_lock); |
| 1488 | init_waitqueue_head(&c->try_wait); | 1487 | init_waitqueue_head(&c->try_wait); |
| 1489 | init_waitqueue_head(&c->bucket_wait); | 1488 | init_waitqueue_head(&c->bucket_wait); |
| 1490 | sema_init(&c->uuid_write_mutex, 1); | 1489 | sema_init(&c->uuid_write_mutex, 1); |
| 1491 | 1490 | ||
| 1492 | spin_lock_init(&c->sort_time.lock); | ||
| 1493 | spin_lock_init(&c->btree_gc_time.lock); | 1491 | spin_lock_init(&c->btree_gc_time.lock); |
| 1494 | spin_lock_init(&c->btree_split_time.lock); | 1492 | spin_lock_init(&c->btree_split_time.lock); |
| 1495 | spin_lock_init(&c->btree_read_time.lock); | 1493 | spin_lock_init(&c->btree_read_time.lock); |
| @@ -1517,12 +1515,11 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) | |||
| 1517 | bucket_pages(c))) || | 1515 | bucket_pages(c))) || |
| 1518 | !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || | 1516 | !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || |
| 1519 | !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || | 1517 | !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || |
| 1520 | !(c->sort_pool = mempool_create_page_pool(1, | ||
| 1521 | ilog2(bucket_pages(c)))) || | ||
| 1522 | !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || | 1518 | !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || |
| 1523 | bch_journal_alloc(c) || | 1519 | bch_journal_alloc(c) || |
| 1524 | bch_btree_cache_alloc(c) || | 1520 | bch_btree_cache_alloc(c) || |
| 1525 | bch_open_buckets_alloc(c)) | 1521 | bch_open_buckets_alloc(c) || |
| 1522 | bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages))) | ||
| 1526 | goto err; | 1523 | goto err; |
| 1527 | 1524 | ||
| 1528 | c->congested_read_threshold_us = 2000; | 1525 | c->congested_read_threshold_us = 2000; |
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index d5dd282b176f..206c80fb27c1 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c | |||
| @@ -490,7 +490,7 @@ lock_root: | |||
| 490 | 490 | ||
| 491 | sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms); | 491 | sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms); |
| 492 | sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us); | 492 | sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us); |
| 493 | sysfs_print_time_stats(&c->sort_time, btree_sort, ms, us); | 493 | sysfs_print_time_stats(&c->sort.time, btree_sort, ms, us); |
| 494 | sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us); | 494 | sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us); |
| 495 | sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us); | 495 | sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us); |
| 496 | 496 | ||
