aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorKent Overstreet <kmo@daterainc.com>2013-09-11 01:53:34 -0400
committerKent Overstreet <kmo@daterainc.com>2014-01-08 16:05:12 -0500
commit67539e85289c14a76a1c4162613d14a5f05a0027 (patch)
tree7650b78775bf7b9f2b92113606d92a4a838a6753 /drivers/md
parent911c9610099f26e9e6ea3d1962ce24f53890b163 (diff)
bcache: Add struct bset_sort_state
More disentangling bset.c from the rest of the bcache code - soon, the sorting routines won't have any dependencies on any outside structs. Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bcache/bcache.h5
-rw-r--r--drivers/md/bcache/bset.c68
-rw-r--r--drivers/md/bcache/bset.h38
-rw-r--r--drivers/md/bcache/btree.c14
-rw-r--r--drivers/md/bcache/super.c9
-rw-r--r--drivers/md/bcache/sysfs.c2
6 files changed, 87 insertions, 49 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 2b46c86ac440..7bd4c93475e7 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -187,6 +187,7 @@
187#include <linux/types.h> 187#include <linux/types.h>
188#include <linux/workqueue.h> 188#include <linux/workqueue.h>
189 189
190#include "bset.h"
190#include "util.h" 191#include "util.h"
191#include "closure.h" 192#include "closure.h"
192 193
@@ -645,8 +646,7 @@ struct cache_set {
645 */ 646 */
646 mempool_t *fill_iter; 647 mempool_t *fill_iter;
647 648
648 mempool_t *sort_pool; 649 struct bset_sort_state sort;
649 unsigned sort_crit_factor;
650 650
651 /* List of buckets we're currently writing data to */ 651 /* List of buckets we're currently writing data to */
652 struct list_head data_buckets; 652 struct list_head data_buckets;
@@ -662,7 +662,6 @@ struct cache_set {
662 unsigned congested_read_threshold_us; 662 unsigned congested_read_threshold_us;
663 unsigned congested_write_threshold_us; 663 unsigned congested_write_threshold_us;
664 664
665 struct time_stats sort_time;
666 struct time_stats btree_gc_time; 665 struct time_stats btree_gc_time;
667 struct time_stats btree_split_time; 666 struct time_stats btree_split_time;
668 struct time_stats btree_read_time; 667 struct time_stats btree_read_time;
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 9e3a53d87de0..9d9c2edda760 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -952,6 +952,26 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
952 952
953/* Mergesort */ 953/* Mergesort */
954 954
955void bch_bset_sort_state_free(struct bset_sort_state *state)
956{
957 if (state->pool)
958 mempool_destroy(state->pool);
959}
960
961int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
962{
963 spin_lock_init(&state->time.lock);
964
965 state->page_order = page_order;
966 state->crit_factor = int_sqrt(1 << page_order);
967
968 state->pool = mempool_create_page_pool(1, page_order);
969 if (!state->pool)
970 return -ENOMEM;
971
972 return 0;
973}
974
955static void sort_key_next(struct btree_iter *iter, 975static void sort_key_next(struct btree_iter *iter,
956 struct btree_iter_set *i) 976 struct btree_iter_set *i)
957{ 977{
@@ -1077,22 +1097,24 @@ static void btree_mergesort(struct btree *b, struct bset *out,
1077} 1097}
1078 1098
1079static void __btree_sort(struct btree *b, struct btree_iter *iter, 1099static void __btree_sort(struct btree *b, struct btree_iter *iter,
1080 unsigned start, unsigned order, bool fixup) 1100 unsigned start, unsigned order, bool fixup,
1101 struct bset_sort_state *state)
1081{ 1102{
1082 uint64_t start_time; 1103 uint64_t start_time;
1083 bool remove_stale = !b->written;
1084 bool used_mempool = false; 1104 bool used_mempool = false;
1085 struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO, 1105 struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO,
1086 order); 1106 order);
1087 if (!out) { 1107 if (!out) {
1088 out = page_address(mempool_alloc(b->c->sort_pool, GFP_NOIO)); 1108 BUG_ON(order > state->page_order);
1109
1110 out = page_address(mempool_alloc(state->pool, GFP_NOIO));
1089 used_mempool = true; 1111 used_mempool = true;
1090 order = ilog2(bucket_pages(b->c)); 1112 order = ilog2(bucket_pages(b->c));
1091 } 1113 }
1092 1114
1093 start_time = local_clock(); 1115 start_time = local_clock();
1094 1116
1095 btree_mergesort(b, out, iter, fixup, remove_stale); 1117 btree_mergesort(b, out, iter, fixup, false);
1096 b->nsets = start; 1118 b->nsets = start;
1097 1119
1098 if (!start && order == b->page_order) { 1120 if (!start && order == b->page_order) {
@@ -1113,18 +1135,18 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter,
1113 } 1135 }
1114 1136
1115 if (used_mempool) 1137 if (used_mempool)
1116 mempool_free(virt_to_page(out), b->c->sort_pool); 1138 mempool_free(virt_to_page(out), state->pool);
1117 else 1139 else
1118 free_pages((unsigned long) out, order); 1140 free_pages((unsigned long) out, order);
1119 1141
1120 if (b->written) 1142 bset_build_written_tree(b);
1121 bset_build_written_tree(b);
1122 1143
1123 if (!start) 1144 if (!start)
1124 bch_time_stats_update(&b->c->sort_time, start_time); 1145 bch_time_stats_update(&state->time, start_time);
1125} 1146}
1126 1147
1127void bch_btree_sort_partial(struct btree *b, unsigned start) 1148void bch_btree_sort_partial(struct btree *b, unsigned start,
1149 struct bset_sort_state *state)
1128{ 1150{
1129 size_t order = b->page_order, keys = 0; 1151 size_t order = b->page_order, keys = 0;
1130 struct btree_iter iter; 1152 struct btree_iter iter;
@@ -1148,18 +1170,19 @@ void bch_btree_sort_partial(struct btree *b, unsigned start)
1148 order = ilog2(order); 1170 order = ilog2(order);
1149 } 1171 }
1150 1172
1151 __btree_sort(b, &iter, start, order, false); 1173 __btree_sort(b, &iter, start, order, false, state);
1152 1174
1153 EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize); 1175 EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize);
1154} 1176}
1155 1177
1156void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter) 1178void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter,
1179 struct bset_sort_state *state)
1157{ 1180{
1158 BUG_ON(!b->written); 1181 __btree_sort(b, iter, 0, b->page_order, true, state);
1159 __btree_sort(b, iter, 0, b->page_order, true);
1160} 1182}
1161 1183
1162void bch_btree_sort_into(struct btree *b, struct btree *new) 1184void bch_btree_sort_into(struct btree *b, struct btree *new,
1185 struct bset_sort_state *state)
1163{ 1186{
1164 uint64_t start_time = local_clock(); 1187 uint64_t start_time = local_clock();
1165 1188
@@ -1168,15 +1191,14 @@ void bch_btree_sort_into(struct btree *b, struct btree *new)
1168 1191
1169 btree_mergesort(b, new->sets->data, &iter, false, true); 1192 btree_mergesort(b, new->sets->data, &iter, false, true);
1170 1193
1171 bch_time_stats_update(&b->c->sort_time, start_time); 1194 bch_time_stats_update(&state->time, start_time);
1172 1195
1173 bkey_copy_key(&new->key, &b->key);
1174 new->sets->size = 0; 1196 new->sets->size = 0;
1175} 1197}
1176 1198
1177#define SORT_CRIT (4096 / sizeof(uint64_t)) 1199#define SORT_CRIT (4096 / sizeof(uint64_t))
1178 1200
1179void bch_btree_sort_lazy(struct btree *b) 1201void bch_btree_sort_lazy(struct btree *b, struct bset_sort_state *state)
1180{ 1202{
1181 unsigned crit = SORT_CRIT; 1203 unsigned crit = SORT_CRIT;
1182 int i; 1204 int i;
@@ -1185,24 +1207,18 @@ void bch_btree_sort_lazy(struct btree *b)
1185 if (!b->nsets) 1207 if (!b->nsets)
1186 goto out; 1208 goto out;
1187 1209
1188 /* If not a leaf node, always sort */
1189 if (b->level) {
1190 bch_btree_sort(b);
1191 return;
1192 }
1193
1194 for (i = b->nsets - 1; i >= 0; --i) { 1210 for (i = b->nsets - 1; i >= 0; --i) {
1195 crit *= b->c->sort_crit_factor; 1211 crit *= state->crit_factor;
1196 1212
1197 if (b->sets[i].data->keys < crit) { 1213 if (b->sets[i].data->keys < crit) {
1198 bch_btree_sort_partial(b, i); 1214 bch_btree_sort_partial(b, i, state);
1199 return; 1215 return;
1200 } 1216 }
1201 } 1217 }
1202 1218
1203 /* Sort if we'd overflow */ 1219 /* Sort if we'd overflow */
1204 if (b->nsets + 1 == MAX_BSETS) { 1220 if (b->nsets + 1 == MAX_BSETS) {
1205 bch_btree_sort(b); 1221 bch_btree_sort(b, state);
1206 return; 1222 return;
1207 } 1223 }
1208 1224
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 91bcbdb04085..4f60c21c7a38 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -3,6 +3,8 @@
3 3
4#include <linux/slab.h> 4#include <linux/slab.h>
5 5
6#include "util.h" /* for time_stats */
7
6/* 8/*
7 * BKEYS: 9 * BKEYS:
8 * 10 *
@@ -190,6 +192,33 @@ struct bset_tree {
190 struct bset *data; 192 struct bset *data;
191}; 193};
192 194
195/* Sorting */
196
197struct bset_sort_state {
198 mempool_t *pool;
199
200 unsigned page_order;
201 unsigned crit_factor;
202
203 struct time_stats time;
204};
205
206void bch_bset_sort_state_free(struct bset_sort_state *);
207int bch_bset_sort_state_init(struct bset_sort_state *, unsigned);
208void bch_btree_sort_lazy(struct btree *, struct bset_sort_state *);
209void bch_btree_sort_into(struct btree *, struct btree *,
210 struct bset_sort_state *);
211void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *,
212 struct bset_sort_state *);
213void bch_btree_sort_partial(struct btree *, unsigned,
214 struct bset_sort_state *);
215
216static inline void bch_btree_sort(struct btree *b,
217 struct bset_sort_state *state)
218{
219 bch_btree_sort_partial(b, 0, state);
220}
221
193/* Keylists */ 222/* Keylists */
194 223
195struct keylist { 224struct keylist {
@@ -374,15 +403,6 @@ static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
374}) 403})
375 404
376bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *); 405bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
377void bch_btree_sort_lazy(struct btree *);
378void bch_btree_sort_into(struct btree *, struct btree *);
379void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *);
380void bch_btree_sort_partial(struct btree *, unsigned);
381
382static inline void bch_btree_sort(struct btree *b)
383{
384 bch_btree_sort_partial(b, 0);
385}
386 406
387int bch_bset_print_stats(struct cache_set *, char *); 407int bch_bset_print_stats(struct cache_set *, char *);
388 408
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index e1e36e761724..78ba0b67ac16 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -263,7 +263,7 @@ void bch_btree_node_read_done(struct btree *b)
263 if (i->seq == b->sets[0].data->seq) 263 if (i->seq == b->sets[0].data->seq)
264 goto err; 264 goto err;
265 265
266 bch_btree_sort_and_fix_extents(b, iter); 266 bch_btree_sort_and_fix_extents(b, iter, &b->c->sort);
267 267
268 i = b->sets[0].data; 268 i = b->sets[0].data;
269 err = "short btree key"; 269 err = "short btree key";
@@ -476,7 +476,11 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
476 atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size, 476 atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
477 &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); 477 &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
478 478
479 bch_btree_sort_lazy(b); 479 /* If not a leaf node, always sort */
480 if (b->level && b->nsets)
481 bch_btree_sort(b, &b->c->sort);
482 else
483 bch_btree_sort_lazy(b, &b->c->sort);
480 484
481 /* 485 /*
482 * do verify if there was more than one set initially (i.e. we did a 486 * do verify if there was more than one set initially (i.e. we did a
@@ -1125,8 +1129,10 @@ err:
1125static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait) 1129static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait)
1126{ 1130{
1127 struct btree *n = bch_btree_node_alloc(b->c, b->level, wait); 1131 struct btree *n = bch_btree_node_alloc(b->c, b->level, wait);
1128 if (!IS_ERR_OR_NULL(n)) 1132 if (!IS_ERR_OR_NULL(n)) {
1129 bch_btree_sort_into(b, n); 1133 bch_btree_sort_into(b, n, &b->c->sort);
1134 bkey_copy_key(&n->key, &b->key);
1135 }
1130 1136
1131 return n; 1137 return n;
1132} 1138}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index d05e75627714..1fc8165a5c01 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1351,6 +1351,7 @@ static void cache_set_free(struct closure *cl)
1351 if (ca) 1351 if (ca)
1352 kobject_put(&ca->kobj); 1352 kobject_put(&ca->kobj);
1353 1353
1354 bch_bset_sort_state_free(&c->sort);
1354 free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); 1355 free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
1355 1356
1356 if (c->bio_split) 1357 if (c->bio_split)
@@ -1481,15 +1482,12 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
1481 c->btree_pages = max_t(int, c->btree_pages / 4, 1482 c->btree_pages = max_t(int, c->btree_pages / 4,
1482 BTREE_MAX_PAGES); 1483 BTREE_MAX_PAGES);
1483 1484
1484 c->sort_crit_factor = int_sqrt(c->btree_pages);
1485
1486 sema_init(&c->sb_write_mutex, 1); 1485 sema_init(&c->sb_write_mutex, 1);
1487 mutex_init(&c->bucket_lock); 1486 mutex_init(&c->bucket_lock);
1488 init_waitqueue_head(&c->try_wait); 1487 init_waitqueue_head(&c->try_wait);
1489 init_waitqueue_head(&c->bucket_wait); 1488 init_waitqueue_head(&c->bucket_wait);
1490 sema_init(&c->uuid_write_mutex, 1); 1489 sema_init(&c->uuid_write_mutex, 1);
1491 1490
1492 spin_lock_init(&c->sort_time.lock);
1493 spin_lock_init(&c->btree_gc_time.lock); 1491 spin_lock_init(&c->btree_gc_time.lock);
1494 spin_lock_init(&c->btree_split_time.lock); 1492 spin_lock_init(&c->btree_split_time.lock);
1495 spin_lock_init(&c->btree_read_time.lock); 1493 spin_lock_init(&c->btree_read_time.lock);
@@ -1517,12 +1515,11 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
1517 bucket_pages(c))) || 1515 bucket_pages(c))) ||
1518 !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || 1516 !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
1519 !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || 1517 !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
1520 !(c->sort_pool = mempool_create_page_pool(1,
1521 ilog2(bucket_pages(c)))) ||
1522 !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || 1518 !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
1523 bch_journal_alloc(c) || 1519 bch_journal_alloc(c) ||
1524 bch_btree_cache_alloc(c) || 1520 bch_btree_cache_alloc(c) ||
1525 bch_open_buckets_alloc(c)) 1521 bch_open_buckets_alloc(c) ||
1522 bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages)))
1526 goto err; 1523 goto err;
1527 1524
1528 c->congested_read_threshold_us = 2000; 1525 c->congested_read_threshold_us = 2000;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index d5dd282b176f..206c80fb27c1 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -490,7 +490,7 @@ lock_root:
490 490
491 sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms); 491 sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms);
492 sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us); 492 sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us);
493 sysfs_print_time_stats(&c->sort_time, btree_sort, ms, us); 493 sysfs_print_time_stats(&c->sort.time, btree_sort, ms, us);
494 sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us); 494 sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us);
495 sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us); 495 sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us);
496 496