-rw-r--r-- | drivers/md/bcache/alloc.c  |  23
-rw-r--r-- | drivers/md/bcache/bcache.h |  15
-rw-r--r-- | drivers/md/bcache/btree.c  | 225
-rw-r--r-- | drivers/md/bcache/btree.h  |   5
-rw-r--r-- | drivers/md/bcache/super.c  |  13
-rw-r--r-- | drivers/md/bcache/sysfs.c  |   3
6 files changed, 145 insertions, 139 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 5ba4eaea57f4..a59ef6147fc7 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -375,6 +375,7 @@ static int bch_allocator_thread(void *arg)
 			}
 
 			allocator_wait(ca, bch_allocator_push(ca, bucket));
+			wake_up(&ca->set->btree_cache_wait);
 			wake_up(&ca->set->bucket_wait);
 		}
 
@@ -717,25 +718,3 @@ int bch_cache_allocator_start(struct cache *ca)
 	ca->alloc_thread = k;
 	return 0;
 }
-
-int bch_cache_allocator_init(struct cache *ca)
-{
-	/*
-	 * Reserve:
-	 * Prio/gen writes first
-	 * Then 8 for btree allocations
-	 * Then half for the moving garbage collector
-	 */
-#if 0
-	ca->watermark[WATERMARK_PRIO] = 0;
-
-	ca->watermark[WATERMARK_METADATA] = prio_buckets(ca);
-
-	ca->watermark[WATERMARK_MOVINGGC] = 8 +
-		ca->watermark[WATERMARK_METADATA];
-
-	ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
-		ca->watermark[WATERMARK_MOVINGGC];
-#endif
-	return 0;
-}
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 15d26236caf9..171cda89cb6b 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -562,19 +562,16 @@ struct cache_set {
 	struct list_head	btree_cache_freed;
 
 	/* Number of elements in btree_cache + btree_cache_freeable lists */
-	unsigned		bucket_cache_used;
+	unsigned		btree_cache_used;
 
 	/*
 	 * If we need to allocate memory for a new btree node and that
 	 * allocation fails, we can cannibalize another node in the btree cache
-	 * to satisfy the allocation. However, only one thread can be doing this
-	 * at a time, for obvious reasons - try_harder and try_wait are
-	 * basically a lock for this that we can wait on asynchronously. The
-	 * btree_root() macro releases the lock when it returns.
+	 * to satisfy the allocation - lock to guarantee only one thread does
+	 * this at a time:
 	 */
-	struct task_struct	*try_harder;
-	wait_queue_head_t	try_wait;
-	uint64_t		try_harder_start;
+	wait_queue_head_t	btree_cache_wait;
+	struct task_struct	*btree_cache_alloc_lock;
 
 	/*
 	 * When we free a btree node, we increment the gen of the bucket the
@@ -669,7 +666,6 @@ struct cache_set {
 	struct time_stats	btree_gc_time;
 	struct time_stats	btree_split_time;
 	struct time_stats	btree_read_time;
-	struct time_stats	try_harder_time;
 
 	atomic_long_t		cache_read_races;
 	atomic_long_t		writeback_keys_done;
@@ -956,7 +952,6 @@ int bch_open_buckets_alloc(struct cache_set *);
 void bch_open_buckets_free(struct cache_set *);
 
 int bch_cache_allocator_start(struct cache *ca);
-int bch_cache_allocator_init(struct cache *ca);
 
 void bch_debug_exit(void);
 int bch_debug_init(struct kobject *);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index beb32551da78..be90596a9e2a 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -117,7 +117,7 @@
 ({									\
 	int _r, l = (b)->level - 1;					\
 	bool _w = l <= (op)->lock;					\
-	struct btree *_child = bch_btree_node_get((b)->c, key, l, _w);	\
+	struct btree *_child = bch_btree_node_get((b)->c, op, key, l, _w);\
 	if (!IS_ERR(_child)) {						\
 		_child->parent = (b);					\
 		_r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__);	\
@@ -146,17 +146,12 @@
 			_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__);	\
 		}							\
 		rw_unlock(_w, _b);					\
+		bch_cannibalize_unlock(c);				\
 		if (_r == -EINTR)					\
 			schedule();					\
-		bch_cannibalize_unlock(c);				\
-		if (_r == -ENOSPC) {					\
-			wait_event((c)->try_wait,			\
-				   !(c)->try_harder);			\
-			_r = -EINTR;					\
-		}							\
 	} while (_r == -EINTR);						\
 									\
-	finish_wait(&(c)->bucket_wait, &(op)->wait);			\
+	finish_wait(&(c)->btree_cache_wait, &(op)->wait);		\
 	_r;								\
 })
 
@@ -563,7 +558,7 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
 #define mca_reserve(c)	(((c->root && c->root->level)		\
 			  ? c->root->level : 1) * 8 + 16)
 #define mca_can_free(c)						\
-	max_t(int, 0, c->bucket_cache_used - mca_reserve(c))
+	max_t(int, 0, c->btree_cache_used - mca_reserve(c))
 
 static void mca_data_free(struct btree *b)
 {
@@ -571,7 +566,7 @@ static void mca_data_free(struct btree *b)
 
 	bch_btree_keys_free(&b->keys);
 
-	b->c->bucket_cache_used--;
+	b->c->btree_cache_used--;
 	list_move(&b->list, &b->c->btree_cache_freed);
 }
 
@@ -596,7 +591,7 @@ static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
 					ilog2(b->c->btree_pages),
 					btree_order(k)),
 				  gfp)) {
-		b->c->bucket_cache_used++;
+		b->c->btree_cache_used++;
 		list_move(&b->list, &b->c->btree_cache);
 	} else {
 		list_move(&b->list, &b->c->btree_cache_freed);
@@ -675,7 +670,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
 	if (c->shrinker_disabled)
 		return SHRINK_STOP;
 
-	if (c->try_harder)
+	if (c->btree_cache_alloc_lock)
 		return SHRINK_STOP;
 
 	/* Return -1 if we can't do anything right now */
@@ -707,7 +702,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
 		}
 	}
 
-	for (i = 0; (nr--) && i < c->bucket_cache_used; i++) {
+	for (i = 0; (nr--) && i < c->btree_cache_used; i++) {
 		if (list_empty(&c->btree_cache))
 			goto out;
 
@@ -736,7 +731,7 @@ static unsigned long bch_mca_count(struct shrinker *shrink,
 	if (c->shrinker_disabled)
 		return 0;
 
-	if (c->try_harder)
+	if (c->btree_cache_alloc_lock)
 		return 0;
 
 	return mca_can_free(c) * c->btree_pages;
@@ -840,17 +835,30 @@ out:
 	return b;
 }
 
-static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k)
+static int mca_cannibalize_lock(struct cache_set *c, struct btree_op *op)
+{
+	struct task_struct *old;
+
+	old = cmpxchg(&c->btree_cache_alloc_lock, NULL, current);
+	if (old && old != current) {
+		if (op)
+			prepare_to_wait(&c->btree_cache_wait, &op->wait,
+					TASK_UNINTERRUPTIBLE);
+		return -EINTR;
+	}
+
+	return 0;
+}
+
+static struct btree *mca_cannibalize(struct cache_set *c, struct btree_op *op,
+				     struct bkey *k)
 {
 	struct btree *b;
 
 	trace_bcache_btree_cache_cannibalize(c);
 
-	if (!c->try_harder) {
-		c->try_harder = current;
-		c->try_harder_start = local_clock();
-	} else if (c->try_harder != current)
-		return ERR_PTR(-ENOSPC);
+	if (mca_cannibalize_lock(c, op))
+		return ERR_PTR(-EINTR);
 
 	list_for_each_entry_reverse(b, &c->btree_cache, list)
 		if (!mca_reap(b, btree_order(k), false))
@@ -860,6 +868,7 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k)
 		if (!mca_reap(b, btree_order(k), true))
 			return b;
 
+	WARN(1, "btree cache cannibalize failed\n");
 	return ERR_PTR(-ENOMEM);
 }
 
@@ -871,14 +880,14 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k)
  */
 static void bch_cannibalize_unlock(struct cache_set *c)
 {
-	if (c->try_harder == current) {
-		bch_time_stats_update(&c->try_harder_time, c->try_harder_start);
-		c->try_harder = NULL;
-		wake_up(&c->try_wait);
+	if (c->btree_cache_alloc_lock == current) {
+		c->btree_cache_alloc_lock = NULL;
+		wake_up(&c->btree_cache_wait);
 	}
 }
 
-static struct btree *mca_alloc(struct cache_set *c, struct bkey *k, int level)
+static struct btree *mca_alloc(struct cache_set *c, struct btree_op *op,
+			       struct bkey *k, int level)
 {
 	struct btree *b;
 
@@ -941,7 +950,7 @@ err:
 	if (b)
 		rw_unlock(true, b);
 
-	b = mca_cannibalize(c, k);
+	b = mca_cannibalize(c, op, k);
 	if (!IS_ERR(b))
 		goto out;
 
@@ -957,8 +966,8 @@ err:
  * The btree node will have either a read or a write lock held, depending on
  * level and op->lock.
  */
-struct btree *bch_btree_node_get(struct cache_set *c, struct bkey *k,
-				 int level, bool write)
+struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
+				 struct bkey *k, int level, bool write)
 {
 	int i = 0;
 	struct btree *b;
@@ -972,7 +981,7 @@ retry:
 		return ERR_PTR(-EAGAIN);
 
 	mutex_lock(&c->bucket_lock);
-	b = mca_alloc(c, k, level);
+	b = mca_alloc(c, op, k, level);
 	mutex_unlock(&c->bucket_lock);
 
 	if (!b)
@@ -1018,7 +1027,7 @@ static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
 	struct btree *b;
 
 	mutex_lock(&c->bucket_lock);
-	b = mca_alloc(c, k, level);
+	b = mca_alloc(c, NULL, k, level);
 	mutex_unlock(&c->bucket_lock);
 
 	if (!IS_ERR_OR_NULL(b)) {
@@ -1051,20 +1060,21 @@ static void btree_node_free(struct btree *b)
 	mutex_unlock(&b->c->bucket_lock);
 }
 
-struct btree *bch_btree_node_alloc(struct cache_set *c, int level, bool wait)
+struct btree *bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
+				   int level)
 {
 	BKEY_PADDED(key) k;
 	struct btree *b = ERR_PTR(-EAGAIN);
 
 	mutex_lock(&c->bucket_lock);
 retry:
-	if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait))
+	if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, op != NULL))
 		goto err;
 
 	bkey_put(c, &k.key);
 	SET_KEY_SIZE(&k.key, c->btree_pages * PAGE_SECTORS);
 
-	b = mca_alloc(c, &k.key, level);
+	b = mca_alloc(c, op, &k.key, level);
 	if (IS_ERR(b))
 		goto err_free;
 
@@ -1090,9 +1100,10 @@ err:
 	return b;
 }
 
-static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait)
+static struct btree *btree_node_alloc_replacement(struct btree *b,
+						  struct btree_op *op)
 {
-	struct btree *n = bch_btree_node_alloc(b->c, b->level, wait);
+	struct btree *n = bch_btree_node_alloc(b->c, op, b->level);
 	if (!IS_ERR_OR_NULL(n)) {
 		mutex_lock(&n->write_lock);
 		bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
@@ -1126,22 +1137,22 @@ static int btree_check_reserve(struct btree *b, struct btree_op *op)
 {
 	struct cache_set *c = b->c;
 	struct cache *ca;
-	unsigned i, reserve = c->root->level * 2 + 1;
-	int ret = 0;
+	unsigned i, reserve = (c->root->level - b->level) * 2 + 1;
 
 	mutex_lock(&c->bucket_lock);
 
 	for_each_cache(ca, c, i)
 		if (fifo_used(&ca->free[RESERVE_BTREE]) < reserve) {
 			if (op)
-				prepare_to_wait(&c->bucket_wait, &op->wait,
+				prepare_to_wait(&c->btree_cache_wait, &op->wait,
 						TASK_UNINTERRUPTIBLE);
-			ret = -EINTR;
-			break;
+			mutex_unlock(&c->bucket_lock);
+			return -EINTR;
 		}
 
 	mutex_unlock(&c->bucket_lock);
-	return ret;
+
+	return mca_cannibalize_lock(b->c, op);
 }
 
 /* Garbage collection */
@@ -1273,14 +1284,19 @@ static int bch_btree_insert_node(struct btree *, struct btree_op *,
 				 struct keylist *, atomic_t *, struct bkey *);
 
 static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
-			     struct keylist *keylist, struct gc_stat *gc,
-			     struct gc_merge_info *r)
+			     struct gc_stat *gc, struct gc_merge_info *r)
 {
 	unsigned i, nodes = 0, keys = 0, blocks;
 	struct btree *new_nodes[GC_MERGE_NODES];
+	struct keylist keylist;
 	struct closure cl;
 	struct bkey *k;
 
+	bch_keylist_init(&keylist);
+
+	if (btree_check_reserve(b, NULL))
+		return 0;
+
 	memset(new_nodes, 0, sizeof(new_nodes));
 	closure_init_stack(&cl);
 
@@ -1295,11 +1311,20 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 		return 0;
 
 	for (i = 0; i < nodes; i++) {
-		new_nodes[i] = btree_node_alloc_replacement(r[i].b, false);
+		new_nodes[i] = btree_node_alloc_replacement(r[i].b, NULL);
 		if (IS_ERR_OR_NULL(new_nodes[i]))
 			goto out_nocoalesce;
 	}
 
+	/*
+	 * We have to check the reserve here, after we've allocated our new
+	 * nodes, to make sure the insert below will succeed - we also check
+	 * before as an optimization to potentially avoid a bunch of expensive
+	 * allocs/sorts
+	 */
+	if (btree_check_reserve(b, NULL))
+		goto out_nocoalesce;
+
 	for (i = 0; i < nodes; i++)
 		mutex_lock(&new_nodes[i]->write_lock);
 
@@ -1361,12 +1386,12 @@
 
 			n2->keys -= keys;
 
-		if (__bch_keylist_realloc(keylist,
+		if (__bch_keylist_realloc(&keylist,
 					  bkey_u64s(&new_nodes[i]->key)))
 			goto out_nocoalesce;
 
 		bch_btree_node_write(new_nodes[i], &cl);
-		bch_keylist_add(keylist, &new_nodes[i]->key);
+		bch_keylist_add(&keylist, &new_nodes[i]->key);
 	}
 
 	for (i = 0; i < nodes; i++)
@@ -1380,15 +1405,15 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 	rw_unlock(true, new_nodes[0]);
 
 	for (i = 0; i < nodes; i++) {
-		if (__bch_keylist_realloc(keylist, bkey_u64s(&r[i].b->key)))
+		if (__bch_keylist_realloc(&keylist, bkey_u64s(&r[i].b->key)))
 			goto out_nocoalesce;
 
-		make_btree_freeing_key(r[i].b, keylist->top);
-		bch_keylist_push(keylist);
+		make_btree_freeing_key(r[i].b, keylist.top);
+		bch_keylist_push(&keylist);
 	}
 
-	bch_btree_insert_node(b, op, keylist, NULL, NULL);
-	BUG_ON(!bch_keylist_empty(keylist));
+	bch_btree_insert_node(b, op, &keylist, NULL, NULL);
+	BUG_ON(!bch_keylist_empty(&keylist));
 
 	for (i = 0; i < nodes; i++) {
 		btree_node_free(r[i].b);
@@ -1403,13 +1428,16 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 	trace_bcache_btree_gc_coalesce(nodes);
 	gc->nodes--;
 
+	bch_keylist_free(&keylist);
+
 	/* Invalidated our iterator */
 	return -EINTR;
 
 out_nocoalesce:
 	closure_sync(&cl);
+	bch_keylist_free(&keylist);
 
-	while ((k = bch_keylist_pop(keylist)))
+	while ((k = bch_keylist_pop(&keylist)))
 		if (!bkey_cmp(k, &ZERO_KEY))
 			atomic_dec(&b->c->prio_blocked);
 
@@ -1421,6 +1449,42 @@ out_nocoalesce:
 	return 0;
 }
 
+static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
+				 struct btree *replace)
+{
+	struct keylist keys;
+	struct btree *n;
+
+	if (btree_check_reserve(b, NULL))
+		return 0;
+
+	n = btree_node_alloc_replacement(replace, NULL);
+
+	/* recheck reserve after allocating replacement node */
+	if (btree_check_reserve(b, NULL)) {
+		btree_node_free(n);
+		rw_unlock(true, n);
+		return 0;
+	}
+
+	bch_btree_node_write_sync(n);
+
+	bch_keylist_init(&keys);
+	bch_keylist_add(&keys, &n->key);
+
+	make_btree_freeing_key(replace, keys.top);
+	bch_keylist_push(&keys);
+
+	bch_btree_insert_node(b, op, &keys, NULL, NULL);
+	BUG_ON(!bch_keylist_empty(&keys));
+
+	btree_node_free(replace);
+	rw_unlock(true, n);
+
+	/* Invalidated our iterator */
+	return -EINTR;
+}
+
 static unsigned btree_gc_count_keys(struct btree *b)
 {
 	struct bkey *k;
@@ -1438,14 +1502,11 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 {
 	int ret = 0;
 	bool should_rewrite;
-	struct btree *n;
 	struct bkey *k;
-	struct keylist keys;
 	struct btree_iter iter;
 	struct gc_merge_info r[GC_MERGE_NODES];
 	struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1;
 
-	bch_keylist_init(&keys);
 	bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done);
 
 	for (i = r; i < r + ARRAY_SIZE(r); i++)
@@ -1454,7 +1515,8 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 	while (1) {
 		k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
 		if (k) {
-			r->b = bch_btree_node_get(b->c, k, b->level - 1, true);
+			r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
+						  true);
 			if (IS_ERR(r->b)) {
 				ret = PTR_ERR(r->b);
 				break;
@@ -1462,7 +1524,7 @@
 
 			r->keys = btree_gc_count_keys(r->b);
 
-			ret = btree_gc_coalesce(b, op, &keys, gc, r);
+			ret = btree_gc_coalesce(b, op, gc, r);
 			if (ret)
 				break;
 		}
@@ -1472,32 +1534,10 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 
 		if (!IS_ERR(last->b)) {
 			should_rewrite = btree_gc_mark_node(last->b, gc);
-			if (should_rewrite &&
-			    !btree_check_reserve(b, NULL)) {
-				n = btree_node_alloc_replacement(last->b,
-								 false);
-
-				if (!IS_ERR_OR_NULL(n)) {
-					bch_btree_node_write_sync(n);
-
-					bch_keylist_add(&keys, &n->key);
-
-					make_btree_freeing_key(last->b,
-							       keys.top);
-					bch_keylist_push(&keys);
-
-					bch_btree_insert_node(b, op, &keys,
-							      NULL, NULL);
-					BUG_ON(!bch_keylist_empty(&keys));
-
-					btree_node_free(last->b);
-					rw_unlock(true, last->b);
-					last->b = n;
-
-					/* Invalidated our iterator */
-					ret = -EINTR;
+			if (should_rewrite) {
+				ret = btree_gc_rewrite_node(b, op, last->b);
+				if (ret)
 					break;
-				}
 			}
 
 		if (last->b->level) {
@@ -1537,8 +1577,6 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 			rw_unlock(true, i->b);
 	}
 
-	bch_keylist_free(&keys);
-
 	return ret;
 }
 
@@ -1551,7 +1589,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op,
 
 	should_rewrite = btree_gc_mark_node(b, gc);
 	if (should_rewrite) {
-		n = btree_node_alloc_replacement(b, false);
+		n = btree_node_alloc_replacement(b, NULL);
 
 		if (!IS_ERR_OR_NULL(n)) {
 			bch_btree_node_write_sync(n);
@@ -1887,11 +1925,14 @@ static int btree_split(struct btree *b, struct btree_op *op,
 	closure_init_stack(&cl);
 	bch_keylist_init(&parent_keys);
 
-	if (!b->level &&
-	    btree_check_reserve(b, op))
-		return -EINTR;
+	if (btree_check_reserve(b, op)) {
+		if (!b->level)
+			return -EINTR;
+		else
+			WARN(1, "insufficient reserve for split\n");
+	}
 
-	n1 = btree_node_alloc_replacement(b, true);
+	n1 = btree_node_alloc_replacement(b, op);
 	if (IS_ERR(n1))
 		goto err;
 
@@ -1903,12 +1944,12 @@ static int btree_split(struct btree *b, struct btree_op *op,
 
 		trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
 
-		n2 = bch_btree_node_alloc(b->c, b->level, true);
+		n2 = bch_btree_node_alloc(b->c, op, b->level);
 		if (IS_ERR(n2))
 			goto err_free1;
 
 		if (!b->parent) {
-			n3 = bch_btree_node_alloc(b->c, b->level + 1, true);
+			n3 = bch_btree_node_alloc(b->c, op, b->level + 1);
 			if (IS_ERR(n3))
 				goto err_free2;
 		}
@@ -1995,7 +2036,7 @@ err_free1:
 	btree_node_free(n1);
 	rw_unlock(true, n1);
 err:
-	WARN(1, "bcache: btree split failed");
+	WARN(1, "bcache: btree split failed (level %u)", b->level);
 
 	if (n3 == ERR_PTR(-EAGAIN) ||
 	    n2 == ERR_PTR(-EAGAIN) ||
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index acebf26809cc..3ce371fa7f98 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -242,8 +242,9 @@ void __bch_btree_node_write(struct btree *, struct closure *);
 void bch_btree_node_write(struct btree *, struct closure *);
 
 void bch_btree_set_root(struct btree *);
-struct btree *bch_btree_node_alloc(struct cache_set *, int, bool);
-struct btree *bch_btree_node_get(struct cache_set *, struct bkey *, int, bool);
+struct btree *bch_btree_node_alloc(struct cache_set *, struct btree_op *, int);
+struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *,
+				 struct bkey *, int, bool);
 
 int bch_btree_insert_check_key(struct btree *, struct btree_op *,
 			       struct bkey *);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 307fe378ea43..2d4a56219ec7 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1495,14 +1495,13 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 
 	sema_init(&c->sb_write_mutex, 1);
 	mutex_init(&c->bucket_lock);
-	init_waitqueue_head(&c->try_wait);
+	init_waitqueue_head(&c->btree_cache_wait);
 	init_waitqueue_head(&c->bucket_wait);
 	sema_init(&c->uuid_write_mutex, 1);
 
 	spin_lock_init(&c->btree_gc_time.lock);
 	spin_lock_init(&c->btree_split_time.lock);
 	spin_lock_init(&c->btree_read_time.lock);
-	spin_lock_init(&c->try_harder_time.lock);
 
 	bch_moving_init_cache_set(c);
 
@@ -1591,7 +1590,7 @@ static void run_cache_set(struct cache_set *c)
 			goto err;
 
 		err = "error reading btree root";
-		c->root = bch_btree_node_get(c, k, j->btree_level, true);
+		c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true);
 		if (IS_ERR_OR_NULL(c->root))
 			goto err;
 
@@ -1666,7 +1665,7 @@ static void run_cache_set(struct cache_set *c)
 			goto err;
 
 		err = "cannot allocate new btree root";
-		c->root = bch_btree_node_alloc(c, 0, true);
+		c->root = bch_btree_node_alloc(c, NULL, 0);
 		if (IS_ERR_OR_NULL(c->root))
 			goto err;
 
@@ -1847,13 +1846,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
 	for_each_bucket(b, ca)
 		atomic_set(&b->pin, 0);
 
-	if (bch_cache_allocator_init(ca))
-		goto err;
-
 	return 0;
-err:
-	kobject_put(&ca->kobj);
-	return -ENOMEM;
 }
 
 static void register_cache(struct cache_sb *sb, struct page *sb_page,
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 662b9484ed5e..89aaa2ef38f9 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -54,7 +54,6 @@ sysfs_time_stats_attribute(btree_gc, sec, ms);
 sysfs_time_stats_attribute(btree_split, sec, us);
 sysfs_time_stats_attribute(btree_sort, ms, us);
 sysfs_time_stats_attribute(btree_read, ms, us);
-sysfs_time_stats_attribute(try_harder, ms, us);
 
 read_attribute(btree_nodes);
 read_attribute(btree_used_percent);
@@ -534,7 +533,6 @@ lock_root:
 	sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us);
 	sysfs_print_time_stats(&c->sort.time, btree_sort, ms, us);
 	sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us);
-	sysfs_print_time_stats(&c->try_harder_time, try_harder, ms, us);
 
 	sysfs_print(btree_used_percent, btree_used(c));
 	sysfs_print(btree_nodes, c->gc_stats.nodes);
@@ -709,7 +707,6 @@ static struct attribute *bch_cache_set_internal_files[] = {
 	sysfs_time_stats_attribute_list(btree_split, sec, us)
 	sysfs_time_stats_attribute_list(btree_sort, ms, us)
 	sysfs_time_stats_attribute_list(btree_read, ms, us)
-	sysfs_time_stats_attribute_list(try_harder, ms, us)
 
 	&sysfs_btree_nodes,
 	&sysfs_btree_used_percent,
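
Not part of the patch above: a minimal user-space sketch of the ownership pattern that mca_cannibalize_lock()/bch_cannibalize_unlock() introduce - claim a single "cannibalizing" owner with one compare-and-swap, and wake every waiter when the owner lets go. The names (cannibalize_trylock, worker) are invented for illustration, a pthread condition variable stands in for the kernel waitqueue, and where the real code returns -EINTR so the caller can unwind and retry the whole btree operation, this sketch simply sleeps until the lock is free.

/* build: cc -std=c11 -pthread cannibalize_sketch.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* 0 means "no cannibalizing thread"; otherwise the id of the current owner. */
static atomic_long owner;
static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wait_cond = PTHREAD_COND_INITIALIZER;

/* Like mca_cannibalize_lock(): claim ownership with one compare-and-swap. */
static int cannibalize_trylock(long id)
{
	long expected = 0;

	if (atomic_compare_exchange_strong(&owner, &expected, id))
		return 0;			/* we are now the one cannibalizing thread */
	return expected == id ? 0 : -1;		/* already ours, or someone else owns it */
}

/* Like bch_cannibalize_unlock(): only the owner clears the field and wakes waiters. */
static void cannibalize_unlock(long id)
{
	if (atomic_load(&owner) == id) {
		atomic_store(&owner, 0);
		pthread_mutex_lock(&wait_lock);
		pthread_cond_broadcast(&wait_cond);	/* wake_up() stand-in */
		pthread_mutex_unlock(&wait_lock);
	}
}

static void *worker(void *arg)
{
	long id = (long)(intptr_t)arg;

	while (cannibalize_trylock(id) != 0) {
		/* Lost the race: sleep until the owner unlocks, then try again. */
		pthread_mutex_lock(&wait_lock);
		while (atomic_load(&owner) != 0)
			pthread_cond_wait(&wait_cond, &wait_lock);
		pthread_mutex_unlock(&wait_lock);
	}

	printf("worker %ld holds the cannibalize lock\n", id);
	usleep(1000);				/* pretend to reap and reuse a cached node */
	cannibalize_unlock(id);
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	long i;

	for (i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, (void *)(intptr_t)(i + 1));
	for (i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}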