aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKent Overstreet <kmo@daterainc.com>2013-09-10 21:41:15 -0400
committerKent Overstreet <kmo@daterainc.com>2013-11-11 00:55:57 -0500
commit26c949f8062cb9221a28b2228104f1cc5b265097 (patch)
treeda302aae6cd3154241450e4934410ac43310bd9a
parentd6fd3b11cea82346837957feab25b0be48aa424c (diff)
bcache: Add btree_insert_node()
The flow of control in the old btree insertion code was rather - backwards; we'd recurse down the btree (in btree_insert_recurse()), and then if we needed to split the keys to be inserted into the parent node would be effectively returned up to btree_insert_recurse(), which would notice there was more work to do and finish the insertion. The main problem with this was that the full logic for btree insertion could only be used by calling btree_insert_recurse; if you'd gotten to a btree leaf some other way and had a key to insert, if it turned out that node needed to be split you were SOL. This inverts the flow of control so btree_insert_node() does _full_ btree insertion, including splitting - and takes a (leaf) btree node to insert into as a parameter. This means we can now _correctly_ handle cache misses - for cache misses, we need to insert a fake "check" key into the btree when we discover we have a cache miss - while we still have the btree locked. Previously, if the btree node was full inserting a cache miss would just fail. Signed-off-by: Kent Overstreet <kmo@daterainc.com>
-rw-r--r--drivers/md/bcache/bset.c12
-rw-r--r--drivers/md/bcache/bset.h1
-rw-r--r--drivers/md/bcache/btree.c158
3 files changed, 105 insertions, 66 deletions
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 22d1ae72c282..830eede86d56 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -73,6 +73,18 @@ struct bkey *bch_keylist_pop(struct keylist *l)
73 return l->top = k; 73 return l->top = k;
74} 74}
75 75
76void bch_keylist_pop_front(struct keylist *l)
77{
78 struct bkey *next = bkey_next(l->bottom);
79 size_t bytes = ((void *) l->top) - ((void *) next);
80
81 memmove(l->bottom,
82 next,
83 bytes);
84
85 l->top = ((void *) l->bottom) + bytes;
86}
87
76/* Pointer validation */ 88/* Pointer validation */
77 89
78bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k) 90bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k)
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index ae115a253d73..a3627d0cd88b 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -267,6 +267,7 @@ static inline void bch_keylist_free(struct keylist *l)
267 267
268void bch_keylist_copy(struct keylist *, struct keylist *); 268void bch_keylist_copy(struct keylist *, struct keylist *);
269struct bkey *bch_keylist_pop(struct keylist *); 269struct bkey *bch_keylist_pop(struct keylist *);
270void bch_keylist_pop_front(struct keylist *);
270int bch_keylist_realloc(struct keylist *, int, struct cache_set *); 271int bch_keylist_realloc(struct keylist *, int, struct cache_set *);
271 272
272void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *, 273void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 87299baa8a1b..c2722e075f18 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1849,15 +1849,43 @@ merged:
1849 return true; 1849 return true;
1850} 1850}
1851 1851
1852static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op) 1852static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op,
1853 struct keylist *insert_keys)
1853{ 1854{
1854 bool ret = false; 1855 bool ret = false;
1855 struct bkey *k;
1856 unsigned oldsize = bch_count_data(b); 1856 unsigned oldsize = bch_count_data(b);
1857 1857
1858 while ((k = bch_keylist_pop(&op->keys))) { 1858 BUG_ON(!insert_lock(op, b));
1859 bkey_put(b->c, k, b->level); 1859
1860 ret |= btree_insert_key(b, op, k); 1860 while (!bch_keylist_empty(insert_keys)) {
1861 struct bkey *k = insert_keys->bottom;
1862
1863 if (b->level ||
1864 bkey_cmp(k, &b->key) <= 0) {
1865 bkey_put(b->c, k, b->level);
1866
1867 ret |= btree_insert_key(b, op, k);
1868 bch_keylist_pop_front(insert_keys);
1869 } else if (bkey_cmp(&START_KEY(k), &b->key) < 0) {
1870#if 0
1871 if (op->type == BTREE_REPLACE) {
1872 bkey_put(b->c, k, b->level);
1873 bch_keylist_pop_front(insert_keys);
1874 op->insert_collision = true;
1875 break;
1876 }
1877#endif
1878 BKEY_PADDED(key) temp;
1879 bkey_copy(&temp.key, insert_keys->bottom);
1880
1881 bch_cut_back(&b->key, &temp.key);
1882 bch_cut_front(&b->key, insert_keys->bottom);
1883
1884 ret |= btree_insert_key(b, op, &temp.key);
1885 break;
1886 } else {
1887 break;
1888 }
1861 } 1889 }
1862 1890
1863 BUG_ON(bch_count_data(b) < oldsize); 1891 BUG_ON(bch_count_data(b) < oldsize);
@@ -1897,7 +1925,9 @@ out:
1897 return ret; 1925 return ret;
1898} 1926}
1899 1927
1900static int btree_split(struct btree *b, struct btree_op *op) 1928static int btree_split(struct btree *b, struct btree_op *op,
1929 struct keylist *insert_keys,
1930 struct keylist *parent_keys)
1901{ 1931{
1902 bool split; 1932 bool split;
1903 struct btree *n1, *n2 = NULL, *n3 = NULL; 1933 struct btree *n1, *n2 = NULL, *n3 = NULL;
@@ -1927,7 +1957,7 @@ static int btree_split(struct btree *b, struct btree_op *op)
1927 goto err_free2; 1957 goto err_free2;
1928 } 1958 }
1929 1959
1930 bch_btree_insert_keys(n1, op); 1960 bch_btree_insert_keys(n1, op, insert_keys);
1931 1961
1932 /* 1962 /*
1933 * Has to be a linear search because we don't have an auxiliary 1963 * Has to be a linear search because we don't have an auxiliary
@@ -1949,23 +1979,23 @@ static int btree_split(struct btree *b, struct btree_op *op)
1949 1979
1950 bkey_copy_key(&n2->key, &b->key); 1980 bkey_copy_key(&n2->key, &b->key);
1951 1981
1952 bch_keylist_add(&op->keys, &n2->key); 1982 bch_keylist_add(parent_keys, &n2->key);
1953 bch_btree_node_write(n2, &op->cl); 1983 bch_btree_node_write(n2, &op->cl);
1954 rw_unlock(true, n2); 1984 rw_unlock(true, n2);
1955 } else { 1985 } else {
1956 trace_bcache_btree_node_compact(b, n1->sets[0].data->keys); 1986 trace_bcache_btree_node_compact(b, n1->sets[0].data->keys);
1957 1987
1958 bch_btree_insert_keys(n1, op); 1988 bch_btree_insert_keys(n1, op, insert_keys);
1959 } 1989 }
1960 1990
1961 bch_keylist_add(&op->keys, &n1->key); 1991 bch_keylist_add(parent_keys, &n1->key);
1962 bch_btree_node_write(n1, &op->cl); 1992 bch_btree_node_write(n1, &op->cl);
1963 1993
1964 if (n3) { 1994 if (n3) {
1965 /* Depth increases, make a new root */ 1995 /* Depth increases, make a new root */
1966 1996
1967 bkey_copy_key(&n3->key, &MAX_KEY); 1997 bkey_copy_key(&n3->key, &MAX_KEY);
1968 bch_btree_insert_keys(n3, op); 1998 bch_btree_insert_keys(n3, op, parent_keys);
1969 bch_btree_node_write(n3, &op->cl); 1999 bch_btree_node_write(n3, &op->cl);
1970 2000
1971 closure_sync(&op->cl); 2001 closure_sync(&op->cl);
@@ -1974,22 +2004,22 @@ static int btree_split(struct btree *b, struct btree_op *op)
1974 } else if (!b->parent) { 2004 } else if (!b->parent) {
1975 /* Root filled up but didn't need to be split */ 2005 /* Root filled up but didn't need to be split */
1976 2006
1977 op->keys.top = op->keys.bottom; 2007 parent_keys->top = parent_keys->bottom;
1978 closure_sync(&op->cl); 2008 closure_sync(&op->cl);
1979 bch_btree_set_root(n1); 2009 bch_btree_set_root(n1);
1980 } else { 2010 } else {
1981 unsigned i; 2011 unsigned i;
1982 2012
1983 bkey_copy(op->keys.top, &b->key); 2013 bkey_copy(parent_keys->top, &b->key);
1984 bkey_copy_key(op->keys.top, &ZERO_KEY); 2014 bkey_copy_key(parent_keys->top, &ZERO_KEY);
1985 2015
1986 for (i = 0; i < KEY_PTRS(&b->key); i++) { 2016 for (i = 0; i < KEY_PTRS(&b->key); i++) {
1987 uint8_t g = PTR_BUCKET(b->c, &b->key, i)->gen + 1; 2017 uint8_t g = PTR_BUCKET(b->c, &b->key, i)->gen + 1;
1988 2018
1989 SET_PTR_GEN(op->keys.top, i, g); 2019 SET_PTR_GEN(parent_keys->top, i, g);
1990 } 2020 }
1991 2021
1992 bch_keylist_push(&op->keys); 2022 bch_keylist_push(parent_keys);
1993 closure_sync(&op->cl); 2023 closure_sync(&op->cl);
1994 atomic_inc(&b->c->prio_blocked); 2024 atomic_inc(&b->c->prio_blocked);
1995 } 2025 }
@@ -2018,69 +2048,65 @@ err:
2018 return -ENOMEM; 2048 return -ENOMEM;
2019} 2049}
2020 2050
2021static int bch_btree_insert_recurse(struct btree *b, struct btree_op *op, 2051static int bch_btree_insert_node(struct btree *b, struct btree_op *op,
2022 struct keylist *stack_keys) 2052 struct keylist *insert_keys)
2023{ 2053{
2024 if (b->level) { 2054 int ret = 0;
2025 int ret; 2055 struct keylist split_keys;
2026 struct bkey *insert = op->keys.bottom;
2027 struct bkey *k = bch_next_recurse_key(b, &START_KEY(insert));
2028 2056
2029 if (!k) { 2057 bch_keylist_init(&split_keys);
2030 btree_bug(b, "no key to recurse on at level %i/%i",
2031 b->level, b->c->root->level);
2032 2058
2033 op->keys.top = op->keys.bottom; 2059 BUG_ON(b->level);
2034 return -EIO;
2035 }
2036 2060
2037 if (bkey_cmp(insert, k) > 0) { 2061 do {
2038 unsigned i; 2062 if (should_split(b)) {
2063 if (current->bio_list) {
2064 op->lock = b->c->root->level + 1;
2065 ret = -EAGAIN;
2066 } else if (op->lock <= b->c->root->level) {
2067 op->lock = b->c->root->level + 1;
2068 ret = -EINTR;
2069 } else {
2070 struct btree *parent = b->parent;
2039 2071
2040 if (op->type == BTREE_REPLACE) { 2072 ret = btree_split(b, op, insert_keys,
2041 __bkey_put(b->c, insert); 2073 &split_keys);
2042 op->keys.top = op->keys.bottom; 2074 insert_keys = &split_keys;
2043 op->insert_collision = true; 2075 b = parent;
2044 return 0;
2045 } 2076 }
2077 } else {
2078 BUG_ON(write_block(b) != b->sets[b->nsets].data);
2046 2079
2047 for (i = 0; i < KEY_PTRS(insert); i++) 2080 if (bch_btree_insert_keys(b, op, insert_keys)) {
2048 atomic_inc(&PTR_BUCKET(b->c, insert, i)->pin); 2081 if (!b->level)
2049 2082 bch_btree_leaf_dirty(b, op);
2050 bkey_copy(stack_keys->top, insert); 2083 else
2051 2084 bch_btree_node_write(b, &op->cl);
2052 bch_cut_back(k, insert); 2085 }
2053 bch_cut_front(k, stack_keys->top);
2054
2055 bch_keylist_push(stack_keys);
2056 } 2086 }
2087 } while (!bch_keylist_empty(&split_keys));
2057 2088
2058 ret = btree(insert_recurse, k, b, op, stack_keys); 2089 return ret;
2059 if (ret) 2090}
2060 return ret;
2061 }
2062 2091
2063 if (!bch_keylist_empty(&op->keys)) { 2092static int bch_btree_insert_recurse(struct btree *b, struct btree_op *op)
2064 if (should_split(b)) { 2093{
2065 if (op->lock <= b->c->root->level) { 2094 if (b->level) {
2066 BUG_ON(b->level); 2095 struct bkey *insert = op->keys.bottom;
2067 op->lock = b->c->root->level + 1; 2096 struct bkey *k = bch_next_recurse_key(b, &START_KEY(insert));
2068 return -EINTR;
2069 }
2070 return btree_split(b, op);
2071 }
2072 2097
2073 BUG_ON(write_block(b) != b->sets[b->nsets].data); 2098 if (!k) {
2099 btree_bug(b, "no key to recurse on at level %i/%i",
2100 b->level, b->c->root->level);
2074 2101
2075 if (bch_btree_insert_keys(b, op)) { 2102 op->keys.top = op->keys.bottom;
2076 if (!b->level) 2103 return -EIO;
2077 bch_btree_leaf_dirty(b, op);
2078 else
2079 bch_btree_node_write(b, &op->cl);
2080 } 2104 }
2081 }
2082 2105
2083 return 0; 2106 return btree(insert_recurse, k, b, op);
2107 } else {
2108 return bch_btree_insert_node(b, op, &op->keys);
2109 }
2084} 2110}
2085 2111
2086int bch_btree_insert(struct btree_op *op, struct cache_set *c) 2112int bch_btree_insert(struct btree_op *op, struct cache_set *c)
@@ -2106,7 +2132,7 @@ int bch_btree_insert(struct btree_op *op, struct cache_set *c)
2106 op->lock = 0; 2132 op->lock = 0;
2107 } 2133 }
2108 2134
2109 ret = btree_root(insert_recurse, c, op, &stack_keys); 2135 ret = btree_root(insert_recurse, c, op);
2110 2136
2111 if (ret == -EAGAIN) { 2137 if (ret == -EAGAIN) {
2112 ret = 0; 2138 ret = 0;