diff options
author | Kent Overstreet <kmo@daterainc.com> | 2013-09-10 21:41:15 -0400 |
---|---|---|
committer | Kent Overstreet <kmo@daterainc.com> | 2013-11-11 00:55:57 -0500 |
commit | 26c949f8062cb9221a28b2228104f1cc5b265097 (patch) | |
tree | da302aae6cd3154241450e4934410ac43310bd9a | |
parent | d6fd3b11cea82346837957feab25b0be48aa424c (diff) |
bcache: Add btree_insert_node()
The flow of control in the old btree insertion code was rather -
backwards; we'd recurse down the btree (in btree_insert_recurse()), and
then if we needed to split the keys to be inserted into the parent node
would be effectively returned up to btree_insert_recurse(), which would
notice there was more work to do and finish the insertion.
The main problem with this was that the full logic for btree insertion
could only be used by calling btree_insert_recurse; if you'd gotten to a
btree leaf some other way and had a key to insert, if it turned out that
node needed to be split you were SOL.
This inverts the flow of control so btree_insert_node() does _full_
btree insertion, including splitting - and takes a (leaf) btree node to
insert into as a parameter.
This means we can now _correctly_ handle cache misses - for cache
misses, we need to insert a fake "check" key into the btree when we
discover we have a cache miss - while we still have the btree locked.
Previously, if the btree node was full inserting a cache miss would just
fail.
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
-rw-r--r-- | drivers/md/bcache/bset.c | 12 | ||||
-rw-r--r-- | drivers/md/bcache/bset.h | 1 | ||||
-rw-r--r-- | drivers/md/bcache/btree.c | 158 |
3 files changed, 105 insertions, 66 deletions
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 22d1ae72c282..830eede86d56 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c | |||
@@ -73,6 +73,18 @@ struct bkey *bch_keylist_pop(struct keylist *l) | |||
73 | return l->top = k; | 73 | return l->top = k; |
74 | } | 74 | } |
75 | 75 | ||
76 | void bch_keylist_pop_front(struct keylist *l) | ||
77 | { | ||
78 | struct bkey *next = bkey_next(l->bottom); | ||
79 | size_t bytes = ((void *) l->top) - ((void *) next); | ||
80 | |||
81 | memmove(l->bottom, | ||
82 | next, | ||
83 | bytes); | ||
84 | |||
85 | l->top = ((void *) l->bottom) + bytes; | ||
86 | } | ||
87 | |||
76 | /* Pointer validation */ | 88 | /* Pointer validation */ |
77 | 89 | ||
78 | bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k) | 90 | bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k) |
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index ae115a253d73..a3627d0cd88b 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h | |||
@@ -267,6 +267,7 @@ static inline void bch_keylist_free(struct keylist *l) | |||
267 | 267 | ||
268 | void bch_keylist_copy(struct keylist *, struct keylist *); | 268 | void bch_keylist_copy(struct keylist *, struct keylist *); |
269 | struct bkey *bch_keylist_pop(struct keylist *); | 269 | struct bkey *bch_keylist_pop(struct keylist *); |
270 | void bch_keylist_pop_front(struct keylist *); | ||
270 | int bch_keylist_realloc(struct keylist *, int, struct cache_set *); | 271 | int bch_keylist_realloc(struct keylist *, int, struct cache_set *); |
271 | 272 | ||
272 | void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *, | 273 | void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *, |
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 87299baa8a1b..c2722e075f18 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c | |||
@@ -1849,15 +1849,43 @@ merged: | |||
1849 | return true; | 1849 | return true; |
1850 | } | 1850 | } |
1851 | 1851 | ||
1852 | static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op) | 1852 | static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op, |
1853 | struct keylist *insert_keys) | ||
1853 | { | 1854 | { |
1854 | bool ret = false; | 1855 | bool ret = false; |
1855 | struct bkey *k; | ||
1856 | unsigned oldsize = bch_count_data(b); | 1856 | unsigned oldsize = bch_count_data(b); |
1857 | 1857 | ||
1858 | while ((k = bch_keylist_pop(&op->keys))) { | 1858 | BUG_ON(!insert_lock(op, b)); |
1859 | bkey_put(b->c, k, b->level); | 1859 | |
1860 | ret |= btree_insert_key(b, op, k); | 1860 | while (!bch_keylist_empty(insert_keys)) { |
1861 | struct bkey *k = insert_keys->bottom; | ||
1862 | |||
1863 | if (b->level || | ||
1864 | bkey_cmp(k, &b->key) <= 0) { | ||
1865 | bkey_put(b->c, k, b->level); | ||
1866 | |||
1867 | ret |= btree_insert_key(b, op, k); | ||
1868 | bch_keylist_pop_front(insert_keys); | ||
1869 | } else if (bkey_cmp(&START_KEY(k), &b->key) < 0) { | ||
1870 | #if 0 | ||
1871 | if (op->type == BTREE_REPLACE) { | ||
1872 | bkey_put(b->c, k, b->level); | ||
1873 | bch_keylist_pop_front(insert_keys); | ||
1874 | op->insert_collision = true; | ||
1875 | break; | ||
1876 | } | ||
1877 | #endif | ||
1878 | BKEY_PADDED(key) temp; | ||
1879 | bkey_copy(&temp.key, insert_keys->bottom); | ||
1880 | |||
1881 | bch_cut_back(&b->key, &temp.key); | ||
1882 | bch_cut_front(&b->key, insert_keys->bottom); | ||
1883 | |||
1884 | ret |= btree_insert_key(b, op, &temp.key); | ||
1885 | break; | ||
1886 | } else { | ||
1887 | break; | ||
1888 | } | ||
1861 | } | 1889 | } |
1862 | 1890 | ||
1863 | BUG_ON(bch_count_data(b) < oldsize); | 1891 | BUG_ON(bch_count_data(b) < oldsize); |
@@ -1897,7 +1925,9 @@ out: | |||
1897 | return ret; | 1925 | return ret; |
1898 | } | 1926 | } |
1899 | 1927 | ||
1900 | static int btree_split(struct btree *b, struct btree_op *op) | 1928 | static int btree_split(struct btree *b, struct btree_op *op, |
1929 | struct keylist *insert_keys, | ||
1930 | struct keylist *parent_keys) | ||
1901 | { | 1931 | { |
1902 | bool split; | 1932 | bool split; |
1903 | struct btree *n1, *n2 = NULL, *n3 = NULL; | 1933 | struct btree *n1, *n2 = NULL, *n3 = NULL; |
@@ -1927,7 +1957,7 @@ static int btree_split(struct btree *b, struct btree_op *op) | |||
1927 | goto err_free2; | 1957 | goto err_free2; |
1928 | } | 1958 | } |
1929 | 1959 | ||
1930 | bch_btree_insert_keys(n1, op); | 1960 | bch_btree_insert_keys(n1, op, insert_keys); |
1931 | 1961 | ||
1932 | /* | 1962 | /* |
1933 | * Has to be a linear search because we don't have an auxiliary | 1963 | * Has to be a linear search because we don't have an auxiliary |
@@ -1949,23 +1979,23 @@ static int btree_split(struct btree *b, struct btree_op *op) | |||
1949 | 1979 | ||
1950 | bkey_copy_key(&n2->key, &b->key); | 1980 | bkey_copy_key(&n2->key, &b->key); |
1951 | 1981 | ||
1952 | bch_keylist_add(&op->keys, &n2->key); | 1982 | bch_keylist_add(parent_keys, &n2->key); |
1953 | bch_btree_node_write(n2, &op->cl); | 1983 | bch_btree_node_write(n2, &op->cl); |
1954 | rw_unlock(true, n2); | 1984 | rw_unlock(true, n2); |
1955 | } else { | 1985 | } else { |
1956 | trace_bcache_btree_node_compact(b, n1->sets[0].data->keys); | 1986 | trace_bcache_btree_node_compact(b, n1->sets[0].data->keys); |
1957 | 1987 | ||
1958 | bch_btree_insert_keys(n1, op); | 1988 | bch_btree_insert_keys(n1, op, insert_keys); |
1959 | } | 1989 | } |
1960 | 1990 | ||
1961 | bch_keylist_add(&op->keys, &n1->key); | 1991 | bch_keylist_add(parent_keys, &n1->key); |
1962 | bch_btree_node_write(n1, &op->cl); | 1992 | bch_btree_node_write(n1, &op->cl); |
1963 | 1993 | ||
1964 | if (n3) { | 1994 | if (n3) { |
1965 | /* Depth increases, make a new root */ | 1995 | /* Depth increases, make a new root */ |
1966 | 1996 | ||
1967 | bkey_copy_key(&n3->key, &MAX_KEY); | 1997 | bkey_copy_key(&n3->key, &MAX_KEY); |
1968 | bch_btree_insert_keys(n3, op); | 1998 | bch_btree_insert_keys(n3, op, parent_keys); |
1969 | bch_btree_node_write(n3, &op->cl); | 1999 | bch_btree_node_write(n3, &op->cl); |
1970 | 2000 | ||
1971 | closure_sync(&op->cl); | 2001 | closure_sync(&op->cl); |
@@ -1974,22 +2004,22 @@ static int btree_split(struct btree *b, struct btree_op *op) | |||
1974 | } else if (!b->parent) { | 2004 | } else if (!b->parent) { |
1975 | /* Root filled up but didn't need to be split */ | 2005 | /* Root filled up but didn't need to be split */ |
1976 | 2006 | ||
1977 | op->keys.top = op->keys.bottom; | 2007 | parent_keys->top = parent_keys->bottom; |
1978 | closure_sync(&op->cl); | 2008 | closure_sync(&op->cl); |
1979 | bch_btree_set_root(n1); | 2009 | bch_btree_set_root(n1); |
1980 | } else { | 2010 | } else { |
1981 | unsigned i; | 2011 | unsigned i; |
1982 | 2012 | ||
1983 | bkey_copy(op->keys.top, &b->key); | 2013 | bkey_copy(parent_keys->top, &b->key); |
1984 | bkey_copy_key(op->keys.top, &ZERO_KEY); | 2014 | bkey_copy_key(parent_keys->top, &ZERO_KEY); |
1985 | 2015 | ||
1986 | for (i = 0; i < KEY_PTRS(&b->key); i++) { | 2016 | for (i = 0; i < KEY_PTRS(&b->key); i++) { |
1987 | uint8_t g = PTR_BUCKET(b->c, &b->key, i)->gen + 1; | 2017 | uint8_t g = PTR_BUCKET(b->c, &b->key, i)->gen + 1; |
1988 | 2018 | ||
1989 | SET_PTR_GEN(op->keys.top, i, g); | 2019 | SET_PTR_GEN(parent_keys->top, i, g); |
1990 | } | 2020 | } |
1991 | 2021 | ||
1992 | bch_keylist_push(&op->keys); | 2022 | bch_keylist_push(parent_keys); |
1993 | closure_sync(&op->cl); | 2023 | closure_sync(&op->cl); |
1994 | atomic_inc(&b->c->prio_blocked); | 2024 | atomic_inc(&b->c->prio_blocked); |
1995 | } | 2025 | } |
@@ -2018,69 +2048,65 @@ err: | |||
2018 | return -ENOMEM; | 2048 | return -ENOMEM; |
2019 | } | 2049 | } |
2020 | 2050 | ||
2021 | static int bch_btree_insert_recurse(struct btree *b, struct btree_op *op, | 2051 | static int bch_btree_insert_node(struct btree *b, struct btree_op *op, |
2022 | struct keylist *stack_keys) | 2052 | struct keylist *insert_keys) |
2023 | { | 2053 | { |
2024 | if (b->level) { | 2054 | int ret = 0; |
2025 | int ret; | 2055 | struct keylist split_keys; |
2026 | struct bkey *insert = op->keys.bottom; | ||
2027 | struct bkey *k = bch_next_recurse_key(b, &START_KEY(insert)); | ||
2028 | 2056 | ||
2029 | if (!k) { | 2057 | bch_keylist_init(&split_keys); |
2030 | btree_bug(b, "no key to recurse on at level %i/%i", | ||
2031 | b->level, b->c->root->level); | ||
2032 | 2058 | ||
2033 | op->keys.top = op->keys.bottom; | 2059 | BUG_ON(b->level); |
2034 | return -EIO; | ||
2035 | } | ||
2036 | 2060 | ||
2037 | if (bkey_cmp(insert, k) > 0) { | 2061 | do { |
2038 | unsigned i; | 2062 | if (should_split(b)) { |
2063 | if (current->bio_list) { | ||
2064 | op->lock = b->c->root->level + 1; | ||
2065 | ret = -EAGAIN; | ||
2066 | } else if (op->lock <= b->c->root->level) { | ||
2067 | op->lock = b->c->root->level + 1; | ||
2068 | ret = -EINTR; | ||
2069 | } else { | ||
2070 | struct btree *parent = b->parent; | ||
2039 | 2071 | ||
2040 | if (op->type == BTREE_REPLACE) { | 2072 | ret = btree_split(b, op, insert_keys, |
2041 | __bkey_put(b->c, insert); | 2073 | &split_keys); |
2042 | op->keys.top = op->keys.bottom; | 2074 | insert_keys = &split_keys; |
2043 | op->insert_collision = true; | 2075 | b = parent; |
2044 | return 0; | ||
2045 | } | 2076 | } |
2077 | } else { | ||
2078 | BUG_ON(write_block(b) != b->sets[b->nsets].data); | ||
2046 | 2079 | ||
2047 | for (i = 0; i < KEY_PTRS(insert); i++) | 2080 | if (bch_btree_insert_keys(b, op, insert_keys)) { |
2048 | atomic_inc(&PTR_BUCKET(b->c, insert, i)->pin); | 2081 | if (!b->level) |
2049 | 2082 | bch_btree_leaf_dirty(b, op); | |
2050 | bkey_copy(stack_keys->top, insert); | 2083 | else |
2051 | 2084 | bch_btree_node_write(b, &op->cl); | |
2052 | bch_cut_back(k, insert); | 2085 | } |
2053 | bch_cut_front(k, stack_keys->top); | ||
2054 | |||
2055 | bch_keylist_push(stack_keys); | ||
2056 | } | 2086 | } |
2087 | } while (!bch_keylist_empty(&split_keys)); | ||
2057 | 2088 | ||
2058 | ret = btree(insert_recurse, k, b, op, stack_keys); | 2089 | return ret; |
2059 | if (ret) | 2090 | } |
2060 | return ret; | ||
2061 | } | ||
2062 | 2091 | ||
2063 | if (!bch_keylist_empty(&op->keys)) { | 2092 | static int bch_btree_insert_recurse(struct btree *b, struct btree_op *op) |
2064 | if (should_split(b)) { | 2093 | { |
2065 | if (op->lock <= b->c->root->level) { | 2094 | if (b->level) { |
2066 | BUG_ON(b->level); | 2095 | struct bkey *insert = op->keys.bottom; |
2067 | op->lock = b->c->root->level + 1; | 2096 | struct bkey *k = bch_next_recurse_key(b, &START_KEY(insert)); |
2068 | return -EINTR; | ||
2069 | } | ||
2070 | return btree_split(b, op); | ||
2071 | } | ||
2072 | 2097 | ||
2073 | BUG_ON(write_block(b) != b->sets[b->nsets].data); | 2098 | if (!k) { |
2099 | btree_bug(b, "no key to recurse on at level %i/%i", | ||
2100 | b->level, b->c->root->level); | ||
2074 | 2101 | ||
2075 | if (bch_btree_insert_keys(b, op)) { | 2102 | op->keys.top = op->keys.bottom; |
2076 | if (!b->level) | 2103 | return -EIO; |
2077 | bch_btree_leaf_dirty(b, op); | ||
2078 | else | ||
2079 | bch_btree_node_write(b, &op->cl); | ||
2080 | } | 2104 | } |
2081 | } | ||
2082 | 2105 | ||
2083 | return 0; | 2106 | return btree(insert_recurse, k, b, op); |
2107 | } else { | ||
2108 | return bch_btree_insert_node(b, op, &op->keys); | ||
2109 | } | ||
2084 | } | 2110 | } |
2085 | 2111 | ||
2086 | int bch_btree_insert(struct btree_op *op, struct cache_set *c) | 2112 | int bch_btree_insert(struct btree_op *op, struct cache_set *c) |
@@ -2106,7 +2132,7 @@ int bch_btree_insert(struct btree_op *op, struct cache_set *c) | |||
2106 | op->lock = 0; | 2132 | op->lock = 0; |
2107 | } | 2133 | } |
2108 | 2134 | ||
2109 | ret = btree_root(insert_recurse, c, op, &stack_keys); | 2135 | ret = btree_root(insert_recurse, c, op); |
2110 | 2136 | ||
2111 | if (ret == -EAGAIN) { | 2137 | if (ret == -EAGAIN) { |
2112 | ret = 0; | 2138 | ret = 0; |