diff options
| -rw-r--r-- | net/netfilter/nft_hash.c | 291 |
1 files changed, 55 insertions, 236 deletions
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 4080ed6a072b..28fb8f38e6ba 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c | |||
| @@ -15,209 +15,40 @@ | |||
| 15 | #include <linux/log2.h> | 15 | #include <linux/log2.h> |
| 16 | #include <linux/jhash.h> | 16 | #include <linux/jhash.h> |
| 17 | #include <linux/netlink.h> | 17 | #include <linux/netlink.h> |
| 18 | #include <linux/vmalloc.h> | 18 | #include <linux/rhashtable.h> |
| 19 | #include <linux/netfilter.h> | 19 | #include <linux/netfilter.h> |
| 20 | #include <linux/netfilter/nf_tables.h> | 20 | #include <linux/netfilter/nf_tables.h> |
| 21 | #include <net/netfilter/nf_tables.h> | 21 | #include <net/netfilter/nf_tables.h> |
| 22 | 22 | ||
| 23 | #define NFT_HASH_MIN_SIZE 4UL | 23 | /* We target a hash table size of 4, element hint is 75% of final size */ |
| 24 | 24 | #define NFT_HASH_ELEMENT_HINT 3 | |
| 25 | struct nft_hash { | ||
| 26 | struct nft_hash_table __rcu *tbl; | ||
| 27 | }; | ||
| 28 | |||
| 29 | struct nft_hash_table { | ||
| 30 | unsigned int size; | ||
| 31 | struct nft_hash_elem __rcu *buckets[]; | ||
| 32 | }; | ||
| 33 | 25 | ||
| 34 | struct nft_hash_elem { | 26 | struct nft_hash_elem { |
| 35 | struct nft_hash_elem __rcu *next; | 27 | struct rhash_head node; |
| 36 | struct nft_data key; | 28 | struct nft_data key; |
| 37 | struct nft_data data[]; | 29 | struct nft_data data[]; |
| 38 | }; | 30 | }; |
| 39 | 31 | ||
| 40 | #define nft_hash_for_each_entry(i, head) \ | ||
| 41 | for (i = nft_dereference(head); i != NULL; i = nft_dereference(i->next)) | ||
| 42 | #define nft_hash_for_each_entry_rcu(i, head) \ | ||
| 43 | for (i = rcu_dereference(head); i != NULL; i = rcu_dereference(i->next)) | ||
| 44 | |||
| 45 | static u32 nft_hash_rnd __read_mostly; | ||
| 46 | static bool nft_hash_rnd_initted __read_mostly; | ||
| 47 | |||
| 48 | static unsigned int nft_hash_data(const struct nft_data *data, | ||
| 49 | unsigned int hsize, unsigned int len) | ||
| 50 | { | ||
| 51 | unsigned int h; | ||
| 52 | |||
| 53 | h = jhash(data->data, len, nft_hash_rnd); | ||
| 54 | return h & (hsize - 1); | ||
| 55 | } | ||
| 56 | |||
| 57 | static bool nft_hash_lookup(const struct nft_set *set, | 32 | static bool nft_hash_lookup(const struct nft_set *set, |
| 58 | const struct nft_data *key, | 33 | const struct nft_data *key, |
| 59 | struct nft_data *data) | 34 | struct nft_data *data) |
| 60 | { | 35 | { |
| 61 | const struct nft_hash *priv = nft_set_priv(set); | 36 | const struct rhashtable *priv = nft_set_priv(set); |
| 62 | const struct nft_hash_table *tbl = rcu_dereference(priv->tbl); | ||
| 63 | const struct nft_hash_elem *he; | 37 | const struct nft_hash_elem *he; |
| 64 | unsigned int h; | ||
| 65 | |||
| 66 | h = nft_hash_data(key, tbl->size, set->klen); | ||
| 67 | nft_hash_for_each_entry_rcu(he, tbl->buckets[h]) { | ||
| 68 | if (nft_data_cmp(&he->key, key, set->klen)) | ||
| 69 | continue; | ||
| 70 | if (set->flags & NFT_SET_MAP) | ||
| 71 | nft_data_copy(data, he->data); | ||
| 72 | return true; | ||
| 73 | } | ||
| 74 | return false; | ||
| 75 | } | ||
| 76 | |||
| 77 | static void nft_hash_tbl_free(const struct nft_hash_table *tbl) | ||
| 78 | { | ||
| 79 | kvfree(tbl); | ||
| 80 | } | ||
| 81 | |||
| 82 | static unsigned int nft_hash_tbl_size(unsigned int nelem) | ||
| 83 | { | ||
| 84 | return max(roundup_pow_of_two(nelem * 4 / 3), NFT_HASH_MIN_SIZE); | ||
| 85 | } | ||
| 86 | |||
| 87 | static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets) | ||
| 88 | { | ||
| 89 | struct nft_hash_table *tbl; | ||
| 90 | size_t size; | ||
| 91 | |||
| 92 | size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); | ||
| 93 | tbl = kzalloc(size, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN); | ||
| 94 | if (tbl == NULL) | ||
| 95 | tbl = vzalloc(size); | ||
| 96 | if (tbl == NULL) | ||
| 97 | return NULL; | ||
| 98 | tbl->size = nbuckets; | ||
| 99 | |||
| 100 | return tbl; | ||
| 101 | } | ||
| 102 | |||
| 103 | static void nft_hash_chain_unzip(const struct nft_set *set, | ||
| 104 | const struct nft_hash_table *ntbl, | ||
| 105 | struct nft_hash_table *tbl, unsigned int n) | ||
| 106 | { | ||
| 107 | struct nft_hash_elem *he, *last, *next; | ||
| 108 | unsigned int h; | ||
| 109 | |||
| 110 | he = nft_dereference(tbl->buckets[n]); | ||
| 111 | if (he == NULL) | ||
| 112 | return; | ||
| 113 | h = nft_hash_data(&he->key, ntbl->size, set->klen); | ||
| 114 | |||
| 115 | /* Find last element of first chain hashing to bucket h */ | ||
| 116 | last = he; | ||
| 117 | nft_hash_for_each_entry(he, he->next) { | ||
| 118 | if (nft_hash_data(&he->key, ntbl->size, set->klen) != h) | ||
| 119 | break; | ||
| 120 | last = he; | ||
| 121 | } | ||
| 122 | |||
| 123 | /* Unlink first chain from the old table */ | ||
| 124 | RCU_INIT_POINTER(tbl->buckets[n], last->next); | ||
| 125 | 38 | ||
| 126 | /* If end of chain reached, done */ | 39 | he = rhashtable_lookup(priv, key); |
| 127 | if (he == NULL) | 40 | if (he && set->flags & NFT_SET_MAP) |
| 128 | return; | 41 | nft_data_copy(data, he->data); |
| 129 | 42 | ||
| 130 | /* Find first element of second chain hashing to bucket h */ | 43 | return !!he; |
| 131 | next = NULL; | ||
| 132 | nft_hash_for_each_entry(he, he->next) { | ||
| 133 | if (nft_hash_data(&he->key, ntbl->size, set->klen) != h) | ||
| 134 | continue; | ||
| 135 | next = he; | ||
| 136 | break; | ||
| 137 | } | ||
| 138 | |||
| 139 | /* Link the two chains */ | ||
| 140 | RCU_INIT_POINTER(last->next, next); | ||
| 141 | } | ||
| 142 | |||
| 143 | static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv) | ||
| 144 | { | ||
| 145 | struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl; | ||
| 146 | struct nft_hash_elem *he; | ||
| 147 | unsigned int i, h; | ||
| 148 | bool complete; | ||
| 149 | |||
| 150 | ntbl = nft_hash_tbl_alloc(tbl->size * 2); | ||
| 151 | if (ntbl == NULL) | ||
| 152 | return -ENOMEM; | ||
| 153 | |||
| 154 | /* Link new table's buckets to first element in the old table | ||
| 155 | * hashing to the new bucket. | ||
| 156 | */ | ||
| 157 | for (i = 0; i < ntbl->size; i++) { | ||
| 158 | h = i < tbl->size ? i : i - tbl->size; | ||
| 159 | nft_hash_for_each_entry(he, tbl->buckets[h]) { | ||
| 160 | if (nft_hash_data(&he->key, ntbl->size, set->klen) != i) | ||
| 161 | continue; | ||
| 162 | RCU_INIT_POINTER(ntbl->buckets[i], he); | ||
| 163 | break; | ||
| 164 | } | ||
| 165 | } | ||
| 166 | |||
| 167 | /* Publish new table */ | ||
| 168 | rcu_assign_pointer(priv->tbl, ntbl); | ||
| 169 | |||
| 170 | /* Unzip interleaved hash chains */ | ||
| 171 | do { | ||
| 172 | /* Wait for readers to use new table/unzipped chains */ | ||
| 173 | synchronize_rcu(); | ||
| 174 | |||
| 175 | complete = true; | ||
| 176 | for (i = 0; i < tbl->size; i++) { | ||
| 177 | nft_hash_chain_unzip(set, ntbl, tbl, i); | ||
| 178 | if (tbl->buckets[i] != NULL) | ||
| 179 | complete = false; | ||
| 180 | } | ||
| 181 | } while (!complete); | ||
| 182 | |||
| 183 | nft_hash_tbl_free(tbl); | ||
| 184 | return 0; | ||
| 185 | } | ||
| 186 | |||
| 187 | static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv) | ||
| 188 | { | ||
| 189 | struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl; | ||
| 190 | struct nft_hash_elem __rcu **pprev; | ||
| 191 | unsigned int i; | ||
| 192 | |||
| 193 | ntbl = nft_hash_tbl_alloc(tbl->size / 2); | ||
| 194 | if (ntbl == NULL) | ||
| 195 | return -ENOMEM; | ||
| 196 | |||
| 197 | for (i = 0; i < ntbl->size; i++) { | ||
| 198 | ntbl->buckets[i] = tbl->buckets[i]; | ||
| 199 | |||
| 200 | for (pprev = &ntbl->buckets[i]; *pprev != NULL; | ||
| 201 | pprev = &nft_dereference(*pprev)->next) | ||
| 202 | ; | ||
| 203 | RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); | ||
| 204 | } | ||
| 205 | |||
| 206 | /* Publish new table */ | ||
| 207 | rcu_assign_pointer(priv->tbl, ntbl); | ||
| 208 | synchronize_rcu(); | ||
| 209 | |||
| 210 | nft_hash_tbl_free(tbl); | ||
| 211 | return 0; | ||
| 212 | } | 44 | } |
| 213 | 45 | ||
| 214 | static int nft_hash_insert(const struct nft_set *set, | 46 | static int nft_hash_insert(const struct nft_set *set, |
| 215 | const struct nft_set_elem *elem) | 47 | const struct nft_set_elem *elem) |
| 216 | { | 48 | { |
| 217 | struct nft_hash *priv = nft_set_priv(set); | 49 | struct rhashtable *priv = nft_set_priv(set); |
| 218 | struct nft_hash_table *tbl = nft_dereference(priv->tbl); | ||
| 219 | struct nft_hash_elem *he; | 50 | struct nft_hash_elem *he; |
| 220 | unsigned int size, h; | 51 | unsigned int size; |
| 221 | 52 | ||
| 222 | if (elem->flags != 0) | 53 | if (elem->flags != 0) |
| 223 | return -EINVAL; | 54 | return -EINVAL; |
| @@ -234,13 +65,7 @@ static int nft_hash_insert(const struct nft_set *set, | |||
| 234 | if (set->flags & NFT_SET_MAP) | 65 | if (set->flags & NFT_SET_MAP) |
| 235 | nft_data_copy(he->data, &elem->data); | 66 | nft_data_copy(he->data, &elem->data); |
| 236 | 67 | ||
| 237 | h = nft_hash_data(&he->key, tbl->size, set->klen); | 68 | rhashtable_insert(priv, &he->node, GFP_KERNEL); |
| 238 | RCU_INIT_POINTER(he->next, tbl->buckets[h]); | ||
| 239 | rcu_assign_pointer(tbl->buckets[h], he); | ||
| 240 | |||
| 241 | /* Expand table when exceeding 75% load */ | ||
| 242 | if (set->nelems + 1 > tbl->size / 4 * 3) | ||
| 243 | nft_hash_tbl_expand(set, priv); | ||
| 244 | 69 | ||
| 245 | return 0; | 70 | return 0; |
| 246 | } | 71 | } |
| @@ -257,36 +82,31 @@ static void nft_hash_elem_destroy(const struct nft_set *set, | |||
| 257 | static void nft_hash_remove(const struct nft_set *set, | 82 | static void nft_hash_remove(const struct nft_set *set, |
| 258 | const struct nft_set_elem *elem) | 83 | const struct nft_set_elem *elem) |
| 259 | { | 84 | { |
| 260 | struct nft_hash *priv = nft_set_priv(set); | 85 | struct rhashtable *priv = nft_set_priv(set); |
| 261 | struct nft_hash_table *tbl = nft_dereference(priv->tbl); | 86 | struct rhash_head *he, __rcu **pprev; |
| 262 | struct nft_hash_elem *he, __rcu **pprev; | ||
| 263 | 87 | ||
| 264 | pprev = elem->cookie; | 88 | pprev = elem->cookie; |
| 265 | he = nft_dereference((*pprev)); | 89 | he = rht_dereference((*pprev), priv); |
| 90 | |||
| 91 | rhashtable_remove_pprev(priv, he, pprev, GFP_KERNEL); | ||
| 266 | 92 | ||
| 267 | RCU_INIT_POINTER(*pprev, he->next); | ||
| 268 | synchronize_rcu(); | 93 | synchronize_rcu(); |
| 269 | kfree(he); | 94 | kfree(he); |
| 270 | |||
| 271 | /* Shrink table beneath 30% load */ | ||
| 272 | if (set->nelems - 1 < tbl->size * 3 / 10 && | ||
| 273 | tbl->size > NFT_HASH_MIN_SIZE) | ||
| 274 | nft_hash_tbl_shrink(set, priv); | ||
| 275 | } | 95 | } |
| 276 | 96 | ||
| 277 | static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) | 97 | static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) |
| 278 | { | 98 | { |
| 279 | const struct nft_hash *priv = nft_set_priv(set); | 99 | const struct rhashtable *priv = nft_set_priv(set); |
| 280 | const struct nft_hash_table *tbl = nft_dereference(priv->tbl); | 100 | const struct bucket_table *tbl = rht_dereference_rcu(priv->tbl, priv); |
| 281 | struct nft_hash_elem __rcu * const *pprev; | 101 | struct rhash_head __rcu * const *pprev; |
| 282 | struct nft_hash_elem *he; | 102 | struct nft_hash_elem *he; |
| 283 | unsigned int h; | 103 | u32 h; |
| 284 | 104 | ||
| 285 | h = nft_hash_data(&elem->key, tbl->size, set->klen); | 105 | h = rhashtable_hashfn(priv, &elem->key, set->klen); |
| 286 | pprev = &tbl->buckets[h]; | 106 | pprev = &tbl->buckets[h]; |
| 287 | nft_hash_for_each_entry(he, tbl->buckets[h]) { | 107 | rht_for_each_entry_rcu(he, tbl->buckets[h], node) { |
| 288 | if (nft_data_cmp(&he->key, &elem->key, set->klen)) { | 108 | if (nft_data_cmp(&he->key, &elem->key, set->klen)) { |
| 289 | pprev = &he->next; | 109 | pprev = &he->node.next; |
| 290 | continue; | 110 | continue; |
| 291 | } | 111 | } |
| 292 | 112 | ||
| @@ -302,14 +122,15 @@ static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) | |||
| 302 | static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, | 122 | static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, |
| 303 | struct nft_set_iter *iter) | 123 | struct nft_set_iter *iter) |
| 304 | { | 124 | { |
| 305 | const struct nft_hash *priv = nft_set_priv(set); | 125 | const struct rhashtable *priv = nft_set_priv(set); |
| 306 | const struct nft_hash_table *tbl = nft_dereference(priv->tbl); | 126 | const struct bucket_table *tbl; |
| 307 | const struct nft_hash_elem *he; | 127 | const struct nft_hash_elem *he; |
| 308 | struct nft_set_elem elem; | 128 | struct nft_set_elem elem; |
| 309 | unsigned int i; | 129 | unsigned int i; |
| 310 | 130 | ||
| 131 | tbl = rht_dereference_rcu(priv->tbl, priv); | ||
| 311 | for (i = 0; i < tbl->size; i++) { | 132 | for (i = 0; i < tbl->size; i++) { |
| 312 | nft_hash_for_each_entry(he, tbl->buckets[i]) { | 133 | rht_for_each_entry_rcu(he, tbl->buckets[i], node) { |
| 313 | if (iter->count < iter->skip) | 134 | if (iter->count < iter->skip) |
| 314 | goto cont; | 135 | goto cont; |
| 315 | 136 | ||
| @@ -329,48 +150,46 @@ cont: | |||
| 329 | 150 | ||
| 330 | static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) | 151 | static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) |
| 331 | { | 152 | { |
| 332 | return sizeof(struct nft_hash); | 153 | return sizeof(struct rhashtable); |
| 154 | } | ||
| 155 | |||
| 156 | static int lockdep_nfnl_lock_is_held(void) | ||
| 157 | { | ||
| 158 | return lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES); | ||
| 333 | } | 159 | } |
| 334 | 160 | ||
| 335 | static int nft_hash_init(const struct nft_set *set, | 161 | static int nft_hash_init(const struct nft_set *set, |
| 336 | const struct nft_set_desc *desc, | 162 | const struct nft_set_desc *desc, |
| 337 | const struct nlattr * const tb[]) | 163 | const struct nlattr * const tb[]) |
| 338 | { | 164 | { |
| 339 | struct nft_hash *priv = nft_set_priv(set); | 165 | struct rhashtable *priv = nft_set_priv(set); |
| 340 | struct nft_hash_table *tbl; | 166 | struct rhashtable_params params = { |
| 341 | unsigned int size; | 167 | .nelem_hint = desc->size ? : NFT_HASH_ELEMENT_HINT, |
| 168 | .head_offset = offsetof(struct nft_hash_elem, node), | ||
| 169 | .key_offset = offsetof(struct nft_hash_elem, key), | ||
| 170 | .key_len = set->klen, | ||
| 171 | .hashfn = jhash, | ||
| 172 | .grow_decision = rht_grow_above_75, | ||
| 173 | .shrink_decision = rht_shrink_below_30, | ||
| 174 | .mutex_is_held = lockdep_nfnl_lock_is_held, | ||
| 175 | }; | ||
| 342 | 176 | ||
| 343 | if (unlikely(!nft_hash_rnd_initted)) { | 177 | return rhashtable_init(priv, &params); |
| 344 | get_random_bytes(&nft_hash_rnd, 4); | ||
| 345 | nft_hash_rnd_initted = true; | ||
| 346 | } | ||
| 347 | |||
| 348 | size = NFT_HASH_MIN_SIZE; | ||
| 349 | if (desc->size) | ||
| 350 | size = nft_hash_tbl_size(desc->size); | ||
| 351 | |||
| 352 | tbl = nft_hash_tbl_alloc(size); | ||
| 353 | if (tbl == NULL) | ||
| 354 | return -ENOMEM; | ||
| 355 | RCU_INIT_POINTER(priv->tbl, tbl); | ||
| 356 | return 0; | ||
| 357 | } | 178 | } |
| 358 | 179 | ||
| 359 | static void nft_hash_destroy(const struct nft_set *set) | 180 | static void nft_hash_destroy(const struct nft_set *set) |
| 360 | { | 181 | { |
| 361 | const struct nft_hash *priv = nft_set_priv(set); | 182 | const struct rhashtable *priv = nft_set_priv(set); |
| 362 | const struct nft_hash_table *tbl = nft_dereference(priv->tbl); | 183 | const struct bucket_table *tbl; |
| 363 | struct nft_hash_elem *he, *next; | 184 | struct nft_hash_elem *he, *next; |
| 364 | unsigned int i; | 185 | unsigned int i; |
| 365 | 186 | ||
| 366 | for (i = 0; i < tbl->size; i++) { | 187 | tbl = rht_dereference(priv->tbl, priv); |
| 367 | for (he = nft_dereference(tbl->buckets[i]); he != NULL; | 188 | for (i = 0; i < tbl->size; i++) |
| 368 | he = next) { | 189 | rht_for_each_entry_safe(he, next, tbl->buckets[i], priv, node) |
| 369 | next = nft_dereference(he->next); | ||
| 370 | nft_hash_elem_destroy(set, he); | 190 | nft_hash_elem_destroy(set, he); |
| 371 | } | 191 | |
| 372 | } | 192 | rhashtable_destroy(priv); |
| 373 | kfree(tbl); | ||
| 374 | } | 193 | } |
| 375 | 194 | ||
| 376 | static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, | 195 | static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, |
| @@ -383,8 +202,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, | |||
| 383 | esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]); | 202 | esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]); |
| 384 | 203 | ||
| 385 | if (desc->size) { | 204 | if (desc->size) { |
| 386 | est->size = sizeof(struct nft_hash) + | 205 | est->size = sizeof(struct rhashtable) + |
| 387 | nft_hash_tbl_size(desc->size) * | 206 | roundup_pow_of_two(desc->size * 4 / 3) * |
| 388 | sizeof(struct nft_hash_elem *) + | 207 | sizeof(struct nft_hash_elem *) + |
| 389 | desc->size * esize; | 208 | desc->size * esize; |
| 390 | } else { | 209 | } else { |
