about | summary | refs | log | tree | commit | diff | stats
path: root/net/netfilter
diff options
context:
space:
mode:
author: Thomas Graf <tgraf@suug.ch> 2015-01-02 17:00:20 -0500
committer: David S. Miller <davem@davemloft.net> 2015-01-03 14:32:57 -0500
commit: 97defe1ecf868b8127f8e62395499d6a06e4c4b1 (patch)
tree: d3ed6d3db4943e01b1ae58e73580537ba1642d9e /net/netfilter
parent: 113948d841e8d78039e5dbbb5248f5b73e99eafa (diff)
rhashtable: Per bucket locks & deferred expansion/shrinking
Introduces an array of spinlocks to protect bucket mutations. The number of spinlocks per CPU is configurable, and the lock for a bucket is selected based on the hash of the bucket. This allows for parallel insertions and removals of entries which do not share a lock. The patch also defers expansion and shrinking to a worker queue, which allows insertion and removal from atomic context. Insertions and deletions may occur in parallel to it and are only held up briefly while the particular bucket is linked or unzipped. Mutations of the bucket table pointer are protected by a new mutex; read access is RCU protected. In the event of an expansion or shrinking, the newly allocated bucket table is exposed as a so-called future table as soon as the resize process starts. Lookups, deletions, and insertions will briefly use both tables. The future table becomes the main table after an RCU grace period has elapsed and the initial linking of the old to the new table has been performed. Optimization of the chains to make use of the new number of buckets follows only once the new table is in use. The side effect of this is that during that RCU grace period, a bucket traversal using any rht_for_each() variant on the main table will not see any insertions performed during the RCU grace period which would at that point land in the future table. The lookup will see them as it searches both tables if needed. Having multiple insertions and removals occur in parallel requires nelems to become an atomic counter. Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netfilter')
-rw-r--r-- net/netfilter/nft_hash.c 27
1 files changed, 12 insertions, 15 deletions
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 7f903cf9a1b9..75887d7d2c6a 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -33,7 +33,7 @@ static bool nft_hash_lookup(const struct nft_set *set,
33 const struct nft_data *key, 33 const struct nft_data *key,
34 struct nft_data *data) 34 struct nft_data *data)
35{ 35{
36 const struct rhashtable *priv = nft_set_priv(set); 36 struct rhashtable *priv = nft_set_priv(set);
37 const struct nft_hash_elem *he; 37 const struct nft_hash_elem *he;
38 38
39 he = rhashtable_lookup(priv, key); 39 he = rhashtable_lookup(priv, key);
@@ -113,7 +113,7 @@ static bool nft_hash_compare(void *ptr, void *arg)
113 113
114static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) 114static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
115{ 115{
116 const struct rhashtable *priv = nft_set_priv(set); 116 struct rhashtable *priv = nft_set_priv(set);
117 struct nft_compare_arg arg = { 117 struct nft_compare_arg arg = {
118 .set = set, 118 .set = set,
119 .elem = elem, 119 .elem = elem,
@@ -129,7 +129,7 @@ static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
129static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, 129static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
130 struct nft_set_iter *iter) 130 struct nft_set_iter *iter)
131{ 131{
132 const struct rhashtable *priv = nft_set_priv(set); 132 struct rhashtable *priv = nft_set_priv(set);
133 const struct bucket_table *tbl; 133 const struct bucket_table *tbl;
134 const struct nft_hash_elem *he; 134 const struct nft_hash_elem *he;
135 struct nft_set_elem elem; 135 struct nft_set_elem elem;
@@ -162,13 +162,6 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
162 return sizeof(struct rhashtable); 162 return sizeof(struct rhashtable);
163} 163}
164 164
165#ifdef CONFIG_PROVE_LOCKING
166static int lockdep_nfnl_lock_is_held(void *parent)
167{
168 return lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES);
169}
170#endif
171
172static int nft_hash_init(const struct nft_set *set, 165static int nft_hash_init(const struct nft_set *set,
173 const struct nft_set_desc *desc, 166 const struct nft_set_desc *desc,
174 const struct nlattr * const tb[]) 167 const struct nlattr * const tb[])
@@ -182,9 +175,6 @@ static int nft_hash_init(const struct nft_set *set,
182 .hashfn = jhash, 175 .hashfn = jhash,
183 .grow_decision = rht_grow_above_75, 176 .grow_decision = rht_grow_above_75,
184 .shrink_decision = rht_shrink_below_30, 177 .shrink_decision = rht_shrink_below_30,
185#ifdef CONFIG_PROVE_LOCKING
186 .mutex_is_held = lockdep_nfnl_lock_is_held,
187#endif
188 }; 178 };
189 179
190 return rhashtable_init(priv, &params); 180 return rhashtable_init(priv, &params);
@@ -192,16 +182,23 @@ static int nft_hash_init(const struct nft_set *set,
192 182
193static void nft_hash_destroy(const struct nft_set *set) 183static void nft_hash_destroy(const struct nft_set *set)
194{ 184{
195 const struct rhashtable *priv = nft_set_priv(set); 185 struct rhashtable *priv = nft_set_priv(set);
196 const struct bucket_table *tbl = priv->tbl; 186 const struct bucket_table *tbl;
197 struct nft_hash_elem *he; 187 struct nft_hash_elem *he;
198 struct rhash_head *pos, *next; 188 struct rhash_head *pos, *next;
199 unsigned int i; 189 unsigned int i;
200 190
191 /* Stop an eventual async resizing */
192 priv->being_destroyed = true;
193 mutex_lock(&priv->mutex);
194
195 tbl = rht_dereference(priv->tbl, priv);
201 for (i = 0; i < tbl->size; i++) { 196 for (i = 0; i < tbl->size; i++) {
202 rht_for_each_entry_safe(he, pos, next, tbl, i, node) 197 rht_for_each_entry_safe(he, pos, next, tbl, i, node)
203 nft_hash_elem_destroy(set, he); 198 nft_hash_elem_destroy(set, he);
204 } 199 }
200 mutex_unlock(&priv->mutex);
201
205 rhashtable_destroy(priv); 202 rhashtable_destroy(priv);
206} 203}
207 204