 MAINTAINERS                     |   1
 include/linux/skbuff.h          |   2
 include/net/flow.h              |   5
 include/net/flowcache.h         |  25
 include/net/netns/xfrm.h        |  12
 include/net/xfrm.h              |  25
 include/uapi/linux/pfkeyv2.h    |  15
 include/uapi/linux/xfrm.h       |  10
 net/core/flow.c                 | 127
 net/core/skbuff.c               |  26
 net/ipv4/ah4.c                  |  53
 net/ipv6/ah6.c                  |  56
 net/key/af_key.c                |  39
 net/xfrm/xfrm_policy.c          |  35
 net/xfrm/xfrm_state.c           |  72
 net/xfrm/xfrm_user.c            |  37
 security/selinux/include/xfrm.h |   5
 17 files changed, 396 insertions(+), 149 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index db8bb0d2379e..b0196ab3e489 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6009,6 +6009,7 @@ L: netdev@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git
 S:	Maintained
+F:	net/core/flow.c
 F:	net/xfrm/
 F:	net/key/
 F:	net/ipv4/xfrm*
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3ebbbe7b6d05..11b6925f0e96 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -691,6 +691,8 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
 			     unsigned int headroom);
 struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
 				int newtailroom, gfp_t priority);
+int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
+			int offset, int len);
 int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset,
 		 int len);
 int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
diff --git a/include/net/flow.h b/include/net/flow.h
index d23e7fa2042e..bee3741e5a6f 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -218,9 +218,10 @@ struct flow_cache_object *flow_cache_lookup(struct net *net,
 					      const struct flowi *key, u16 family,
 					      u8 dir, flow_resolve_t resolver,
 					      void *ctx);
+int flow_cache_init(struct net *net);
 
-void flow_cache_flush(void);
-void flow_cache_flush_deferred(void);
+void flow_cache_flush(struct net *net);
+void flow_cache_flush_deferred(struct net *net);
 extern atomic_t flow_cache_genid;
 
 #endif
diff --git a/include/net/flowcache.h b/include/net/flowcache.h
new file mode 100644
index 000000000000..c8f665ec6e0d
--- /dev/null
+++ b/include/net/flowcache.h
@@ -0,0 +1,25 @@
+#ifndef _NET_FLOWCACHE_H
+#define _NET_FLOWCACHE_H
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/notifier.h>
+
+struct flow_cache_percpu {
+	struct hlist_head		*hash_table;
+	int				hash_count;
+	u32				hash_rnd;
+	int				hash_rnd_recalc;
+	struct tasklet_struct		flush_tasklet;
+};
+
+struct flow_cache {
+	u32				hash_shift;
+	struct flow_cache_percpu __percpu *percpu;
+	struct notifier_block		hotcpu_notifier;
+	int				low_watermark;
+	int				high_watermark;
+	struct timer_list		rnd_timer;
+};
+#endif	/* _NET_FLOWCACHE_H */
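
Because flow_cache_global is embedded by value in struct netns_xfrm (next hunk), net/core/flow.c can recover the owning namespace state from a bare struct flow_cache pointer. A minimal sketch of the pattern, with a hypothetical helper name; the actual diff open-codes the same container_of() at each call site:

static inline struct netns_xfrm *flow_cache_to_xfrm(struct flow_cache *fc)
{
	/* Valid only because flow_cache_global is embedded, not a pointer. */
	return container_of(fc, struct netns_xfrm, flow_cache_global);
}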
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 1006a265beb3..51f0dce7b643 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -6,6 +6,7 @@
 #include <linux/workqueue.h>
 #include <linux/xfrm.h>
 #include <net/dst_ops.h>
+#include <net/flowcache.h>
 
 struct ctl_table_header;
 
@@ -58,9 +59,18 @@ struct netns_xfrm {
 	struct dst_ops		xfrm6_dst_ops;
 #endif
 	spinlock_t		xfrm_state_lock;
-	spinlock_t		xfrm_policy_sk_bundle_lock;
 	rwlock_t		xfrm_policy_lock;
 	struct mutex		xfrm_cfg_mutex;
+
+	/* flow cache part */
+	struct flow_cache	flow_cache_global;
+	struct kmem_cache	*flow_cachep;
+	atomic_t		flow_cache_genid;
+	struct list_head	flow_cache_gc_list;
+	spinlock_t		flow_cache_gc_lock;
+	struct work_struct	flow_cache_gc_work;
+	struct work_struct	flow_cache_flush_work;
+	struct mutex		flow_flush_sem;
 };
 
 #endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index afa5730fb3bd..45332acac022 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -118,11 +118,10 @@
 struct xfrm_state_walk {
 	struct list_head	all;
 	u8			state;
-	union {
-		u8		dying;
-		u8		proto;
-	};
+	u8			dying;
+	u8			proto;
 	u32			seq;
+	struct xfrm_filter	*filter;
 };
 
 /* Full description of state of transformer. */
@@ -594,6 +593,7 @@ struct xfrm_mgr {
 			   const struct xfrm_migrate *m,
 			   int num_bundles,
 			   const struct xfrm_kmaddress *k);
+	bool			(*is_alive)(const struct km_event *c);
 };
 
 int xfrm_register_km(struct xfrm_mgr *km);
@@ -1405,7 +1405,8 @@ static inline void xfrm_sysctl_fini(struct net *net)
 }
 #endif
 
-void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto);
+void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
+			  struct xfrm_filter *filter);
 int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
 		    int (*func)(struct xfrm_state *, int, void*), void *);
 void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net);
@@ -1646,6 +1647,20 @@ static inline int xfrm_aevent_is_on(struct net *net)
 	rcu_read_unlock();
 	return ret;
 }
+
+static inline int xfrm_acquire_is_on(struct net *net)
+{
+	struct sock *nlsk;
+	int ret = 0;
+
+	rcu_read_lock();
+	nlsk = rcu_dereference(net->xfrm.nlsk);
+	if (nlsk)
+		ret = netlink_has_listeners(nlsk, XFRMNLGRP_ACQUIRE);
+	rcu_read_unlock();
+
+	return ret;
+}
 #endif
 
 static inline int xfrm_alg_len(const struct xfrm_algo *alg)
diff --git a/include/uapi/linux/pfkeyv2.h b/include/uapi/linux/pfkeyv2.h
index 0b80c806631f..ada7f0171ccc 100644
--- a/include/uapi/linux/pfkeyv2.h
+++ b/include/uapi/linux/pfkeyv2.h
@@ -235,6 +235,18 @@ struct sadb_x_kmaddress {
 } __attribute__((packed));
 /* sizeof(struct sadb_x_kmaddress) == 8 */
 
+/* To specify the SA dump filter */
+struct sadb_x_filter {
+	__u16	sadb_x_filter_len;
+	__u16	sadb_x_filter_exttype;
+	__u32	sadb_x_filter_saddr[4];
+	__u32	sadb_x_filter_daddr[4];
+	__u16	sadb_x_filter_family;
+	__u8	sadb_x_filter_splen;
+	__u8	sadb_x_filter_dplen;
+} __attribute__((packed));
+/* sizeof(struct sadb_x_filter) == 40 */
+
 /* Message types */
 #define SADB_RESERVED		0
 #define SADB_GETSPI		1
@@ -358,7 +370,8 @@ struct sadb_x_kmaddress {
 #define SADB_X_EXT_SEC_CTX		24
 /* Used with MIGRATE to pass @ to IKE for negotiation */
 #define SADB_X_EXT_KMADDRESS		25
-#define SADB_EXT_MAX			25
+#define SADB_X_EXT_FILTER		26
+#define SADB_EXT_MAX			26
 
 /* Identity Extension values */
 #define SADB_IDENTTYPE_RESERVED	0
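
For illustration, a minimal userspace sketch of a filtered SADB_DUMP using the extension defined above; the helper name, the ESP SA type, and the /32 destination filter are assumptions for the example, and error handling is trimmed. PF_KEY lengths are in 64-bit words, and pfkey_dump() in the af_key.c hunk below copies these fields into a struct xfrm_filter before starting the state walk.

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/pfkeyv2.h>

/* Request a dump of ESP SAs whose destination is exactly 192.0.2.1. */
static int pfkey_dump_filtered(int pfkey_fd)
{
	struct {
		struct sadb_msg		msg;
		struct sadb_x_filter	flt;
	} req;

	memset(&req, 0, sizeof(req));
	req.msg.sadb_msg_version = PF_KEY_V2;
	req.msg.sadb_msg_type = SADB_DUMP;
	req.msg.sadb_msg_satype = SADB_SATYPE_ESP;
	req.msg.sadb_msg_len = sizeof(req) / 8;	/* header + filter ext */
	req.msg.sadb_msg_pid = getpid();

	req.flt.sadb_x_filter_exttype = SADB_X_EXT_FILTER;
	req.flt.sadb_x_filter_len = sizeof(req.flt) / 8;
	req.flt.sadb_x_filter_family = AF_INET;
	inet_pton(AF_INET, "192.0.2.1", req.flt.sadb_x_filter_daddr);
	req.flt.sadb_x_filter_splen = 0;	/* any source */
	req.flt.sadb_x_filter_dplen = 32;	/* exact destination */

	return send(pfkey_fd, &req, sizeof(req), 0);
}

pfkey_fd would come from socket(PF_KEY, SOCK_RAW, PF_KEY_V2).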
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index a8cd6a4a2970..6550c679584f 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -298,6 +298,8 @@ enum xfrm_attr_type_t {
 	XFRMA_TFCPAD,		/* __u32 */
 	XFRMA_REPLAY_ESN_VAL,	/* struct xfrm_replay_esn */
 	XFRMA_SA_EXTRA_FLAGS,	/* __u32 */
+	XFRMA_PROTO,		/* __u8 */
+	XFRMA_FILTER,		/* struct xfrm_filter */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
@@ -474,6 +476,14 @@ struct xfrm_user_mapping {
 	__be16				new_sport;
 };
 
+struct xfrm_filter {
+	xfrm_address_t			saddr;
+	xfrm_address_t			daddr;
+	__u16				family;
+	__u8				splen;
+	__u8				dplen;
+};
+
 #ifndef __KERNEL__
 /* backwards compatibility for userspace */
 #define XFRMGRP_ACQUIRE		1
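
A comparable sketch for the netlink side, hand-packing the new XFRMA_FILTER attribute onto an XFRM_MSG_GETSA dump request. No family header precedes the attributes, matching the hdrlen of 0 that xfrm_dump_sa() passes to nlmsg_parse() further below; the function name and the 10.0.0.0/8 source filter are illustrative only.

#include <string.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/netlink.h>
#include <linux/xfrm.h>

/* Start an SA dump limited to SAs whose source lies in 10.0.0.0/8. */
static int xfrm_dump_filtered(int nl_fd)
{
	char buf[NLMSG_SPACE(0) + NLA_HDRLEN +
		 NLA_ALIGN(sizeof(struct xfrm_filter))];
	struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
	struct nlattr *nla;
	struct xfrm_filter flt;

	memset(buf, 0, sizeof(buf));
	nlh->nlmsg_type = XFRM_MSG_GETSA;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	nlh->nlmsg_len = NLMSG_LENGTH(0);	/* attributes follow directly */

	memset(&flt, 0, sizeof(flt));
	flt.family = AF_INET;
	inet_pton(AF_INET, "10.0.0.0", &flt.saddr.a4);
	flt.splen = 8;		/* source prefix length */
	flt.dplen = 0;		/* any destination */

	nla = (struct nlattr *)(buf + NLMSG_ALIGN(nlh->nlmsg_len));
	nla->nla_type = XFRMA_FILTER;
	nla->nla_len = NLA_HDRLEN + sizeof(flt);
	memcpy((char *)nla + NLA_HDRLEN, &flt, sizeof(flt));
	nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + NLA_ALIGN(nla->nla_len);

	return send(nl_fd, buf, nlh->nlmsg_len, 0);
}

nl_fd would come from socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM).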
diff --git a/net/core/flow.c b/net/core/flow.c
index dfa602ceb8cd..344a184011fd 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -24,6 +24,7 @@
 #include <net/flow.h>
 #include <linux/atomic.h>
 #include <linux/security.h>
+#include <net/net_namespace.h>
 
 struct flow_cache_entry {
 	union {
@@ -38,37 +39,12 @@ struct flow_cache_entry {
 	struct flow_cache_object	*object;
 };
 
-struct flow_cache_percpu {
-	struct hlist_head		*hash_table;
-	int				hash_count;
-	u32				hash_rnd;
-	int				hash_rnd_recalc;
-	struct tasklet_struct		flush_tasklet;
-};
-
 struct flow_flush_info {
 	struct flow_cache		*cache;
 	atomic_t			cpuleft;
 	struct completion		completion;
 };
 
-struct flow_cache {
-	u32				hash_shift;
-	struct flow_cache_percpu __percpu *percpu;
-	struct notifier_block		hotcpu_notifier;
-	int				low_watermark;
-	int				high_watermark;
-	struct timer_list		rnd_timer;
-};
-
-atomic_t flow_cache_genid = ATOMIC_INIT(0);
-EXPORT_SYMBOL(flow_cache_genid);
-static struct flow_cache flow_cache_global;
-static struct kmem_cache *flow_cachep __read_mostly;
-
-static DEFINE_SPINLOCK(flow_cache_gc_lock);
-static LIST_HEAD(flow_cache_gc_list);
-
 #define flow_cache_hash_size(cache)	(1 << (cache)->hash_shift)
 #define FLOW_HASH_RND_PERIOD		(10 * 60 * HZ)
 
@@ -84,46 +60,50 @@ static void flow_cache_new_hashrnd(unsigned long arg)
 	add_timer(&fc->rnd_timer);
 }
 
-static int flow_entry_valid(struct flow_cache_entry *fle)
+static int flow_entry_valid(struct flow_cache_entry *fle,
+				struct netns_xfrm *xfrm)
 {
-	if (atomic_read(&flow_cache_genid) != fle->genid)
+	if (atomic_read(&xfrm->flow_cache_genid) != fle->genid)
 		return 0;
 	if (fle->object && !fle->object->ops->check(fle->object))
 		return 0;
 	return 1;
 }
 
-static void flow_entry_kill(struct flow_cache_entry *fle)
+static void flow_entry_kill(struct flow_cache_entry *fle,
+				struct netns_xfrm *xfrm)
 {
 	if (fle->object)
 		fle->object->ops->delete(fle->object);
-	kmem_cache_free(flow_cachep, fle);
+	kmem_cache_free(xfrm->flow_cachep, fle);
 }
 
 static void flow_cache_gc_task(struct work_struct *work)
 {
 	struct list_head gc_list;
 	struct flow_cache_entry *fce, *n;
+	struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
+						flow_cache_gc_work);
 
 	INIT_LIST_HEAD(&gc_list);
-	spin_lock_bh(&flow_cache_gc_lock);
-	list_splice_tail_init(&flow_cache_gc_list, &gc_list);
-	spin_unlock_bh(&flow_cache_gc_lock);
+	spin_lock_bh(&xfrm->flow_cache_gc_lock);
+	list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
+	spin_unlock_bh(&xfrm->flow_cache_gc_lock);
 
 	list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
-		flow_entry_kill(fce);
+		flow_entry_kill(fce, xfrm);
 }
-static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);
 
 static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
-				     int deleted, struct list_head *gc_list)
+				     int deleted, struct list_head *gc_list,
+				     struct netns_xfrm *xfrm)
 {
 	if (deleted) {
 		fcp->hash_count -= deleted;
-		spin_lock_bh(&flow_cache_gc_lock);
-		list_splice_tail(gc_list, &flow_cache_gc_list);
-		spin_unlock_bh(&flow_cache_gc_lock);
-		schedule_work(&flow_cache_gc_work);
+		spin_lock_bh(&xfrm->flow_cache_gc_lock);
+		list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
+		spin_unlock_bh(&xfrm->flow_cache_gc_lock);
+		schedule_work(&xfrm->flow_cache_gc_work);
 	}
 }
 
@@ -135,6 +115,8 @@ static void __flow_cache_shrink(struct flow_cache *fc,
 	struct hlist_node *tmp;
 	LIST_HEAD(gc_list);
 	int i, deleted = 0;
+	struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
+						flow_cache_global);
 
 	for (i = 0; i < flow_cache_hash_size(fc); i++) {
 		int saved = 0;
@@ -142,7 +124,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
 		hlist_for_each_entry_safe(fle, tmp,
 					  &fcp->hash_table[i], u.hlist) {
 			if (saved < shrink_to &&
-			    flow_entry_valid(fle)) {
+			    flow_entry_valid(fle, xfrm)) {
 				saved++;
 			} else {
 				deleted++;
@@ -152,7 +134,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
 		}
 	}
 
-	flow_cache_queue_garbage(fcp, deleted, &gc_list);
+	flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
 }
 
 static void flow_cache_shrink(struct flow_cache *fc,
@@ -208,7 +190,7 @@ struct flow_cache_object *
 flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
 		  flow_resolve_t resolver, void *ctx)
 {
-	struct flow_cache *fc = &flow_cache_global;
+	struct flow_cache *fc = &net->xfrm.flow_cache_global;
 	struct flow_cache_percpu *fcp;
 	struct flow_cache_entry *fle, *tfle;
 	struct flow_cache_object *flo;
@@ -248,7 +230,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
 		if (fcp->hash_count > fc->high_watermark)
 			flow_cache_shrink(fc, fcp);
 
-		fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
+		fle = kmem_cache_alloc(net->xfrm.flow_cachep, GFP_ATOMIC);
 		if (fle) {
 			fle->net = net;
 			fle->family = family;
@@ -258,7 +240,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
 			hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
 			fcp->hash_count++;
 		}
-	} else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
+	} else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) {
 		flo = fle->object;
 		if (!flo)
 			goto ret_object;
@@ -279,7 +261,7 @@ nocache:
 	}
 	flo = resolver(net, key, family, dir, flo, ctx);
 	if (fle) {
-		fle->genid = atomic_read(&flow_cache_genid);
+		fle->genid = atomic_read(&net->xfrm.flow_cache_genid);
 		if (!IS_ERR(flo))
 			fle->object = flo;
 		else
@@ -303,12 +285,14 @@ static void flow_cache_flush_tasklet(unsigned long data)
 	struct hlist_node *tmp;
 	LIST_HEAD(gc_list);
 	int i, deleted = 0;
+	struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
+						flow_cache_global);
 
 	fcp = this_cpu_ptr(fc->percpu);
 	for (i = 0; i < flow_cache_hash_size(fc); i++) {
 		hlist_for_each_entry_safe(fle, tmp,
 					  &fcp->hash_table[i], u.hlist) {
-			if (flow_entry_valid(fle))
+			if (flow_entry_valid(fle, xfrm))
 				continue;
 
 			deleted++;
@@ -317,7 +301,7 @@ static void flow_cache_flush_tasklet(unsigned long data)
 		}
 	}
 
-	flow_cache_queue_garbage(fcp, deleted, &gc_list);
+	flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
 
 	if (atomic_dec_and_test(&info->cpuleft))
 		complete(&info->completion);
@@ -351,10 +335,9 @@ static void flow_cache_flush_per_cpu(void *data)
 		tasklet_schedule(tasklet);
 }
 
-void flow_cache_flush(void)
+void flow_cache_flush(struct net *net)
 {
 	struct flow_flush_info info;
-	static DEFINE_MUTEX(flow_flush_sem);
 	cpumask_var_t mask;
 	int i, self;
 
@@ -365,8 +348,8 @@ void flow_cache_flush(void)
 
 	/* Don't want cpus going down or up during this. */
 	get_online_cpus();
-	mutex_lock(&flow_flush_sem);
-	info.cache = &flow_cache_global;
+	mutex_lock(&net->xfrm.flow_flush_sem);
+	info.cache = &net->xfrm.flow_cache_global;
 	for_each_online_cpu(i)
 		if (!flow_cache_percpu_empty(info.cache, i))
 			cpumask_set_cpu(i, mask);
@@ -386,21 +369,23 @@ void flow_cache_flush(void)
 	wait_for_completion(&info.completion);
 
 done:
-	mutex_unlock(&flow_flush_sem);
+	mutex_unlock(&net->xfrm.flow_flush_sem);
 	put_online_cpus();
 	free_cpumask_var(mask);
 }
 
 static void flow_cache_flush_task(struct work_struct *work)
 {
-	flow_cache_flush();
-}
+	struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
+						flow_cache_gc_work);
+	struct net *net = container_of(xfrm, struct net, xfrm);
 
-static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task);
+	flow_cache_flush(net);
+}
 
-void flow_cache_flush_deferred(void)
+void flow_cache_flush_deferred(struct net *net)
 {
-	schedule_work(&flow_cache_flush_work);
+	schedule_work(&net->xfrm.flow_cache_flush_work);
 }
 
 static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
@@ -425,7 +410,8 @@ static int flow_cache_cpu(struct notifier_block *nfb,
 				  unsigned long action,
 				  void *hcpu)
 {
-	struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
+	struct flow_cache *fc = container_of(nfb, struct flow_cache,
+						hotcpu_notifier);
 	int res, cpu = (unsigned long) hcpu;
 	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
 
@@ -444,9 +430,20 @@ static int flow_cache_cpu(struct notifier_block *nfb,
 	return NOTIFY_OK;
 }
 
-static int __init flow_cache_init(struct flow_cache *fc)
+int flow_cache_init(struct net *net)
 {
 	int i;
+	struct flow_cache *fc = &net->xfrm.flow_cache_global;
+
+	/* Initialize per-net flow cache global variables here */
+	net->xfrm.flow_cachep = kmem_cache_create("flow_cache",
+						sizeof(struct flow_cache_entry),
+						0, SLAB_PANIC, NULL);
+	spin_lock_init(&net->xfrm.flow_cache_gc_lock);
+	INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list);
+	INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
+	INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
+	mutex_init(&net->xfrm.flow_flush_sem);
 
 	fc->hash_shift = 10;
 	fc->low_watermark = 2 * flow_cache_hash_size(fc);
@@ -484,14 +481,4 @@ err:
 
 	return -ENOMEM;
 }
-
-static int __init flow_cache_init_global(void)
-{
-	flow_cachep = kmem_cache_create("flow_cache",
-					sizeof(struct flow_cache_entry),
-					0, SLAB_PANIC, NULL);
-
-	return flow_cache_init(&flow_cache_global);
-}
-
-module_init(flow_cache_init_global);
+EXPORT_SYMBOL(flow_cache_init);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5976ef0846bd..f28c37996aad 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3281,6 +3281,32 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 	return elt;
 }
 
+/* Compared with skb_to_sgvec, skb_to_sgvec_nomark only maps the skb to the
+ * given sglist, without marking the sg entry that contains the last skb data
+ * as the end. The caller can thus manipulate the sg list at will when
+ * appending new data after the first call, without calling sg_unmark_end
+ * to extend the sg list.
+ *
+ * Scenario to use skb_to_sgvec_nomark:
+ * 1. sg_init_table
+ * 2. skb_to_sgvec_nomark(payload1)
+ * 3. skb_to_sgvec_nomark(payload2)
+ *
+ * This is equivalent to:
+ * 1. sg_init_table
+ * 2. skb_to_sgvec(payload1)
+ * 3. sg_unmark_end
+ * 4. skb_to_sgvec(payload2)
+ *
+ * When mapping multiple payloads conditionally, skb_to_sgvec_nomark
+ * is preferable.
+ */
+int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
+			int offset, int len)
+{
+	return __skb_to_sgvec(skb, sg, offset, len);
+}
+EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
+
 int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 {
 	int nsg = __skb_to_sgvec(skb, sg, offset, len);
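
The pattern that comment describes is exactly what the AH changes below rely on. A condensed, hypothetical sketch of the call sequence (the helper name is invented for illustration):

/* Map an skb once, then conditionally append one extra buffer (e.g. the
 * ESN high bits) without sg_unmark_end(): sg_init_table() has already
 * marked the true last entry, and skb_to_sgvec_nomark() leaves no end
 * mark of its own inside the table.
 */
static int map_payload_and_seqhi(struct sk_buff *skb, struct scatterlist *sg,
				 int nfrags, __be32 *seqhi, bool esn)
{
	sg_init_table(sg, nfrags + (esn ? 1 : 0));
	skb_to_sgvec_nomark(skb, sg, 0, skb->len);

	if (esn)
		sg_set_buf(sg + nfrags, seqhi, sizeof(*seqhi));

	/* Total number of bytes the hash request should cover. */
	return skb->len + (esn ? (int)sizeof(*seqhi) : 0);
}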
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 717902669d2f..54b965ddcb19 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -155,6 +155,10 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 	struct iphdr *iph, *top_iph;
 	struct ip_auth_hdr *ah;
 	struct ah_data *ahp;
+	int seqhi_len = 0;
+	__be32 *seqhi;
+	int sglists = 0;
+	struct scatterlist *seqhisg;
 
 	ahp = x->data;
 	ahash = ahp->ahash;
@@ -167,14 +171,19 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 	ah = ip_auth_hdr(skb);
 	ihl = ip_hdrlen(skb);
 
+	if (x->props.flags & XFRM_STATE_ESN) {
+		sglists = 1;
+		seqhi_len = sizeof(*seqhi);
+	}
 	err = -ENOMEM;
-	iph = ah_alloc_tmp(ahash, nfrags, ihl);
+	iph = ah_alloc_tmp(ahash, nfrags + sglists, ihl + seqhi_len);
 	if (!iph)
 		goto out;
-
-	icv = ah_tmp_icv(ahash, iph, ihl);
+	seqhi = (__be32 *)((char *)iph + ihl);
+	icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
 	req = ah_tmp_req(ahash, icv);
 	sg = ah_req_sg(ahash, req);
+	seqhisg = sg + nfrags;
 
 	memset(ah->auth_data, 0, ahp->icv_trunc_len);
 
@@ -210,10 +219,15 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 	ah->spi = x->id.spi;
 	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
 
-	sg_init_table(sg, nfrags);
-	skb_to_sgvec(skb, sg, 0, skb->len);
+	sg_init_table(sg, nfrags + sglists);
+	skb_to_sgvec_nomark(skb, sg, 0, skb->len);
 
-	ahash_request_set_crypt(req, sg, icv, skb->len);
+	if (x->props.flags & XFRM_STATE_ESN) {
+		/* Attach seqhi sg right after packet payload */
+		*seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+		sg_set_buf(seqhisg, seqhi, seqhi_len);
+	}
+	ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
 	ahash_request_set_callback(req, 0, ah_output_done, skb);
 
 	AH_SKB_CB(skb)->tmp = iph;
@@ -295,6 +309,10 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	struct ip_auth_hdr *ah;
 	struct ah_data *ahp;
 	int err = -ENOMEM;
+	int seqhi_len = 0;
+	__be32 *seqhi;
+	int sglists = 0;
+	struct scatterlist *seqhisg;
 
 	if (!pskb_may_pull(skb, sizeof(*ah)))
 		goto out;
@@ -335,14 +353,22 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	iph = ip_hdr(skb);
 	ihl = ip_hdrlen(skb);
 
-	work_iph = ah_alloc_tmp(ahash, nfrags, ihl + ahp->icv_trunc_len);
+	if (x->props.flags & XFRM_STATE_ESN) {
+		sglists = 1;
+		seqhi_len = sizeof(*seqhi);
+	}
+
+	work_iph = ah_alloc_tmp(ahash, nfrags + sglists, ihl +
+				ahp->icv_trunc_len + seqhi_len);
 	if (!work_iph)
 		goto out;
 
-	auth_data = ah_tmp_auth(work_iph, ihl);
+	seqhi = (__be32 *)((char *)work_iph + ihl);
+	auth_data = ah_tmp_auth(seqhi, seqhi_len);
 	icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
 	req = ah_tmp_req(ahash, icv);
 	sg = ah_req_sg(ahash, req);
+	seqhisg = sg + nfrags;
 
 	memcpy(work_iph, iph, ihl);
 	memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
@@ -361,10 +387,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb_push(skb, ihl);
 
-	sg_init_table(sg, nfrags);
-	skb_to_sgvec(skb, sg, 0, skb->len);
+	sg_init_table(sg, nfrags + sglists);
+	skb_to_sgvec_nomark(skb, sg, 0, skb->len);
 
-	ahash_request_set_crypt(req, sg, icv, skb->len);
+	if (x->props.flags & XFRM_STATE_ESN) {
+		/* Attach seqhi sg right after packet payload */
+		*seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+		sg_set_buf(seqhisg, seqhi, seqhi_len);
+	}
+	ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
 	ahash_request_set_callback(req, 0, ah_input_done, skb);
 
 	AH_SKB_CB(skb)->tmp = work_iph;
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 81e496a2e008..6c5f0949e0ab 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -346,6 +346,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	struct ip_auth_hdr *ah;
 	struct ah_data *ahp;
 	struct tmp_ext *iph_ext;
+	int seqhi_len = 0;
+	__be32 *seqhi;
+	int sglists = 0;
+	struct scatterlist *seqhisg;
 
 	ahp = x->data;
 	ahash = ahp->ahash;
@@ -359,15 +363,22 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	if (extlen)
 		extlen += sizeof(*iph_ext);
 
+	if (x->props.flags & XFRM_STATE_ESN) {
+		sglists = 1;
+		seqhi_len = sizeof(*seqhi);
+	}
 	err = -ENOMEM;
-	iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen);
+	iph_base = ah_alloc_tmp(ahash, nfrags + sglists, IPV6HDR_BASELEN +
+				extlen + seqhi_len);
 	if (!iph_base)
 		goto out;
 
 	iph_ext = ah_tmp_ext(iph_base);
-	icv = ah_tmp_icv(ahash, iph_ext, extlen);
+	seqhi = (__be32 *)((char *)iph_ext + extlen);
+	icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
 	req = ah_tmp_req(ahash, icv);
 	sg = ah_req_sg(ahash, req);
+	seqhisg = sg + nfrags;
 
 	ah = ip_auth_hdr(skb);
 	memset(ah->auth_data, 0, ahp->icv_trunc_len);
@@ -411,10 +422,15 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	ah->spi = x->id.spi;
 	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
 
-	sg_init_table(sg, nfrags);
-	skb_to_sgvec(skb, sg, 0, skb->len);
+	sg_init_table(sg, nfrags + sglists);
+	skb_to_sgvec_nomark(skb, sg, 0, skb->len);
 
-	ahash_request_set_crypt(req, sg, icv, skb->len);
+	if (x->props.flags & XFRM_STATE_ESN) {
+		/* Attach seqhi sg right after packet payload */
+		*seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+		sg_set_buf(seqhisg, seqhi, seqhi_len);
+	}
+	ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
 	ahash_request_set_callback(req, 0, ah6_output_done, skb);
 
 	AH_SKB_CB(skb)->tmp = iph_base;
@@ -514,6 +530,10 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	int nexthdr;
 	int nfrags;
 	int err = -ENOMEM;
+	int seqhi_len = 0;
+	__be32 *seqhi;
+	int sglists = 0;
+	struct scatterlist *seqhisg;
 
 	if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
 		goto out;
@@ -550,14 +570,22 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb_push(skb, hdr_len);
 
-	work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len);
+	if (x->props.flags & XFRM_STATE_ESN) {
+		sglists = 1;
+		seqhi_len = sizeof(*seqhi);
+	}
+
+	work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len +
+				ahp->icv_trunc_len + seqhi_len);
 	if (!work_iph)
 		goto out;
 
-	auth_data = ah_tmp_auth(work_iph, hdr_len);
-	icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
+	auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len);
+	seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len);
+	icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
 	req = ah_tmp_req(ahash, icv);
 	sg = ah_req_sg(ahash, req);
+	seqhisg = sg + nfrags;
 
 	memcpy(work_iph, ip6h, hdr_len);
 	memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
@@ -572,10 +600,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	ip6h->flow_lbl[2] = 0;
 	ip6h->hop_limit = 0;
 
-	sg_init_table(sg, nfrags);
-	skb_to_sgvec(skb, sg, 0, skb->len);
+	sg_init_table(sg, nfrags + sglists);
+	skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+
+	if (x->props.flags & XFRM_STATE_ESN) {
+		/* Attach seqhi sg right after packet payload */
+		*seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+		sg_set_buf(seqhisg, seqhi, seqhi_len);
+	}
 
-	ahash_request_set_crypt(req, sg, icv, skb->len);
+	ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
 	ahash_request_set_callback(req, 0, ah6_input_done, skb);
 
 	AH_SKB_CB(skb)->tmp = work_iph;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 1a04c1329362..a50d979b5926 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -365,6 +365,7 @@ static const u8 sadb_ext_min_len[] = {
 	[SADB_X_EXT_NAT_T_OA]		= (u8) sizeof(struct sadb_address),
 	[SADB_X_EXT_SEC_CTX]		= (u8) sizeof(struct sadb_x_sec_ctx),
 	[SADB_X_EXT_KMADDRESS]		= (u8) sizeof(struct sadb_x_kmaddress),
+	[SADB_X_EXT_FILTER]		= (u8) sizeof(struct sadb_x_filter),
 };
 
 /* Verify sadb_address_{len,prefixlen} against sa_family. */
@@ -1798,6 +1799,7 @@ static void pfkey_dump_sa_done(struct pfkey_sock *pfk)
 static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	u8 proto;
+	struct xfrm_filter *filter = NULL;
 	struct pfkey_sock *pfk = pfkey_sk(sk);
 
 	if (pfk->dump.dump != NULL)
@@ -1807,11 +1809,27 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
 	if (proto == 0)
 		return -EINVAL;
 
+	if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
+		struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
+
+		filter = kmalloc(sizeof(*filter), GFP_KERNEL);
+		if (filter == NULL)
+			return -ENOMEM;
+
+		memcpy(&filter->saddr, &xfilter->sadb_x_filter_saddr,
+		       sizeof(xfrm_address_t));
+		memcpy(&filter->daddr, &xfilter->sadb_x_filter_daddr,
+		       sizeof(xfrm_address_t));
+		filter->family = xfilter->sadb_x_filter_family;
+		filter->splen = xfilter->sadb_x_filter_splen;
+		filter->dplen = xfilter->sadb_x_filter_dplen;
+	}
+
 	pfk->dump.msg_version = hdr->sadb_msg_version;
 	pfk->dump.msg_portid = hdr->sadb_msg_pid;
 	pfk->dump.dump = pfkey_dump_sa;
 	pfk->dump.done = pfkey_dump_sa_done;
-	xfrm_state_walk_init(&pfk->dump.u.state, proto);
+	xfrm_state_walk_init(&pfk->dump.u.state, proto, filter);
 
 	return pfkey_do_dump(pfk);
 }
@@ -3059,6 +3077,24 @@ static u32 get_acqseq(void)
 	return res;
 }
 
+static bool pfkey_is_alive(const struct km_event *c)
+{
+	struct netns_pfkey *net_pfkey = net_generic(c->net, pfkey_net_id);
+	struct sock *sk;
+	bool is_alive = false;
+
+	rcu_read_lock();
+	sk_for_each_rcu(sk, &net_pfkey->table) {
+		if (pfkey_sk(sk)->registered) {
+			is_alive = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return is_alive;
+}
+
 static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp)
 {
 	struct sk_buff *skb;
@@ -3784,6 +3820,7 @@ static struct xfrm_mgr pfkeyv2_mgr =
 	.new_mapping	= pfkey_send_new_mapping,
 	.notify_policy	= pfkey_send_policy_notify,
 	.migrate	= pfkey_send_migrate,
+	.is_alive	= pfkey_is_alive,
 };
 
 static int __net_init pfkey_net_init(struct net *net)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 4b98b25793c5..bb3669d973a7 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -39,8 +39,6 @@
 #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
 #define XFRM_MAX_QUEUE_LEN	100
 
-static struct dst_entry *xfrm_policy_sk_bundles;
-
 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
 						__read_mostly;
@@ -661,7 +659,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	hlist_add_head(&policy->bydst, chain);
 	xfrm_pol_hold(policy);
 	net->xfrm.policy_count[dir]++;
-	atomic_inc(&flow_cache_genid);
+	atomic_inc(&net->xfrm.flow_cache_genid);
 
 	/* After previous checking, family can either be AF_INET or AF_INET6 */
 	if (policy->family == AF_INET)
@@ -2109,13 +2107,6 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 			goto no_transform;
 		}
 
-		dst_hold(&xdst->u.dst);
-
-		spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
-		xdst->u.dst.next = xfrm_policy_sk_bundles;
-		xfrm_policy_sk_bundles = &xdst->u.dst;
-		spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
-
 		route = xdst->route;
 	}
 }
@@ -2549,33 +2540,15 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
 	return dst;
 }
 
-static void __xfrm_garbage_collect(struct net *net)
-{
-	struct dst_entry *head, *next;
-
-	spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
-	head = xfrm_policy_sk_bundles;
-	xfrm_policy_sk_bundles = NULL;
-	spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
-
-	while (head) {
-		next = head->next;
-		dst_free(head);
-		head = next;
-	}
-}
-
 void xfrm_garbage_collect(struct net *net)
 {
-	flow_cache_flush();
-	__xfrm_garbage_collect(net);
+	flow_cache_flush(net);
 }
 EXPORT_SYMBOL(xfrm_garbage_collect);
 
 static void xfrm_garbage_collect_deferred(struct net *net)
 {
-	flow_cache_flush_deferred();
-	__xfrm_garbage_collect(net);
+	flow_cache_flush_deferred(net);
 }
 
 static void xfrm_init_pmtu(struct dst_entry *dst)
@@ -2944,9 +2917,9 @@ static int __net_init xfrm_net_init(struct net *net)
 	/* Initialize the per-net locks here */
 	spin_lock_init(&net->xfrm.xfrm_state_lock);
 	rwlock_init(&net->xfrm.xfrm_policy_lock);
-	spin_lock_init(&net->xfrm.xfrm_policy_sk_bundle_lock);
 	mutex_init(&net->xfrm.xfrm_cfg_mutex);
 
+	flow_cache_init(net);
 	return 0;
 
 out_sysctl:
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index a26b7aa79475..5339c26bb0cf 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -161,6 +161,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 int __xfrm_state_delete(struct xfrm_state *x);
 
 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
+bool km_is_alive(const struct km_event *c);
 void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
 
 static DEFINE_SPINLOCK(xfrm_type_lock);
@@ -788,6 +789,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	struct xfrm_state *best = NULL;
 	u32 mark = pol->mark.v & pol->mark.m;
 	unsigned short encap_family = tmpl->encap_family;
+	struct km_event c;
 
 	to_put = NULL;
 
@@ -832,6 +834,17 @@ found:
 			error = -EEXIST;
 			goto out;
 		}
+
+		c.net = net;
+		/* If the KMs have no listeners (yet...), avoid allocating an SA
+		 * for each and every packet - garbage collection might not
+		 * handle the flood.
+		 */
+		if (!km_is_alive(&c)) {
+			error = -ESRCH;
+			goto out;
+		}
+
 		x = xfrm_state_alloc(net);
 		if (x == NULL) {
 			error = -ENOMEM;
@@ -1135,10 +1148,9 @@ out:
 EXPORT_SYMBOL(xfrm_state_add);
 
 #ifdef CONFIG_XFRM_MIGRATE
-static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
+static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig)
 {
 	struct net *net = xs_net(orig);
-	int err = -ENOMEM;
 	struct xfrm_state *x = xfrm_state_alloc(net);
 	if (!x)
 		goto out;
@@ -1187,15 +1199,13 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
 	}
 
 	if (orig->replay_esn) {
-		err = xfrm_replay_clone(x, orig);
-		if (err)
+		if (xfrm_replay_clone(x, orig))
 			goto error;
 	}
 
 	memcpy(&x->mark, &orig->mark, sizeof(x->mark));
 
-	err = xfrm_init_state(x);
-	if (err)
+	if (xfrm_init_state(x) < 0)
 		goto error;
 
 	x->props.flags = orig->props.flags;
@@ -1210,8 +1220,6 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
 error:
 	xfrm_state_put(x);
 out:
-	if (errp)
-		*errp = err;
 	return NULL;
 }
 
@@ -1263,9 +1271,8 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
 					  struct xfrm_migrate *m)
 {
 	struct xfrm_state *xc;
-	int err;
 
-	xc = xfrm_state_clone(x, &err);
+	xc = xfrm_state_clone(x);
 	if (!xc)
 		return NULL;
 
@@ -1278,7 +1285,7 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
 	   state is to be updated as it is a part of triplet */
 		xfrm_state_insert(xc);
 	} else {
-		if ((err = xfrm_state_add(xc)) < 0)
+		if (xfrm_state_add(xc) < 0)
 			goto error;
 	}
 
@@ -1590,6 +1597,23 @@ unlock:
 }
 EXPORT_SYMBOL(xfrm_alloc_spi);
 
+static bool __xfrm_state_filter_match(struct xfrm_state *x,
+				      struct xfrm_filter *filter)
+{
+	if (filter) {
+		if ((filter->family == AF_INET ||
+		     filter->family == AF_INET6) &&
+		    x->props.family != filter->family)
+			return false;
+
+		return addr_match(&x->props.saddr, &filter->saddr,
+				  filter->splen) &&
+		       addr_match(&x->id.daddr, &filter->daddr,
+				  filter->dplen);
+	}
+	return true;
+}
+
 int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
 		    int (*func)(struct xfrm_state *, int, void*),
 		    void *data)
@@ -1612,6 +1636,8 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
 		state = container_of(x, struct xfrm_state, km);
 		if (!xfrm_id_proto_match(state->id.proto, walk->proto))
 			continue;
+		if (!__xfrm_state_filter_match(state, walk->filter))
+			continue;
 		err = func(state, walk->seq, data);
 		if (err) {
 			list_move_tail(&walk->all, &x->all);
@@ -1630,17 +1656,21 @@ out:
 }
 EXPORT_SYMBOL(xfrm_state_walk);
 
-void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
+void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
+			  struct xfrm_filter *filter)
 {
 	INIT_LIST_HEAD(&walk->all);
 	walk->proto = proto;
 	walk->state = XFRM_STATE_DEAD;
 	walk->seq = 0;
+	walk->filter = filter;
 }
 EXPORT_SYMBOL(xfrm_state_walk_init);
 
 void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)
 {
+	kfree(walk->filter);
+
 	if (list_empty(&walk->all))
 		return;
 
@@ -1793,6 +1823,24 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address
 }
 EXPORT_SYMBOL(km_report);
 
+bool km_is_alive(const struct km_event *c)
+{
+	struct xfrm_mgr *km;
+	bool is_alive = false;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
+		if (km->is_alive && km->is_alive(c)) {
+			is_alive = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return is_alive;
+}
+EXPORT_SYMBOL(km_is_alive);
+
 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
 {
 	int err;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 1ae3ec7c18b0..023e5e7ea4c6 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -142,7 +142,8 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
 	if (!rt)
 		return 0;
 
-	if (p->id.proto != IPPROTO_ESP)
+	/* Only ESP and AH support the ESN feature. */
+	if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH))
 		return -EINVAL;
 
 	if (p->replay_window != 0)
@@ -886,6 +887,7 @@ static int xfrm_dump_sa_done(struct netlink_callback *cb)
 	return 0;
 }
 
+static const struct nla_policy xfrma_policy[XFRMA_MAX+1];
 static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
@@ -901,8 +903,31 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
 	info.nlmsg_flags = NLM_F_MULTI;
 
 	if (!cb->args[0]) {
+		struct nlattr *attrs[XFRMA_MAX+1];
+		struct xfrm_filter *filter = NULL;
+		u8 proto = 0;
+		int err;
+
 		cb->args[0] = 1;
-		xfrm_state_walk_init(walk, 0);
+
+		err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX,
+				  xfrma_policy);
+		if (err < 0)
+			return err;
+
+		if (attrs[XFRMA_FILTER]) {
+			filter = kmalloc(sizeof(*filter), GFP_KERNEL);
+			if (filter == NULL)
+				return -ENOMEM;
+
+			memcpy(filter, nla_data(attrs[XFRMA_FILTER]),
+			       sizeof(*filter));
+		}
+
+		if (attrs[XFRMA_PROTO])
+			proto = nla_get_u8(attrs[XFRMA_PROTO]);
+
+		xfrm_state_walk_init(walk, proto, filter);
 	}
 
 	(void) xfrm_state_walk(net, walk, dump_one_state, &info);
@@ -2308,6 +2333,8 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_TFCPAD]		= { .type = NLA_U32 },
 	[XFRMA_REPLAY_ESN_VAL]	= { .len = sizeof(struct xfrm_replay_state_esn) },
 	[XFRMA_SA_EXTRA_FLAGS]	= { .type = NLA_U32 },
+	[XFRMA_PROTO]		= { .type = NLA_U8 },
+	[XFRMA_FILTER]		= { .len = sizeof(struct xfrm_filter) },
 };
 
 static const struct xfrm_link {
@@ -2981,6 +3008,11 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC);
 }
 
+static bool xfrm_is_alive(const struct km_event *c)
+{
+	return (bool)xfrm_acquire_is_on(c->net);
+}
+
 static struct xfrm_mgr netlink_mgr = {
 	.id		= "netlink",
 	.notify		= xfrm_send_state_notify,
@@ -2990,6 +3022,7 @@ static struct xfrm_mgr netlink_mgr = {
 	.report		= xfrm_send_report,
 	.migrate	= xfrm_send_migrate,
 	.new_mapping	= xfrm_send_mapping,
+	.is_alive	= xfrm_is_alive,
 };
 
 static int __net_init xfrm_user_net_init(struct net *net)
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index 48c3cc94c168..dfe3fda7e5f7 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -45,10 +45,11 @@ static inline void selinux_xfrm_notify_policyload(void)
 {
 	struct net *net;
 
-	atomic_inc(&flow_cache_genid);
 	rtnl_lock();
-	for_each_net(net)
+	for_each_net(net) {
+		atomic_inc(&net->xfrm.flow_cache_genid);
 		rt_genid_bump_all(net);
+	}
 	rtnl_unlock();
 }
 #else