diff options
author | Eric Dumazet <edumazet@google.com> | 2015-03-11 21:53:14 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-03-11 21:55:28 -0400 |
commit | 33cf7c90fe2f97afb1cadaa0cfb782cb9d1b9ee2 (patch) | |
tree | 7a0c80d0b2bb618919d966ce5b827c7eb8f843f6 | |
parent | 654eff45166c7e89d18fc476325c975768b2e347 (diff) |
net: add real socket cookies
A long-standing problem in netlink socket dumps is the use
of kernel socket addresses as cookies.
1) It is a security concern.
2) Sockets can be reused quite quickly, so there is
no guarantee that a cookie is used only once and identifies
a single flow.
3) Request socks, established socks, and timewait socks
for a given flow have different cookies.
Part of our effort to bring better TCP statistics requires
switching to a different allocator.
In this patch, I chose to use a per-network-namespace 64-bit generator,
and to use it only when a socket needs to be dumped to netlink.
(This might be refined later if needed)
Note that I tried to carry cookies from the request sock to the established sock,
and then to the timewait sock.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Eric Salo <salo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/sock_diag.h | 4 | ||||
-rw-r--r-- | include/net/inet_sock.h | 2 | ||||
-rw-r--r-- | include/net/inet_timewait_sock.h | 1 | ||||
-rw-r--r-- | include/net/net_namespace.h | 2 | ||||
-rw-r--r-- | include/net/sock.h | 3 | ||||
-rw-r--r-- | net/core/sock.c | 1 | ||||
-rw-r--r-- | net/core/sock_diag.c | 37 | ||||
-rw-r--r-- | net/dccp/ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 2 | ||||
-rw-r--r-- | net/ipv4/inet_diag.c | 14 | ||||
-rw-r--r-- | net/ipv4/inet_timewait_sock.c | 1 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 2 |
13 files changed, 55 insertions, 17 deletions
diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index b5ad7d35a636..083ac388098e 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h | |||
@@ -19,8 +19,8 @@ void sock_diag_unregister(const struct sock_diag_handler *h); | |||
19 | void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); | 19 | void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); |
20 | void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); | 20 | void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); |
21 | 21 | ||
22 | int sock_diag_check_cookie(void *sk, const __u32 *cookie); | 22 | int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie); |
23 | void sock_diag_save_cookie(void *sk, __u32 *cookie); | 23 | void sock_diag_save_cookie(struct sock *sk, __u32 *cookie); |
24 | 24 | ||
25 | int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); | 25 | int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); |
26 | int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, | 26 | int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, |
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index eb16c7beed1e..e565afdc14ad 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h | |||
@@ -77,6 +77,8 @@ struct inet_request_sock { | |||
77 | #define ir_v6_rmt_addr req.__req_common.skc_v6_daddr | 77 | #define ir_v6_rmt_addr req.__req_common.skc_v6_daddr |
78 | #define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr | 78 | #define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr |
79 | #define ir_iif req.__req_common.skc_bound_dev_if | 79 | #define ir_iif req.__req_common.skc_bound_dev_if |
80 | #define ir_cookie req.__req_common.skc_cookie | ||
81 | #define ireq_net req.__req_common.skc_net | ||
80 | 82 | ||
81 | kmemcheck_bitfield_begin(flags); | 83 | kmemcheck_bitfield_begin(flags); |
82 | u16 snd_wscale : 4, | 84 | u16 snd_wscale : 4, |
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 6c566034e26d..b7ce1003c429 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h | |||
@@ -122,6 +122,7 @@ struct inet_timewait_sock { | |||
122 | #define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr | 122 | #define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr |
123 | #define tw_dport __tw_common.skc_dport | 123 | #define tw_dport __tw_common.skc_dport |
124 | #define tw_num __tw_common.skc_num | 124 | #define tw_num __tw_common.skc_num |
125 | #define tw_cookie __tw_common.skc_cookie | ||
125 | 126 | ||
126 | int tw_timeout; | 127 | int tw_timeout; |
127 | volatile unsigned char tw_substate; | 128 | volatile unsigned char tw_substate; |
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 2cb9acb618e9..e086f4030dd2 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h | |||
@@ -56,6 +56,8 @@ struct net { | |||
56 | #endif | 56 | #endif |
57 | spinlock_t rules_mod_lock; | 57 | spinlock_t rules_mod_lock; |
58 | 58 | ||
59 | atomic64_t cookie_gen; | ||
60 | |||
59 | struct list_head list; /* list of network namespaces */ | 61 | struct list_head list; /* list of network namespaces */ |
60 | struct list_head cleanup_list; /* namespaces on death row */ | 62 | struct list_head cleanup_list; /* namespaces on death row */ |
61 | struct list_head exit_list; /* Use only net_mutex */ | 63 | struct list_head exit_list; /* Use only net_mutex */ |
diff --git a/include/net/sock.h b/include/net/sock.h index 250822cc1e02..d996c633bec2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -199,6 +199,8 @@ struct sock_common { | |||
199 | struct in6_addr skc_v6_rcv_saddr; | 199 | struct in6_addr skc_v6_rcv_saddr; |
200 | #endif | 200 | #endif |
201 | 201 | ||
202 | atomic64_t skc_cookie; | ||
203 | |||
202 | /* | 204 | /* |
203 | * fields between dontcopy_begin/dontcopy_end | 205 | * fields between dontcopy_begin/dontcopy_end |
204 | * are not copied in sock_copy() | 206 | * are not copied in sock_copy() |
@@ -329,6 +331,7 @@ struct sock { | |||
329 | #define sk_net __sk_common.skc_net | 331 | #define sk_net __sk_common.skc_net |
330 | #define sk_v6_daddr __sk_common.skc_v6_daddr | 332 | #define sk_v6_daddr __sk_common.skc_v6_daddr |
331 | #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr | 333 | #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr |
334 | #define sk_cookie __sk_common.skc_cookie | ||
332 | 335 | ||
333 | socket_lock_t sk_lock; | 336 | socket_lock_t sk_lock; |
334 | struct sk_buff_head sk_receive_queue; | 337 | struct sk_buff_head sk_receive_queue; |
diff --git a/net/core/sock.c b/net/core/sock.c index 726e1f99aa8d..a9a9c2ff9260 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -1538,6 +1538,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) | |||
1538 | newsk->sk_err = 0; | 1538 | newsk->sk_err = 0; |
1539 | newsk->sk_priority = 0; | 1539 | newsk->sk_priority = 0; |
1540 | newsk->sk_incoming_cpu = raw_smp_processor_id(); | 1540 | newsk->sk_incoming_cpu = raw_smp_processor_id(); |
1541 | atomic64_set(&newsk->sk_cookie, 0); | ||
1541 | /* | 1542 | /* |
1542 | * Before updating sk_refcnt, we must commit prior changes to memory | 1543 | * Before updating sk_refcnt, we must commit prior changes to memory |
1543 | * (Documentation/RCU/rculist_nulls.txt for details) | 1544 | * (Documentation/RCU/rculist_nulls.txt for details) |
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 96e70ee05a8d..74dddf84adcd 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c | |||
@@ -13,22 +13,39 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; | |||
13 | static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); | 13 | static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); |
14 | static DEFINE_MUTEX(sock_diag_table_mutex); | 14 | static DEFINE_MUTEX(sock_diag_table_mutex); |
15 | 15 | ||
16 | int sock_diag_check_cookie(void *sk, const __u32 *cookie) | 16 | static u64 sock_gen_cookie(struct sock *sk) |
17 | { | 17 | { |
18 | if ((cookie[0] != INET_DIAG_NOCOOKIE || | 18 | while (1) { |
19 | cookie[1] != INET_DIAG_NOCOOKIE) && | 19 | u64 res = atomic64_read(&sk->sk_cookie); |
20 | ((u32)(unsigned long)sk != cookie[0] || | 20 | |
21 | (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1])) | 21 | if (res) |
22 | return -ESTALE; | 22 | return res; |
23 | else | 23 | res = atomic64_inc_return(&sock_net(sk)->cookie_gen); |
24 | atomic64_cmpxchg(&sk->sk_cookie, 0, res); | ||
25 | } | ||
26 | } | ||
27 | |||
28 | int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie) | ||
29 | { | ||
30 | u64 res; | ||
31 | |||
32 | if (cookie[0] == INET_DIAG_NOCOOKIE && cookie[1] == INET_DIAG_NOCOOKIE) | ||
24 | return 0; | 33 | return 0; |
34 | |||
35 | res = sock_gen_cookie(sk); | ||
36 | if ((u32)res != cookie[0] || (u32)(res >> 32) != cookie[1]) | ||
37 | return -ESTALE; | ||
38 | |||
39 | return 0; | ||
25 | } | 40 | } |
26 | EXPORT_SYMBOL_GPL(sock_diag_check_cookie); | 41 | EXPORT_SYMBOL_GPL(sock_diag_check_cookie); |
27 | 42 | ||
28 | void sock_diag_save_cookie(void *sk, __u32 *cookie) | 43 | void sock_diag_save_cookie(struct sock *sk, __u32 *cookie) |
29 | { | 44 | { |
30 | cookie[0] = (u32)(unsigned long)sk; | 45 | u64 res = sock_gen_cookie(sk); |
31 | cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); | 46 | |
47 | cookie[0] = (u32)res; | ||
48 | cookie[1] = (u32)(res >> 32); | ||
32 | } | 49 | } |
33 | EXPORT_SYMBOL_GPL(sock_diag_save_cookie); | 50 | EXPORT_SYMBOL_GPL(sock_diag_save_cookie); |
34 | 51 | ||
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index e45b968613a4..207281ae3536 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
@@ -641,6 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
641 | ireq = inet_rsk(req); | 641 | ireq = inet_rsk(req); |
642 | ireq->ir_loc_addr = ip_hdr(skb)->daddr; | 642 | ireq->ir_loc_addr = ip_hdr(skb)->daddr; |
643 | ireq->ir_rmt_addr = ip_hdr(skb)->saddr; | 643 | ireq->ir_rmt_addr = ip_hdr(skb)->saddr; |
644 | ireq->ireq_net = sock_net(sk); | ||
645 | atomic64_set(&ireq->ir_cookie, 0); | ||
644 | 646 | ||
645 | /* | 647 | /* |
646 | * Step 3: Process LISTEN state | 648 | * Step 3: Process LISTEN state |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 14d02ea905b6..34581f928afa 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -678,6 +678,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, | |||
678 | newsk->sk_write_space = sk_stream_write_space; | 678 | newsk->sk_write_space = sk_stream_write_space; |
679 | 679 | ||
680 | newsk->sk_mark = inet_rsk(req)->ir_mark; | 680 | newsk->sk_mark = inet_rsk(req)->ir_mark; |
681 | atomic64_set(&newsk->sk_cookie, | ||
682 | atomic64_read(&inet_rsk(req)->ir_cookie)); | ||
681 | 683 | ||
682 | newicsk->icsk_retransmits = 0; | 684 | newicsk->icsk_retransmits = 0; |
683 | newicsk->icsk_backoff = 0; | 685 | newicsk->icsk_backoff = 0; |
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index ac3bfb458afd..29317ff4a007 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
@@ -221,12 +221,13 @@ static int inet_csk_diag_fill(struct sock *sk, | |||
221 | user_ns, portid, seq, nlmsg_flags, unlh); | 221 | user_ns, portid, seq, nlmsg_flags, unlh); |
222 | } | 222 | } |
223 | 223 | ||
224 | static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, | 224 | static int inet_twsk_diag_fill(struct sock *sk, |
225 | struct sk_buff *skb, | 225 | struct sk_buff *skb, |
226 | const struct inet_diag_req_v2 *req, | 226 | const struct inet_diag_req_v2 *req, |
227 | u32 portid, u32 seq, u16 nlmsg_flags, | 227 | u32 portid, u32 seq, u16 nlmsg_flags, |
228 | const struct nlmsghdr *unlh) | 228 | const struct nlmsghdr *unlh) |
229 | { | 229 | { |
230 | struct inet_timewait_sock *tw = inet_twsk(sk); | ||
230 | struct inet_diag_msg *r; | 231 | struct inet_diag_msg *r; |
231 | struct nlmsghdr *nlh; | 232 | struct nlmsghdr *nlh; |
232 | s32 tmo; | 233 | s32 tmo; |
@@ -247,7 +248,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, | |||
247 | r->idiag_retrans = 0; | 248 | r->idiag_retrans = 0; |
248 | 249 | ||
249 | r->id.idiag_if = tw->tw_bound_dev_if; | 250 | r->id.idiag_if = tw->tw_bound_dev_if; |
250 | sock_diag_save_cookie(tw, r->id.idiag_cookie); | 251 | sock_diag_save_cookie(sk, r->id.idiag_cookie); |
251 | 252 | ||
252 | r->id.idiag_sport = tw->tw_sport; | 253 | r->id.idiag_sport = tw->tw_sport; |
253 | r->id.idiag_dport = tw->tw_dport; | 254 | r->id.idiag_dport = tw->tw_dport; |
@@ -283,7 +284,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, | |||
283 | const struct nlmsghdr *unlh) | 284 | const struct nlmsghdr *unlh) |
284 | { | 285 | { |
285 | if (sk->sk_state == TCP_TIME_WAIT) | 286 | if (sk->sk_state == TCP_TIME_WAIT) |
286 | return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq, | 287 | return inet_twsk_diag_fill(sk, skb, r, portid, seq, |
287 | nlmsg_flags, unlh); | 288 | nlmsg_flags, unlh); |
288 | 289 | ||
289 | return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, | 290 | return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, |
@@ -675,7 +676,7 @@ static int inet_twsk_diag_dump(struct sock *sk, | |||
675 | if (!inet_diag_bc_sk(bc, sk)) | 676 | if (!inet_diag_bc_sk(bc, sk)) |
676 | return 0; | 677 | return 0; |
677 | 678 | ||
678 | return inet_twsk_diag_fill(inet_twsk(sk), skb, r, | 679 | return inet_twsk_diag_fill(sk, skb, r, |
679 | NETLINK_CB(cb->skb).portid, | 680 | NETLINK_CB(cb->skb).portid, |
680 | cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); | 681 | cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); |
681 | } | 682 | } |
@@ -734,7 +735,10 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, | |||
734 | r->idiag_retrans = req->num_retrans; | 735 | r->idiag_retrans = req->num_retrans; |
735 | 736 | ||
736 | r->id.idiag_if = sk->sk_bound_dev_if; | 737 | r->id.idiag_if = sk->sk_bound_dev_if; |
737 | sock_diag_save_cookie(req, r->id.idiag_cookie); | 738 | |
739 | BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != | ||
740 | offsetof(struct sock, sk_cookie)); | ||
741 | sock_diag_save_cookie((struct sock *)ireq, r->id.idiag_cookie); | ||
738 | 742 | ||
739 | tmo = req->expires - jiffies; | 743 | tmo = req->expires - jiffies; |
740 | if (tmo < 0) | 744 | if (tmo < 0) |
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 6d592f8555fb..2bd980526631 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
@@ -195,6 +195,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat | |||
195 | tw->tw_ipv6only = 0; | 195 | tw->tw_ipv6only = 0; |
196 | tw->tw_transparent = inet->transparent; | 196 | tw->tw_transparent = inet->transparent; |
197 | tw->tw_prot = sk->sk_prot_creator; | 197 | tw->tw_prot = sk->sk_prot_creator; |
198 | atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie)); | ||
198 | twsk_net_set(tw, hold_net(sock_net(sk))); | 199 | twsk_net_set(tw, hold_net(sock_net(sk))); |
199 | /* | 200 | /* |
200 | * Because we use RCU lookups, we should not set tw_refcnt | 201 | * Because we use RCU lookups, we should not set tw_refcnt |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 45fe60c5238e..ece31b426013 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -346,6 +346,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) | |||
346 | req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; | 346 | req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; |
347 | treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; | 347 | treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; |
348 | treq->listener = NULL; | 348 | treq->listener = NULL; |
349 | ireq->ireq_net = sock_net(sk); | ||
349 | 350 | ||
350 | /* We throwed the options of the initial SYN away, so we hope | 351 | /* We throwed the options of the initial SYN away, so we hope |
351 | * the ACK carries the same options again (see RFC1122 4.2.3.8) | 352 | * the ACK carries the same options again (see RFC1122 4.2.3.8) |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fb4cf8b8e121..d7045f5f6ebf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -5965,6 +5965,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, | |||
5965 | 5965 | ||
5966 | tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; | 5966 | tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; |
5967 | tcp_openreq_init(req, &tmp_opt, skb, sk); | 5967 | tcp_openreq_init(req, &tmp_opt, skb, sk); |
5968 | inet_rsk(req)->ireq_net = sock_net(sk); | ||
5969 | atomic64_set(&inet_rsk(req)->ir_cookie, 0); | ||
5968 | 5970 | ||
5969 | af_ops->init_req(req, sk, skb); | 5971 | af_ops->init_req(req, sk, skb); |
5970 | 5972 | ||