diff options
| author | Eric Dumazet <edumazet@google.com> | 2015-03-11 21:53:14 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2015-03-11 21:55:28 -0400 |
| commit | 33cf7c90fe2f97afb1cadaa0cfb782cb9d1b9ee2 (patch) | |
| tree | 7a0c80d0b2bb618919d966ce5b827c7eb8f843f6 /net | |
| parent | 654eff45166c7e89d18fc476325c975768b2e347 (diff) | |
net: add real socket cookies
A long-standing problem in netlink socket dumps is the use
of kernel socket addresses as cookies.
1) It is a security concern.
2) Sockets can be reused quite quickly, so there is
no guarantee that a cookie is used only once and identifies
a single flow.
3) Request socks, established socks, and timewait socks
for a given flow have different cookies.
Part of our effort to bring better TCP statistics requires
switching to a different allocator.
In this patch, I chose to use a per-network-namespace 64-bit generator,
and to use it only when a socket needs to be dumped to netlink.
(This might be refined later if needed)
Note that I tried to carry cookies from request socks to established socks,
and then to timewait sockets.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Eric Salo <salo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
| -rw-r--r-- | net/core/sock.c | 1 | ||||
| -rw-r--r-- | net/core/sock_diag.c | 37 | ||||
| -rw-r--r-- | net/dccp/ipv4.c | 2 | ||||
| -rw-r--r-- | net/ipv4/inet_connection_sock.c | 2 | ||||
| -rw-r--r-- | net/ipv4/inet_diag.c | 14 | ||||
| -rw-r--r-- | net/ipv4/inet_timewait_sock.c | 1 | ||||
| -rw-r--r-- | net/ipv4/syncookies.c | 1 | ||||
| -rw-r--r-- | net/ipv4/tcp_input.c | 2 |
8 files changed, 45 insertions, 15 deletions
diff --git a/net/core/sock.c b/net/core/sock.c index 726e1f99aa8d..a9a9c2ff9260 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
| @@ -1538,6 +1538,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) | |||
| 1538 | newsk->sk_err = 0; | 1538 | newsk->sk_err = 0; |
| 1539 | newsk->sk_priority = 0; | 1539 | newsk->sk_priority = 0; |
| 1540 | newsk->sk_incoming_cpu = raw_smp_processor_id(); | 1540 | newsk->sk_incoming_cpu = raw_smp_processor_id(); |
| 1541 | atomic64_set(&newsk->sk_cookie, 0); | ||
| 1541 | /* | 1542 | /* |
| 1542 | * Before updating sk_refcnt, we must commit prior changes to memory | 1543 | * Before updating sk_refcnt, we must commit prior changes to memory |
| 1543 | * (Documentation/RCU/rculist_nulls.txt for details) | 1544 | * (Documentation/RCU/rculist_nulls.txt for details) |
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 96e70ee05a8d..74dddf84adcd 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c | |||
| @@ -13,22 +13,39 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; | |||
| 13 | static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); | 13 | static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); |
| 14 | static DEFINE_MUTEX(sock_diag_table_mutex); | 14 | static DEFINE_MUTEX(sock_diag_table_mutex); |
| 15 | 15 | ||
| 16 | int sock_diag_check_cookie(void *sk, const __u32 *cookie) | 16 | static u64 sock_gen_cookie(struct sock *sk) |
| 17 | { | 17 | { |
| 18 | if ((cookie[0] != INET_DIAG_NOCOOKIE || | 18 | while (1) { |
| 19 | cookie[1] != INET_DIAG_NOCOOKIE) && | 19 | u64 res = atomic64_read(&sk->sk_cookie); |
| 20 | ((u32)(unsigned long)sk != cookie[0] || | 20 | |
| 21 | (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1])) | 21 | if (res) |
| 22 | return -ESTALE; | 22 | return res; |
| 23 | else | 23 | res = atomic64_inc_return(&sock_net(sk)->cookie_gen); |
| 24 | atomic64_cmpxchg(&sk->sk_cookie, 0, res); | ||
| 25 | } | ||
| 26 | } | ||
| 27 | |||
| 28 | int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie) | ||
| 29 | { | ||
| 30 | u64 res; | ||
| 31 | |||
| 32 | if (cookie[0] == INET_DIAG_NOCOOKIE && cookie[1] == INET_DIAG_NOCOOKIE) | ||
| 24 | return 0; | 33 | return 0; |
| 34 | |||
| 35 | res = sock_gen_cookie(sk); | ||
| 36 | if ((u32)res != cookie[0] || (u32)(res >> 32) != cookie[1]) | ||
| 37 | return -ESTALE; | ||
| 38 | |||
| 39 | return 0; | ||
| 25 | } | 40 | } |
| 26 | EXPORT_SYMBOL_GPL(sock_diag_check_cookie); | 41 | EXPORT_SYMBOL_GPL(sock_diag_check_cookie); |
| 27 | 42 | ||
| 28 | void sock_diag_save_cookie(void *sk, __u32 *cookie) | 43 | void sock_diag_save_cookie(struct sock *sk, __u32 *cookie) |
| 29 | { | 44 | { |
| 30 | cookie[0] = (u32)(unsigned long)sk; | 45 | u64 res = sock_gen_cookie(sk); |
| 31 | cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); | 46 | |
| 47 | cookie[0] = (u32)res; | ||
| 48 | cookie[1] = (u32)(res >> 32); | ||
| 32 | } | 49 | } |
| 33 | EXPORT_SYMBOL_GPL(sock_diag_save_cookie); | 50 | EXPORT_SYMBOL_GPL(sock_diag_save_cookie); |
| 34 | 51 | ||
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index e45b968613a4..207281ae3536 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
| @@ -641,6 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
| 641 | ireq = inet_rsk(req); | 641 | ireq = inet_rsk(req); |
| 642 | ireq->ir_loc_addr = ip_hdr(skb)->daddr; | 642 | ireq->ir_loc_addr = ip_hdr(skb)->daddr; |
| 643 | ireq->ir_rmt_addr = ip_hdr(skb)->saddr; | 643 | ireq->ir_rmt_addr = ip_hdr(skb)->saddr; |
| 644 | ireq->ireq_net = sock_net(sk); | ||
| 645 | atomic64_set(&ireq->ir_cookie, 0); | ||
| 644 | 646 | ||
| 645 | /* | 647 | /* |
| 646 | * Step 3: Process LISTEN state | 648 | * Step 3: Process LISTEN state |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 14d02ea905b6..34581f928afa 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
| @@ -678,6 +678,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, | |||
| 678 | newsk->sk_write_space = sk_stream_write_space; | 678 | newsk->sk_write_space = sk_stream_write_space; |
| 679 | 679 | ||
| 680 | newsk->sk_mark = inet_rsk(req)->ir_mark; | 680 | newsk->sk_mark = inet_rsk(req)->ir_mark; |
| 681 | atomic64_set(&newsk->sk_cookie, | ||
| 682 | atomic64_read(&inet_rsk(req)->ir_cookie)); | ||
| 681 | 683 | ||
| 682 | newicsk->icsk_retransmits = 0; | 684 | newicsk->icsk_retransmits = 0; |
| 683 | newicsk->icsk_backoff = 0; | 685 | newicsk->icsk_backoff = 0; |
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index ac3bfb458afd..29317ff4a007 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
| @@ -221,12 +221,13 @@ static int inet_csk_diag_fill(struct sock *sk, | |||
| 221 | user_ns, portid, seq, nlmsg_flags, unlh); | 221 | user_ns, portid, seq, nlmsg_flags, unlh); |
| 222 | } | 222 | } |
| 223 | 223 | ||
| 224 | static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, | 224 | static int inet_twsk_diag_fill(struct sock *sk, |
| 225 | struct sk_buff *skb, | 225 | struct sk_buff *skb, |
| 226 | const struct inet_diag_req_v2 *req, | 226 | const struct inet_diag_req_v2 *req, |
| 227 | u32 portid, u32 seq, u16 nlmsg_flags, | 227 | u32 portid, u32 seq, u16 nlmsg_flags, |
| 228 | const struct nlmsghdr *unlh) | 228 | const struct nlmsghdr *unlh) |
| 229 | { | 229 | { |
| 230 | struct inet_timewait_sock *tw = inet_twsk(sk); | ||
| 230 | struct inet_diag_msg *r; | 231 | struct inet_diag_msg *r; |
| 231 | struct nlmsghdr *nlh; | 232 | struct nlmsghdr *nlh; |
| 232 | s32 tmo; | 233 | s32 tmo; |
| @@ -247,7 +248,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, | |||
| 247 | r->idiag_retrans = 0; | 248 | r->idiag_retrans = 0; |
| 248 | 249 | ||
| 249 | r->id.idiag_if = tw->tw_bound_dev_if; | 250 | r->id.idiag_if = tw->tw_bound_dev_if; |
| 250 | sock_diag_save_cookie(tw, r->id.idiag_cookie); | 251 | sock_diag_save_cookie(sk, r->id.idiag_cookie); |
| 251 | 252 | ||
| 252 | r->id.idiag_sport = tw->tw_sport; | 253 | r->id.idiag_sport = tw->tw_sport; |
| 253 | r->id.idiag_dport = tw->tw_dport; | 254 | r->id.idiag_dport = tw->tw_dport; |
| @@ -283,7 +284,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, | |||
| 283 | const struct nlmsghdr *unlh) | 284 | const struct nlmsghdr *unlh) |
| 284 | { | 285 | { |
| 285 | if (sk->sk_state == TCP_TIME_WAIT) | 286 | if (sk->sk_state == TCP_TIME_WAIT) |
| 286 | return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq, | 287 | return inet_twsk_diag_fill(sk, skb, r, portid, seq, |
| 287 | nlmsg_flags, unlh); | 288 | nlmsg_flags, unlh); |
| 288 | 289 | ||
| 289 | return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, | 290 | return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, |
| @@ -675,7 +676,7 @@ static int inet_twsk_diag_dump(struct sock *sk, | |||
| 675 | if (!inet_diag_bc_sk(bc, sk)) | 676 | if (!inet_diag_bc_sk(bc, sk)) |
| 676 | return 0; | 677 | return 0; |
| 677 | 678 | ||
| 678 | return inet_twsk_diag_fill(inet_twsk(sk), skb, r, | 679 | return inet_twsk_diag_fill(sk, skb, r, |
| 679 | NETLINK_CB(cb->skb).portid, | 680 | NETLINK_CB(cb->skb).portid, |
| 680 | cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); | 681 | cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); |
| 681 | } | 682 | } |
| @@ -734,7 +735,10 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, | |||
| 734 | r->idiag_retrans = req->num_retrans; | 735 | r->idiag_retrans = req->num_retrans; |
| 735 | 736 | ||
| 736 | r->id.idiag_if = sk->sk_bound_dev_if; | 737 | r->id.idiag_if = sk->sk_bound_dev_if; |
| 737 | sock_diag_save_cookie(req, r->id.idiag_cookie); | 738 | |
| 739 | BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != | ||
| 740 | offsetof(struct sock, sk_cookie)); | ||
| 741 | sock_diag_save_cookie((struct sock *)ireq, r->id.idiag_cookie); | ||
| 738 | 742 | ||
| 739 | tmo = req->expires - jiffies; | 743 | tmo = req->expires - jiffies; |
| 740 | if (tmo < 0) | 744 | if (tmo < 0) |
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 6d592f8555fb..2bd980526631 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
| @@ -195,6 +195,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat | |||
| 195 | tw->tw_ipv6only = 0; | 195 | tw->tw_ipv6only = 0; |
| 196 | tw->tw_transparent = inet->transparent; | 196 | tw->tw_transparent = inet->transparent; |
| 197 | tw->tw_prot = sk->sk_prot_creator; | 197 | tw->tw_prot = sk->sk_prot_creator; |
| 198 | atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie)); | ||
| 198 | twsk_net_set(tw, hold_net(sock_net(sk))); | 199 | twsk_net_set(tw, hold_net(sock_net(sk))); |
| 199 | /* | 200 | /* |
| 200 | * Because we use RCU lookups, we should not set tw_refcnt | 201 | * Because we use RCU lookups, we should not set tw_refcnt |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 45fe60c5238e..ece31b426013 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
| @@ -346,6 +346,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) | |||
| 346 | req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; | 346 | req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; |
| 347 | treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; | 347 | treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; |
| 348 | treq->listener = NULL; | 348 | treq->listener = NULL; |
| 349 | ireq->ireq_net = sock_net(sk); | ||
| 349 | 350 | ||
| 350 | /* We throwed the options of the initial SYN away, so we hope | 351 | /* We throwed the options of the initial SYN away, so we hope |
| 351 | * the ACK carries the same options again (see RFC1122 4.2.3.8) | 352 | * the ACK carries the same options again (see RFC1122 4.2.3.8) |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fb4cf8b8e121..d7045f5f6ebf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
| @@ -5965,6 +5965,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, | |||
| 5965 | 5965 | ||
| 5966 | tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; | 5966 | tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; |
| 5967 | tcp_openreq_init(req, &tmp_opt, skb, sk); | 5967 | tcp_openreq_init(req, &tmp_opt, skb, sk); |
| 5968 | inet_rsk(req)->ireq_net = sock_net(sk); | ||
| 5969 | atomic64_set(&inet_rsk(req)->ir_cookie, 0); | ||
| 5968 | 5970 | ||
| 5969 | af_ops->init_req(req, sk, skb); | 5971 | af_ops->init_req(req, sk, skb); |
| 5970 | 5972 | ||
