From aff65da0f1be5daec44231972b6b5fc45bfa7a58 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 22 Aug 2011 23:44:59 +0000 Subject: net: ipv4: convert to SKB frag APIs Signed-off-by: Ian Campbell Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: "Pekka Savola (ipv6)" Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46febcacb729..5fe632c763f4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3035,7 +3035,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, for (i = 0; i < shi->nr_frags; ++i) { const struct skb_frag_struct *f = &shi->frags[i]; - sg_set_page(&sg, f->page, f->size, f->page_offset); + struct page *page = skb_frag_page(f); + sg_set_page(&sg, page, f->size, f->page_offset); if (crypto_hash_update(desc, &sg, f->size)) return 1; } -- cgit v1.2.2 From 765cf9976e937f1cfe9159bf4534967c8bf8eb6d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 Sep 2011 20:28:37 +0000 Subject: tcp: md5: remove one indirection level in tcp_md5sig_pool tcp_md5sig_pool is currently an 'array' (a percpu object) of pointers to struct tcp_md5sig_pool. Only the pointers are NUMA aware, but objects themselves are all allocated on a single node. Remove this extra indirection to get proper percpu memory (NUMA aware) and make code simpler. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5fe632c763f4..cc0d5dead30c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2857,26 +2857,25 @@ EXPORT_SYMBOL(tcp_gro_complete); #ifdef CONFIG_TCP_MD5SIG static unsigned long tcp_md5sig_users; -static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool; +static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool; static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool) +static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) { int cpu; + for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu); - if (p) { - if (p->md5_desc.tfm) - crypto_free_hash(p->md5_desc.tfm); - kfree(p); - } + struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu); + + if (p->md5_desc.tfm) + crypto_free_hash(p->md5_desc.tfm); } free_percpu(pool); } void tcp_free_md5sig_pool(void) { - struct tcp_md5sig_pool * __percpu *pool = NULL; + struct tcp_md5sig_pool __percpu *pool = NULL; spin_lock_bh(&tcp_md5sig_pool_lock); if (--tcp_md5sig_users == 0) { @@ -2889,30 +2888,24 @@ void tcp_free_md5sig_pool(void) } EXPORT_SYMBOL(tcp_free_md5sig_pool); -static struct tcp_md5sig_pool * __percpu * +static struct tcp_md5sig_pool __percpu * __tcp_alloc_md5sig_pool(struct sock *sk) { int cpu; - struct tcp_md5sig_pool * __percpu *pool; + struct tcp_md5sig_pool __percpu *pool; - pool = alloc_percpu(struct tcp_md5sig_pool *); + pool = alloc_percpu(struct tcp_md5sig_pool); if (!pool) return NULL; for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p; struct crypto_hash *hash; - p = kzalloc(sizeof(*p), sk->sk_allocation); - if (!p) - goto out_free; - *per_cpu_ptr(pool, cpu) = p; - hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); if (!hash || IS_ERR(hash)) goto out_free; - p->md5_desc.tfm = hash; + per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; } return pool; out_free: @@ -2920,9 +2913,9 @@ out_free: return NULL; } -struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk) +struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk) { - struct tcp_md5sig_pool * __percpu *pool; + struct tcp_md5sig_pool __percpu *pool; int alloc = 0; retry: @@ -2941,7 +2934,7 @@ retry: if (alloc) { /* we cannot hold spinlock here because this may sleep. */ - struct tcp_md5sig_pool * __percpu *p; + struct tcp_md5sig_pool __percpu *p; p = __tcp_alloc_md5sig_pool(sk); spin_lock_bh(&tcp_md5sig_pool_lock); @@ -2974,7 +2967,7 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool); */ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { - struct tcp_md5sig_pool * __percpu *p; + struct tcp_md5sig_pool __percpu *p; local_bh_disable(); @@ -2985,7 +2978,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) spin_unlock(&tcp_md5sig_pool_lock); if (p) - return *this_cpu_ptr(p); + return this_cpu_ptr(p); local_bh_enable(); return NULL; -- cgit v1.2.2 From 4de075e0438ba54b8f42cbbc1263d404229dc997 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Sep 2011 13:25:05 -0400 Subject: tcp: rename tcp_skb_cb flags Rename struct tcp_skb_cb "flags" to "tcp_flags" to ease code review and maintenance. Its content is a combination of FIN/SYN/RST/PSH/ACK/URG/ECE/CWR flags Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cc0d5dead30c..131c45f93373 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -524,7 +524,7 @@ EXPORT_SYMBOL(tcp_ioctl); static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { - TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; tp->pushed_seq = tp->write_seq; } @@ -540,7 +540,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) skb->csum = 0; tcb->seq = tcb->end_seq = tp->write_seq; - tcb->flags = TCPHDR_ACK; + tcb->tcp_flags = TCPHDR_ACK; tcb->sacked = 0; skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); @@ -830,7 +830,7 @@ new_segment: skb_shinfo(skb)->gso_segs = 0; if (!copied) - TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; copied += copy; poffset += copy; @@ -1074,7 +1074,7 @@ new_segment: } if (!copied) - TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; -- cgit v1.2.2 From b5c5693bb723a019deac3cd1345f3e7233c8a67e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Oct 2011 14:01:21 -0400 Subject: tcp: report ECN_SEEN in tcp_info Allows ss command (iproute2) to display "ecnseen" if at least one packet with ECT(0) or ECT(1) or ECN was received by this socket. "ecn" means ECN was negotiated at session establishment (TCP level) "ecnseen" means we received at least one packet with ECT fields set (IP level) ss -i ... ESTAB 0 0 192.168.20.110:22 192.168.20.144:38016 ino:5950 sk:f178e400 mem:(r0,w0,f0,t0) ts sack ecn ecnseen bic wscale:7,8 rto:210 rtt:12.5/7.5 cwnd:10 send 9.3Mbps rcv_space:14480 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 131c45f93373..4c0da24fb649 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2455,8 +2455,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale; } - if (tp->ecn_flags&TCP_ECN_OK) + if (tp->ecn_flags & TCP_ECN_OK) info->tcpi_options |= TCPI_OPT_ECN; + if (tp->ecn_flags & TCP_ECN_SEEN) + info->tcpi_options |= TCPI_OPT_ECN_SEEN; info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato); -- cgit v1.2.2 From 9e903e085262ffbf1fc44a17ac06058aca03524a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 18 Oct 2011 21:00:24 +0000 Subject: net: add skb frag size accessors To ease skb->truesize sanitization, its better to be able to localize all references to skb frags size. Define accessors : skb_frag_size() to fetch frag size, and skb_frag_size_{set|add|sub}() to manipulate it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4c0da24fb649..132be081cd00 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -813,7 +813,7 @@ new_segment: goto wait_for_memory; if (can_coalesce) { - skb_shinfo(skb)->frags[i - 1].size += copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { get_page(page); skb_fill_page_desc(skb, i, page, offset, copy); @@ -1058,8 +1058,7 @@ new_segment: /* Update the skb. */ if (merge) { - skb_shinfo(skb)->frags[i - 1].size += - copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { skb_fill_page_desc(skb, i, page, off, copy); if (TCP_PAGE(sk)) { @@ -3031,8 +3030,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, for (i = 0; i < shi->nr_frags; ++i) { const struct skb_frag_struct *f = &shi->frags[i]; struct page *page = skb_frag_page(f); - sg_set_page(&sg, page, f->size, f->page_offset); - if (crypto_hash_update(desc, &sg, f->size)) + sg_set_page(&sg, page, skb_frag_size(f), f->page_offset); + if (crypto_hash_update(desc, &sg, skb_frag_size(f))) return 1; } -- cgit v1.2.2 From cf533ea53ebfae41be15b103d78e7ebec30b9969 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Oct 2011 05:22:42 -0400 Subject: tcp: add const qualifiers where possible Adding const qualifiers to pointers can ease code review, and spot some bugs. It might allow compiler to optimize code further. For example, is it legal to temporary write a null cksum into tcphdr in tcp_md5_hash_header() ? I am afraid a sniffer could catch the temporary null value... Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 132be081cd00..704adad8f07f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -374,7 +374,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) { unsigned int mask; struct sock *sk = sock->sk; - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == TCP_LISTEN) @@ -528,7 +528,7 @@ static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) tp->pushed_seq = tp->write_seq; } -static inline int forced_push(struct tcp_sock *tp) +static inline int forced_push(const struct tcp_sock *tp) { return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); } @@ -891,9 +891,9 @@ EXPORT_SYMBOL(tcp_sendpage); #define TCP_PAGE(sk) (sk->sk_sndmsg_page) #define TCP_OFF(sk) (sk->sk_sndmsg_off) -static inline int select_size(struct sock *sk, int sg) +static inline int select_size(const struct sock *sk, int sg) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache; if (sg) { @@ -2408,7 +2408,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { - struct inet_connection_sock *icsk = inet_csk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) return icsk->icsk_af_ops->setsockopt(sk, level, optname, @@ -2430,9 +2430,9 @@ EXPORT_SYMBOL(compat_tcp_setsockopt); #endif /* Return information about state of tcp endpoint in API format. */ -void tcp_get_info(struct sock *sk, struct tcp_info *info) +void tcp_get_info(const struct sock *sk, struct tcp_info *info) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; @@ -3010,7 +3010,7 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, EXPORT_SYMBOL(tcp_md5_hash_header); int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, - struct sk_buff *skb, unsigned header_len) + const struct sk_buff *skb, unsigned int header_len) { struct scatterlist sg; const struct tcphdr *tp = tcp_hdr(skb); @@ -3043,7 +3043,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, } EXPORT_SYMBOL(tcp_md5_hash_skb_data); -int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) +int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) { struct scatterlist sg; -- cgit v1.2.2 From ca35a0ef85e8ed6df6d5ab01fb6c3530cca0c469 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Oct 2011 01:52:35 -0400 Subject: tcp: md5: dont write skb head in tcp_md5_hash_header() tcp_md5_hash_header() writes into skb header a temporary zero value, this might confuse other users of this area. Since tcphdr is small (20 bytes), copy it in a temporary variable and make the change in the copy. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 704adad8f07f..eefc61e3d0e4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2994,17 +2994,19 @@ void tcp_put_md5sig_pool(void) EXPORT_SYMBOL(tcp_put_md5sig_pool); int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, - struct tcphdr *th) + const struct tcphdr *th) { struct scatterlist sg; + struct tcphdr hdr; int err; - __sum16 old_checksum = th->check; - th->check = 0; + /* We are not allowed to change tcphdr, make a local copy */ + memcpy(&hdr, th, sizeof(hdr)); + hdr.check = 0; + /* options aren't included in the hash */ - sg_init_one(&sg, th, sizeof(struct tcphdr)); - err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr)); - th->check = old_checksum; + sg_init_one(&sg, &hdr, sizeof(hdr)); + err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(hdr)); return err; } EXPORT_SYMBOL(tcp_md5_hash_header); -- cgit v1.2.2 From 78d81d15b74246c7cedf84894434890b33da3907 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Mon, 24 Oct 2011 08:15:10 +0000 Subject: TCP: remove TCP_DEBUG It was enabled by default and the messages guarded by the define are useful. Signed-off-by: Flavio Leitner Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index eefc61e3d0e4..34f5db1e1c8b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1193,13 +1193,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) struct tcp_sock *tp = tcp_sk(sk); int time_to_ack = 0; -#if TCP_DEBUG struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); -#endif if (inet_csk_ack_scheduled(sk)) { const struct inet_connection_sock *icsk = inet_csk(sk); -- cgit v1.2.2