From 3b885787ea4112eaa80945999ea0901bf742707f Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 12 Oct 2009 13:26:31 -0700 Subject: net: Generalize socket rx gap / receive queue overflow cmsg Create a new socket level option to report number of queue overflows Recently I augmented the AF_PACKET protocol to report the number of frames lost on the socket receive queue between any two enqueued frames. This value was exported via a SOL_PACKET level cmsg. AFter I completed that work it was requested that this feature be generalized so that any datagram oriented socket could make use of this option. As such I've created this patch, It creates a new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a SOL_SOCKET level cmsg that reports the nubmer of times the sk_receive_queue overflowed between any two given frames. It also augments the AF_PACKET protocol to take advantage of this new feature (as it previously did not touch sk->sk_drops, which this patch uses to record the overflow count). Tested successfully by me. Notes: 1) Unlike my previous patch, this patch simply records the sk_drops value, which is not a number of drops between packets, but rather a total number of drops. Deltas must be computed in user space. 2) While this patch currently works with datagram oriented protocols, it will also be accepted by non-datagram oriented protocols. I'm not sure if thats agreeable to everyone, but my argument in favor of doing so is that, for those protocols which aren't applicable to this option, sk_drops will always be zero, and reporting no drops on a receive queue that isn't used for those non-participating protocols seems reasonable to me. This also saves us having to code in a per-protocol opt in mechanism. 3) This applies cleanly to net-next assuming that commit 977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted Signed-off-by: Neil Horman Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c8d05758661d..0970e92c6acd 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1958,7 +1958,7 @@ SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, if (err) goto out_free; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); if (sctp_ulpevent_is_notification(event)) { msg->msg_flags |= MSG_NOTIFICATION; sp->pf->event_msgname(event, msg->msg_name, addr_len); -- cgit v1.2.2 From c720c7e8383aff1cb219bddf474ed89d850336e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Oct 2009 06:30:45 +0000 Subject: inet: rename some inet_sock fields In order to have better cache layouts of struct sock (separate zones for rx/tx paths), we need this preliminary patch. Goal is to transfert fields used at lookup time in the first read-mostly cache line (inside struct sock_common) and move sk_refcnt to a separate cache line (only written by rx path) This patch adds inet_ prefix to daddr, rcv_saddr, dport, num, saddr, sport and id fields. This allows a future patch to define these fields as macros, like sk_refcnt, without name clashes. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sctp/protocol.c | 8 ++++---- net/sctp/socket.c | 30 +++++++++++++++--------------- 2 files changed, 19 insertions(+), 19 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 612dc878e05c..d9f4cc2c7869 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -296,19 +296,19 @@ static void sctp_v4_from_sk(union sctp_addr *addr, struct sock *sk) { addr->v4.sin_family = AF_INET; addr->v4.sin_port = 0; - addr->v4.sin_addr.s_addr = inet_sk(sk)->rcv_saddr; + addr->v4.sin_addr.s_addr = inet_sk(sk)->inet_rcv_saddr; } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ static void sctp_v4_to_sk_saddr(union sctp_addr *addr, struct sock *sk) { - inet_sk(sk)->rcv_saddr = addr->v4.sin_addr.s_addr; + inet_sk(sk)->inet_rcv_saddr = addr->v4.sin_addr.s_addr; } /* Initialize sk->sk_daddr from sctp_addr. */ static void sctp_v4_to_sk_daddr(union sctp_addr *addr, struct sock *sk) { - inet_sk(sk)->daddr = addr->v4.sin_addr.s_addr; + inet_sk(sk)->inet_daddr = addr->v4.sin_addr.s_addr; } /* Initialize a sctp_addr from an address parameter. */ @@ -598,7 +598,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, newinet = inet_sk(newsk); - newinet->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; + newinet->inet_daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; sk_refcnt_debug_inc(newsk); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0970e92c6acd..4085db99033d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -394,7 +394,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) /* Refresh ephemeral port. */ if (!bp->port) - bp->port = inet_sk(sk)->num; + bp->port = inet_sk(sk)->inet_num; /* Add the address to the bind address list. * Use GFP_ATOMIC since BHs will be disabled. @@ -403,7 +403,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) /* Copy back into socket for getsockname() use. */ if (!ret) { - inet_sk(sk)->sport = htons(inet_sk(sk)->num); + inet_sk(sk)->inet_sport = htons(inet_sk(sk)->inet_num); af->to_sk_saddr(addr, sk); } @@ -1115,7 +1115,7 @@ static int __sctp_connect(struct sock* sk, } /* Initialize sk's dport and daddr for getpeername() */ - inet_sk(sk)->dport = htons(asoc->peer.port); + inet_sk(sk)->inet_dport = htons(asoc->peer.port); af = sctp_get_af_specific(sa_addr->sa.sa_family); af->to_sk_daddr(sa_addr, sk); sk->sk_err = 0; @@ -5851,7 +5851,7 @@ pp_not_found: */ success: if (!sctp_sk(sk)->bind_hash) { - inet_sk(sk)->num = snum; + inet_sk(sk)->inet_num = snum; sk_add_bind_node(sk, &pp->owner); sctp_sk(sk)->bind_hash = pp; } @@ -5923,7 +5923,7 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog) if (sctp_autobind(sk)) return -EAGAIN; } else { - if (sctp_get_port(sk, inet_sk(sk)->num)) { + if (sctp_get_port(sk, inet_sk(sk)->inet_num)) { sk->sk_state = SCTP_SS_CLOSED; return -EADDRINUSE; } @@ -6094,14 +6094,14 @@ static void sctp_bucket_destroy(struct sctp_bind_bucket *pp) static inline void __sctp_put_port(struct sock *sk) { struct sctp_bind_hashbucket *head = - &sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->num)]; + &sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->inet_num)]; struct sctp_bind_bucket *pp; sctp_spin_lock(&head->lock); pp = sctp_sk(sk)->bind_hash; __sk_del_bind_node(sk); sctp_sk(sk)->bind_hash = NULL; - inet_sk(sk)->num = 0; + inet_sk(sk)->inet_num = 0; sctp_bucket_destroy(pp); sctp_spin_unlock(&head->lock); } @@ -6128,7 +6128,7 @@ static int sctp_autobind(struct sock *sk) /* Initialize a local sockaddr structure to INADDR_ANY. */ af = sctp_sk(sk)->pf->af; - port = htons(inet_sk(sk)->num); + port = htons(inet_sk(sk)->inet_num); af->inaddr_any(&autoaddr, port); return sctp_do_bind(sk, &autoaddr, af->sockaddr_len); @@ -6697,12 +6697,12 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, /* Initialize sk's sport, dport, rcv_saddr and daddr for * getsockname() and getpeername() */ - newinet->sport = inet->sport; - newinet->saddr = inet->saddr; - newinet->rcv_saddr = inet->rcv_saddr; - newinet->dport = htons(asoc->peer.port); + newinet->inet_sport = inet->inet_sport; + newinet->inet_saddr = inet->inet_saddr; + newinet->inet_rcv_saddr = inet->inet_rcv_saddr; + newinet->inet_dport = htons(asoc->peer.port); newinet->pmtudisc = inet->pmtudisc; - newinet->id = asoc->next_tsn ^ jiffies; + newinet->inet_id = asoc->next_tsn ^ jiffies; newinet->uc_ttl = inet->uc_ttl; newinet->mc_loop = 1; @@ -6741,13 +6741,13 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsp->hmac = NULL; /* Hook this new socket in to the bind_hash list. */ - head = &sctp_port_hashtable[sctp_phashfn(inet_sk(oldsk)->num)]; + head = &sctp_port_hashtable[sctp_phashfn(inet_sk(oldsk)->inet_num)]; sctp_local_bh_disable(); sctp_spin_lock(&head->lock); pp = sctp_sk(oldsk)->bind_hash; sk_add_bind_node(newsk, &pp->owner); sctp_sk(newsk)->bind_hash = pp; - inet_sk(newsk)->num = inet_sk(oldsk)->num; + inet_sk(newsk)->inet_num = inet_sk(oldsk)->inet_num; sctp_spin_unlock(&head->lock); sctp_local_bh_enable(); -- cgit v1.2.2 From c6d14c84566d6b70ad9dc1618db0dec87cca9300 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Nov 2009 05:43:23 -0800 Subject: net: Introduce for_each_netdev_rcu() iterator Adds RCU management to the list of netdevices. Convert some for_each_netdev() users to RCU version, if it can avoid read_lock-ing dev_base_lock Ie: read_lock(&dev_base_loack); for_each_netdev(net, dev) some_action(); read_unlock(&dev_base_lock); becomes : rcu_read_lock(); for_each_netdev_rcu(net, dev) some_action(); rcu_read_unlock(); Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sctp/protocol.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index d9f4cc2c7869..fe44c57101de 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -205,14 +205,14 @@ static void sctp_get_local_addr_list(void) struct list_head *pos; struct sctp_af *af; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { __list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); af->copy_addrlist(&sctp_local_addr_list, dev); } } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } /* Free the existing local addresses. */ -- cgit v1.2.2 From 13f18aa05f5abe135f47b6417537ae2b2fedc18c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 5 Nov 2009 20:44:37 -0800 Subject: net: drop capability from protocol definitions struct can_proto had a capability field which wasn't ever used. It is dropped entirely. struct inet_protosw had a capability field which can be more clearly expressed in the code by just checking if sock->type = SOCK_RAW. Signed-off-by: Eric Paris Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 2 -- net/sctp/protocol.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index bb280e60e00a..bacd6a7318be 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -930,7 +930,6 @@ static struct inet_protosw sctpv6_seqpacket_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, - .capability = -1, .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; @@ -939,7 +938,6 @@ static struct inet_protosw sctpv6_stream_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, - .capability = -1, .no_check = 0, .flags = SCTP_PROTOSW_FLAG, }; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index fe44c57101de..08ef203d36ac 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -909,7 +909,6 @@ static struct inet_protosw sctp_seqpacket_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctp_prot, .ops = &inet_seqpacket_ops, - .capability = -1, .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; @@ -918,7 +917,6 @@ static struct inet_protosw sctp_stream_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctp_prot, .ops = &inet_seqpacket_ops, - .capability = -1, .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; -- cgit v1.2.2 From 40c9c31e388bfd513269df737f874d0cd53c1616 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Nov 2009 20:56:46 -0800 Subject: sctp: ipv6: avoid touching device refcount Avoid touching device refcount in sctp/ipv6, thanks to RCU Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index bacd6a7318be..cc50fbe99291 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -837,15 +837,16 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) if (type & IPV6_ADDR_LINKLOCAL) { if (!addr->v6.sin6_scope_id) return 0; - dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id); - if (!dev) - return 0; - if (!ipv6_chk_addr(&init_net, &addr->v6.sin6_addr, + rcu_read_lock(); + dev = dev_get_by_index_rcu(&init_net, + addr->v6.sin6_scope_id); + if (!dev || + !ipv6_chk_addr(&init_net, &addr->v6.sin6_addr, dev, 0)) { - dev_put(dev); + rcu_read_unlock(); return 0; } - dev_put(dev); + rcu_read_unlock(); } else if (type == IPV6_ADDR_MAPPED) { if (!opt->v4mapped) return 0; @@ -873,10 +874,12 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) if (type & IPV6_ADDR_LINKLOCAL) { if (!addr->v6.sin6_scope_id) return 0; - dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id); + rcu_read_lock(); + dev = dev_get_by_index_rcu(&init_net, + addr->v6.sin6_scope_id); + rcu_read_unlock(); if (!dev) return 0; - dev_put(dev); } af = opt->pf->af; } -- cgit v1.2.2 From f8572d8f2a2ba75408b97dc24ef47c83671795d7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 5 Nov 2009 13:32:03 -0800 Subject: sysctl net: Remove unused binary sysctl code Now that sys_sysctl is a compatiblity wrapper around /proc/sys all sysctl strategy routines, and all ctl_name and strategy entries in the sysctl tables are unused, and can be revmoed. In addition neigh_sysctl_register has been modified to no longer take a strategy argument and it's callers have been modified not to pass one. Cc: "David Miller" Cc: Hideaki YOSHIFUJI Cc: netdev@vger.kernel.org Signed-off-by: Eric W. Biederman --- net/sctp/sysctl.c | 49 +++---------------------------------------------- 1 file changed, 3 insertions(+), 46 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index ab7151da120f..c4ece9829541 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -59,180 +59,145 @@ extern int sysctl_sctp_wmem[3]; static ctl_table sctp_table[] = { { - .ctl_name = NET_SCTP_RTO_INITIAL, .procname = "rto_initial", .data = &sctp_rto_initial, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, { - .ctl_name = NET_SCTP_RTO_MIN, .procname = "rto_min", .data = &sctp_rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, { - .ctl_name = NET_SCTP_RTO_MAX, .procname = "rto_max", .data = &sctp_rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, { - .ctl_name = NET_SCTP_VALID_COOKIE_LIFE, .procname = "valid_cookie_life", .data = &sctp_valid_cookie_life, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, { - .ctl_name = NET_SCTP_MAX_BURST, .procname = "max_burst", .data = &sctp_max_burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &zero, .extra2 = &int_max }, { - .ctl_name = NET_SCTP_ASSOCIATION_MAX_RETRANS, .procname = "association_max_retrans", .data = &sctp_max_retrans_association, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, { - .ctl_name = NET_SCTP_SNDBUF_POLICY, .procname = "sndbuf_policy", .data = &sctp_sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_RCVBUF_POLICY, .procname = "rcvbuf_policy", .data = &sctp_rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_PATH_MAX_RETRANS, .procname = "path_max_retrans", .data = &sctp_max_retrans_path, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, { - .ctl_name = NET_SCTP_MAX_INIT_RETRANSMITS, .procname = "max_init_retransmits", .data = &sctp_max_retrans_init, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, { - .ctl_name = NET_SCTP_HB_INTERVAL, .procname = "hb_interval", .data = &sctp_hb_interval, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, { - .ctl_name = NET_SCTP_PRESERVE_ENABLE, .procname = "cookie_preserve_enable", .data = &sctp_cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_RTO_ALPHA, .procname = "rto_alpha_exp_divisor", .data = &sctp_rto_alpha, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_RTO_BETA, .procname = "rto_beta_exp_divisor", .data = &sctp_rto_beta, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_ADDIP_ENABLE, .procname = "addip_enable", .data = &sctp_addip_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_PRSCTP_ENABLE, .procname = "prsctp_enable", .data = &sctp_prsctp_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_SACK_TIMEOUT, .procname = "sack_timeout", .data = &sctp_sack_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &sack_timer_min, .extra2 = &sack_timer_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "sctp_mem", .data = &sysctl_sctp_mem, .maxlen = sizeof(sysctl_sctp_mem), @@ -240,7 +205,6 @@ static ctl_table sctp_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "sctp_rmem", .data = &sysctl_sctp_rmem, .maxlen = sizeof(sysctl_sctp_rmem), @@ -248,7 +212,6 @@ static ctl_table sctp_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "sctp_wmem", .data = &sysctl_sctp_wmem, .maxlen = sizeof(sysctl_sctp_wmem), @@ -256,40 +219,34 @@ static ctl_table sctp_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "auth_enable", .data = &sctp_auth_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = CTL_UNNUMBERED, .procname = "addip_noauth_enable", .data = &sctp_addip_noauth, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = CTL_UNNUMBERED, .procname = "addr_scope_policy", .data = &sctp_scope_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, .extra1 = &zero, .extra2 = &addr_scope_max, }, - { .ctl_name = 0 } + { } }; static struct ctl_path sctp_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "sctp", .ctl_name = NET_SCTP, }, + { .procname = "net", }, + { .procname = "sctp", }, { } }; -- cgit v1.2.2 From 6d4561110a3e9fa742aeec6717248a491dfb1878 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 16 Nov 2009 03:11:48 -0800 Subject: sysctl: Drop & in front of every proc_handler. For consistency drop & in front of every proc_handler. Explicity taking the address is unnecessary and it prevents optimizations like stubbing the proc_handlers to NULL. Cc: Alexey Dobriyan Cc: Ingo Molnar Cc: Joe Perches Signed-off-by: Eric W. Biederman --- net/sctp/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index c4ece9829541..d50a042f9552 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -237,7 +237,7 @@ static ctl_table sctp_table[] = { .data = &sctp_scope_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax, .extra1 = &zero, .extra2 = &addr_scope_max, }, -- cgit v1.2.2 From 6dc7694f9df20f148076d82d00cb3663afb0b000 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 23 Nov 2009 15:53:53 -0500 Subject: sctp: implement the receiver side for SACK-IMMEDIATELY extension This patch implement the receiver side for SACK-IMMEDIATELY extension: Section 4.2. Receiver Side Considerations On reception of an SCTP packet containing a DATA chunk with the I-bit set, the receiver SHOULD NOT delay the sending of the corresponding SACK chunk and SHOULD send it back immediately. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/sm_statefuns.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index d4df45022ffa..8ee24c9dc7e9 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2868,6 +2868,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; + sctp_arg_t force = SCTP_NOFORCE(); int error; if (!sctp_vtag_verify(chunk, asoc)) { @@ -2901,6 +2902,9 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, BUG(); } + if (chunk->chunk_hdr->flags & SCTP_DATA_SACK_IMM) + force = SCTP_FORCE(); + if (asoc->autoclose) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -2929,7 +2933,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, * more aggressive than the following algorithms allow. */ if (chunk->end_of_packet) - sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); + sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, force); return SCTP_DISPOSITION_CONSUME; @@ -2954,7 +2958,7 @@ discard_force: discard_noforce: if (chunk->end_of_packet) - sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); + sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, force); return SCTP_DISPOSITION_DISCARD; consume: -- cgit v1.2.2 From b93d6471748de2ce02cc24774b774deb306a57a8 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 23 Nov 2009 15:53:56 -0500 Subject: sctp: implement the sender side for SACK-IMMEDIATELY extension This patch implement the sender side for SACK-IMMEDIATELY extension. Section 4.1. Sender Side Considerations Whenever the sender of a DATA chunk can benefit from the corresponding SACK chunk being sent back without delay, the sender MAY set the I-bit in the DATA chunk header. Reasons for setting the I-bit include o The sender is in the SHUTDOWN-PENDING state. o The application requests to set the I-bit of the last DATA chunk of a user message when providing the user message to the SCTP implementation. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich --- net/sctp/chunk.c | 15 ++++++++++++++- net/sctp/outqueue.c | 7 +++++++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index acf7c4d128f7..8e4320040f05 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -263,9 +263,18 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, if (0 == i) frag |= SCTP_DATA_FIRST_FRAG; - if ((i == (whole - 1)) && !over) + if ((i == (whole - 1)) && !over) { frag |= SCTP_DATA_LAST_FRAG; + /* The application requests to set the I-bit of the + * last DATA chunk of a user message when providing + * the user message to the SCTP implementation. + */ + if ((sinfo->sinfo_flags & SCTP_EOF) || + (sinfo->sinfo_flags & SCTP_SACK_IMMEDIATELY)) + frag |= SCTP_DATA_SACK_IMM; + } + chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag, 0); if (!chunk) @@ -297,6 +306,10 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, else frag = SCTP_DATA_LAST_FRAG; + if ((sinfo->sinfo_flags & SCTP_EOF) || + (sinfo->sinfo_flags & SCTP_SACK_IMMEDIATELY)) + frag |= SCTP_DATA_SACK_IMM; + chunk = sctp_make_datafrag_empty(asoc, sinfo, over, frag, 0); if (!chunk) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index c9f20e28521b..5732661c87d3 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1011,6 +1011,13 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) break; case SCTP_XMIT_OK: + /* The sender is in the SHUTDOWN-PENDING state, + * The sender MAY set the I-bit in the DATA + * chunk header. + */ + if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) + chunk->chunk_hdr->flags |= SCTP_DATA_SACK_IMM; + break; default: -- cgit v1.2.2 From 6383cfb3ed3c5c0bea06da0099c219ef4237ecf5 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:56 -0500 Subject: sctp: Fix malformed "Invalid Stream Identifier" error The "Invalid Stream Identifier" error has a 16 bit reserved field at the end, thus making the parameter length be 8 bytes. We've never supplied that reserved field making wireshark tag the packet as malformed. Reported-by: Chris Dischino Signed-off-by: Vlad Yasevich --- net/sctp/sm_make_chunk.c | 13 +++++++++---- net/sctp/sm_statefuns.c | 13 ++++++++----- 2 files changed, 17 insertions(+), 9 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 9d881a61ac02..9e732916b671 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -987,7 +987,10 @@ static void *sctp_addto_param(struct sctp_chunk *chunk, int len, target = skb_put(chunk->skb, len); - memcpy(target, data, len); + if (data) + memcpy(target, data, len); + else + memset(target, 0, len); /* Adjust the chunk length field. */ chunk->chunk_hdr->length = htons(chunklen + len); @@ -1129,16 +1132,18 @@ nodata: struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, const struct sctp_chunk *chunk, __be16 cause_code, const void *payload, - size_t paylen) + size_t paylen, size_t reserve_tail) { struct sctp_chunk *retval; - retval = sctp_make_op_error_space(asoc, chunk, paylen); + retval = sctp_make_op_error_space(asoc, chunk, paylen + reserve_tail); if (!retval) goto nodata; - sctp_init_cause(retval, cause_code, paylen); + sctp_init_cause(retval, cause_code, paylen + reserve_tail); sctp_addto_chunk(retval, paylen, payload); + if (reserve_tail) + sctp_addto_param(retval, reserve_tail, NULL); nodata: return retval; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 8ee24c9dc7e9..16a603527df2 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1720,7 +1720,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_COOKIE_IN_SHUTDOWN, - NULL, 0); + NULL, 0, 0); if (err) sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err)); @@ -3977,7 +3977,7 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, err_chunk = sctp_make_op_error(asoc, chunk, SCTP_ERROR_UNSUP_HMAC, &auth_hdr->hmac_id, - sizeof(__u16)); + sizeof(__u16), 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err_chunk)); @@ -4069,7 +4069,8 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length))); + WORD_ROUND(ntohs(hdr->length)), + 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err_chunk)); @@ -4088,7 +4089,8 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length))); + WORD_ROUND(ntohs(hdr->length)), + 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err_chunk)); @@ -6052,7 +6054,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_INV_STRM, &data_hdr->stream, - sizeof(data_hdr->stream)); + sizeof(data_hdr->stream), + sizeof(u16)); if (err) sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err)); -- cgit v1.2.2 From e0e9db178a5ba4dbb5f16f958f1affbdc63d2cc4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:57 -0500 Subject: sctp: Select a working primary during sctp_connectx() When sctp_connectx() is used, we pick the first address as primary, even though it may not have worked. This results in excessive retransmits and poor performance. We should select the address that the association was established with. Reported-by: Thomas Dreibholz Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 6 ++++++ net/sctp/sm_sideeffect.c | 2 ++ 2 files changed, 8 insertions(+) (limited to 'net/sctp') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 7eed77a39d0d..8e755ebff3b8 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -512,7 +512,13 @@ void sctp_assoc_set_primary(struct sctp_association *asoc, * to this destination address earlier. The sender MUST set * CYCLING_CHANGEOVER to indicate that this switch is a * double switch to the same destination address. + * + * Really, only bother is we have data queued or outstanding on + * the association. */ + if (!asoc->outqueue.outstanding_bytes && !asoc->outqueue.out_qlen) + return; + if (transport->cacc.changeover_active) transport->cacc.cycling_changeover = changeover; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8674d4919556..eda4fe783be5 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1418,6 +1418,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, asoc->init_last_sent_to = t; chunk->transport = t; t->init_sent_count++; + /* Set the new transport as primary */ + sctp_assoc_set_primary(asoc, t); break; case SCTP_CMD_INIT_RESTART: -- cgit v1.2.2 From 90f2f5318b3a5b0898fef0fec9b91376c7de7a2c Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:57 -0500 Subject: sctp: Update SWS avaoidance receiver side algorithm We currently send window update SACKs every time we free up 1 PMTU worth of data. That a lot more SACKs then necessary. Instead, we'll now send back the actuall window every time we send a sack, and do window-update SACKs when a fraction of the receive buffer has been opened. The fraction is controlled with a sysctl. Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 5 +++-- net/sctp/protocol.c | 3 +++ net/sctp/sm_sideeffect.c | 3 +-- net/sctp/sysctl.c | 13 +++++++++++++ 4 files changed, 20 insertions(+), 4 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 8e755ebff3b8..37e982510bea 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1383,8 +1383,9 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc) case SCTP_STATE_SHUTDOWN_RECEIVED: case SCTP_STATE_SHUTDOWN_SENT: if ((asoc->rwnd > asoc->a_rwnd) && - ((asoc->rwnd - asoc->a_rwnd) >= - min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pathmtu))) + ((asoc->rwnd - asoc->a_rwnd) >= max_t(__u32, + (asoc->base.sk->sk_rcvbuf >> sctp_rwnd_upd_shift), + asoc->pathmtu))) return 1; break; default: diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 08ef203d36ac..a3c8988758b1 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1258,6 +1258,9 @@ SCTP_STATIC __init int sctp_init(void) /* Set SCOPE policy to enabled */ sctp_scope_policy = SCTP_SCOPE_POLICY_ENABLE; + /* Set the default rwnd update threshold */ + sctp_rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT; + sctp_sysctl_register(); INIT_LIST_HEAD(&sctp_address_families); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index eda4fe783be5..8ae67098e094 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -217,8 +217,7 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); } else { - if (asoc->a_rwnd > asoc->rwnd) - asoc->a_rwnd = asoc->rwnd; + asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); if (!sack) goto nomem; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index ab7151da120f..ae03ded2bf1a 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -52,6 +52,7 @@ static int int_max = INT_MAX; static int sack_timer_min = 1; static int sack_timer_max = 500; static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ +static int rwnd_scale_max = 16; extern int sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; @@ -284,6 +285,18 @@ static ctl_table sctp_table[] = { .extra1 = &zero, .extra2 = &addr_scope_max, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "rwnd_update_shift", + .data = &sctp_rwnd_upd_shift, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &rwnd_scale_max, + }, + { .ctl_name = 0 } }; -- cgit v1.2.2 From 37051f73862d15862aecc08b887b2884137af327 Mon Sep 17 00:00:00 2001 From: Andrei Pelinescu-Onciul Date: Mon, 23 Nov 2009 15:53:57 -0500 Subject: sctp: allow setting path_maxrxt independent of SPP_PMTUD_ENABLE Since draft-ietf-tsvwg-sctpsocket-15.txt, setting the SPP_MTUD_ENABLE flag when changing pathmaxrxt via the SCTP_PEER_ADDR_PARAMS setsockopt is not required any longer. Signed-off-by: Andrei Pelinescu-Onciul Signed-off-by: Vlad Yasevich --- net/sctp/socket.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 66b1f02b17ba..a4577a75c6c0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2311,11 +2311,10 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, } } - /* Note that unless the spp_flag is set to SPP_PMTUD_ENABLE the value - * of this field is ignored. Note also that a value of zero - * indicates the current setting should be left unchanged. + /* Note that a value of zero indicates the current setting should be + left unchanged. */ - if ((params->spp_flags & SPP_PMTUD_ENABLE) && params->spp_pathmaxrxt) { + if (params->spp_pathmaxrxt) { if (trans) { trans->pathmaxrxt = params->spp_pathmaxrxt; } else if (asoc) { -- cgit v1.2.2 From a242b41dedfe0fd51ab1c906daa703c09b196744 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 23 Nov 2009 15:53:58 -0500 Subject: sctp: remove deprecated SCTP_GET_*_OLD stuffs SCTP_GET_*_OLD stuffs are schedlued to be removed. Cc: Vlad Yasevich Signed-off-by: WANG Cong Signed-off-by: Vlad Yasevich --- net/sctp/socket.c | 325 ------------------------------------------------------ 1 file changed, 325 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a4577a75c6c0..d2681a6bc6fa 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4348,90 +4348,6 @@ static int sctp_getsockopt_initmsg(struct sock *sk, int len, char __user *optval return 0; } -static int sctp_getsockopt_peer_addrs_num_old(struct sock *sk, int len, - char __user *optval, - int __user *optlen) -{ - sctp_assoc_t id; - struct sctp_association *asoc; - struct list_head *pos; - int cnt = 0; - - if (len < sizeof(sctp_assoc_t)) - return -EINVAL; - - if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) - return -EFAULT; - - printk(KERN_WARNING "SCTP: Use of SCTP_GET_PEER_ADDRS_NUM_OLD " - "socket option deprecated\n"); - /* For UDP-style sockets, id specifies the association to query. */ - asoc = sctp_id2assoc(sk, id); - if (!asoc) - return -EINVAL; - - list_for_each(pos, &asoc->peer.transport_addr_list) { - cnt ++; - } - - return cnt; -} - -/* - * Old API for getting list of peer addresses. Does not work for 32-bit - * programs running on a 64-bit kernel - */ -static int sctp_getsockopt_peer_addrs_old(struct sock *sk, int len, - char __user *optval, - int __user *optlen) -{ - struct sctp_association *asoc; - int cnt = 0; - struct sctp_getaddrs_old getaddrs; - struct sctp_transport *from; - void __user *to; - union sctp_addr temp; - struct sctp_sock *sp = sctp_sk(sk); - int addrlen; - - if (len < sizeof(struct sctp_getaddrs_old)) - return -EINVAL; - - len = sizeof(struct sctp_getaddrs_old); - - if (copy_from_user(&getaddrs, optval, len)) - return -EFAULT; - - if (getaddrs.addr_num <= 0) return -EINVAL; - - printk(KERN_WARNING "SCTP: Use of SCTP_GET_PEER_ADDRS_OLD " - "socket option deprecated\n"); - - /* For UDP-style sockets, id specifies the association to query. */ - asoc = sctp_id2assoc(sk, getaddrs.assoc_id); - if (!asoc) - return -EINVAL; - - to = (void __user *)getaddrs.addrs; - list_for_each_entry(from, &asoc->peer.transport_addr_list, - transports) { - memcpy(&temp, &from->ipaddr, sizeof(temp)); - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); - addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len; - if (copy_to_user(to, &temp, addrlen)) - return -EFAULT; - to += addrlen ; - cnt ++; - if (cnt >= getaddrs.addr_num) break; - } - getaddrs.addr_num = cnt; - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, &getaddrs, len)) - return -EFAULT; - - return 0; -} static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, char __user *optval, int __user *optlen) @@ -4484,125 +4400,6 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, return 0; } -static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, - char __user *optval, - int __user *optlen) -{ - sctp_assoc_t id; - struct sctp_bind_addr *bp; - struct sctp_association *asoc; - struct sctp_sockaddr_entry *addr; - int cnt = 0; - - if (len < sizeof(sctp_assoc_t)) - return -EINVAL; - - if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) - return -EFAULT; - - printk(KERN_WARNING "SCTP: Use of SCTP_GET_LOCAL_ADDRS_NUM_OLD " - "socket option deprecated\n"); - - /* - * For UDP-style sockets, id specifies the association to query. - * If the id field is set to the value '0' then the locally bound - * addresses are returned without regard to any particular - * association. - */ - if (0 == id) { - bp = &sctp_sk(sk)->ep->base.bind_addr; - } else { - asoc = sctp_id2assoc(sk, id); - if (!asoc) - return -EINVAL; - bp = &asoc->base.bind_addr; - } - - /* If the endpoint is bound to 0.0.0.0 or ::0, count the valid - * addresses from the global local address list. - */ - if (sctp_list_single_entry(&bp->address_list)) { - addr = list_entry(bp->address_list.next, - struct sctp_sockaddr_entry, list); - if (sctp_is_any(sk, &addr->a)) { - rcu_read_lock(); - list_for_each_entry_rcu(addr, - &sctp_local_addr_list, list) { - if (!addr->valid) - continue; - - if ((PF_INET == sk->sk_family) && - (AF_INET6 == addr->a.sa.sa_family)) - continue; - - if ((PF_INET6 == sk->sk_family) && - inet_v6_ipv6only(sk) && - (AF_INET == addr->a.sa.sa_family)) - continue; - - cnt++; - } - rcu_read_unlock(); - } else { - cnt = 1; - } - goto done; - } - - /* Protection on the bound address list is not needed, - * since in the socket option context we hold the socket lock, - * so there is no way that the bound address list can change. - */ - list_for_each_entry(addr, &bp->address_list, list) { - cnt ++; - } -done: - return cnt; -} - -/* Helper function that copies local addresses to user and returns the number - * of addresses copied. - */ -static int sctp_copy_laddrs_old(struct sock *sk, __u16 port, - int max_addrs, void *to, - int *bytes_copied) -{ - struct sctp_sockaddr_entry *addr; - union sctp_addr temp; - int cnt = 0; - int addrlen; - - rcu_read_lock(); - list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { - if (!addr->valid) - continue; - - if ((PF_INET == sk->sk_family) && - (AF_INET6 == addr->a.sa.sa_family)) - continue; - if ((PF_INET6 == sk->sk_family) && - inet_v6_ipv6only(sk) && - (AF_INET == addr->a.sa.sa_family)) - continue; - memcpy(&temp, &addr->a, sizeof(temp)); - if (!temp.v4.sin_port) - temp.v4.sin_port = htons(port); - - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), - &temp); - addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - memcpy(to, &temp, addrlen); - - to += addrlen; - *bytes_copied += addrlen; - cnt ++; - if (cnt >= max_addrs) break; - } - rcu_read_unlock(); - - return cnt; -} - static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, size_t space_left, int *bytes_copied) { @@ -4646,112 +4443,6 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, return cnt; } -/* Old API for getting list of local addresses. Does not work for 32-bit - * programs running on a 64-bit kernel - */ -static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, - char __user *optval, int __user *optlen) -{ - struct sctp_bind_addr *bp; - struct sctp_association *asoc; - int cnt = 0; - struct sctp_getaddrs_old getaddrs; - struct sctp_sockaddr_entry *addr; - void __user *to; - union sctp_addr temp; - struct sctp_sock *sp = sctp_sk(sk); - int addrlen; - int err = 0; - void *addrs; - void *buf; - int bytes_copied = 0; - - if (len < sizeof(struct sctp_getaddrs_old)) - return -EINVAL; - - len = sizeof(struct sctp_getaddrs_old); - if (copy_from_user(&getaddrs, optval, len)) - return -EFAULT; - - if (getaddrs.addr_num <= 0 || - getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr))) - return -EINVAL; - - printk(KERN_WARNING "SCTP: Use of SCTP_GET_LOCAL_ADDRS_OLD " - "socket option deprecated\n"); - - /* - * For UDP-style sockets, id specifies the association to query. - * If the id field is set to the value '0' then the locally bound - * addresses are returned without regard to any particular - * association. - */ - if (0 == getaddrs.assoc_id) { - bp = &sctp_sk(sk)->ep->base.bind_addr; - } else { - asoc = sctp_id2assoc(sk, getaddrs.assoc_id); - if (!asoc) - return -EINVAL; - bp = &asoc->base.bind_addr; - } - - to = getaddrs.addrs; - - /* Allocate space for a local instance of packed array to hold all - * the data. We store addresses here first and then put write them - * to the user in one shot. - */ - addrs = kmalloc(sizeof(union sctp_addr) * getaddrs.addr_num, - GFP_KERNEL); - if (!addrs) - return -ENOMEM; - - /* If the endpoint is bound to 0.0.0.0 or ::0, get the valid - * addresses from the global local address list. - */ - if (sctp_list_single_entry(&bp->address_list)) { - addr = list_entry(bp->address_list.next, - struct sctp_sockaddr_entry, list); - if (sctp_is_any(sk, &addr->a)) { - cnt = sctp_copy_laddrs_old(sk, bp->port, - getaddrs.addr_num, - addrs, &bytes_copied); - goto copy_getaddrs; - } - } - - buf = addrs; - /* Protection on the bound address list is not needed since - * in the socket option context we hold a socket lock and - * thus the bound address list can't change. - */ - list_for_each_entry(addr, &bp->address_list, list) { - memcpy(&temp, &addr->a, sizeof(temp)); - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); - addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - memcpy(buf, &temp, addrlen); - buf += addrlen; - bytes_copied += addrlen; - cnt ++; - if (cnt >= getaddrs.addr_num) break; - } - -copy_getaddrs: - /* copy the entire address list into the user provided space */ - if (copy_to_user(to, addrs, bytes_copied)) { - err = -EFAULT; - goto error; - } - - /* copy the leading structure back to user */ - getaddrs.addr_num = cnt; - if (copy_to_user(optval, &getaddrs, len)) - err = -EFAULT; - -error: - kfree(addrs); - return err; -} static int sctp_getsockopt_local_addrs(struct sock *sk, int len, char __user *optval, int __user *optlen) @@ -5602,22 +5293,6 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_INITMSG: retval = sctp_getsockopt_initmsg(sk, len, optval, optlen); break; - case SCTP_GET_PEER_ADDRS_NUM_OLD: - retval = sctp_getsockopt_peer_addrs_num_old(sk, len, optval, - optlen); - break; - case SCTP_GET_LOCAL_ADDRS_NUM_OLD: - retval = sctp_getsockopt_local_addrs_num_old(sk, len, optval, - optlen); - break; - case SCTP_GET_PEER_ADDRS_OLD: - retval = sctp_getsockopt_peer_addrs_old(sk, len, optval, - optlen); - break; - case SCTP_GET_LOCAL_ADDRS_OLD: - retval = sctp_getsockopt_local_addrs_old(sk, len, optval, - optlen); - break; case SCTP_GET_PEER_ADDRS: retval = sctp_getsockopt_peer_addrs(sk, len, optval, optlen); -- cgit v1.2.2 From 245cba7e55929dc2b10b7d915bfba0168eeeed17 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:53:58 -0500 Subject: sctp: Remove useless last_time_used variable The transport last_time_used variable is rather useless. It was only used when determining if CWND needs to be updated due to idle transport. However, idle transport detection was based on a Heartbeat timer and last_time_used was not incremented when sending Heartbeats. As a result the check for cwnd reduction was always true. We can get rid of the variable and just base our cwnd manipulation on the HB timer (like the code comment sais). We also have to call into the cwnd manipulation function regardless of whether HBs are enabled or not. That way we will detect idle transports if the user has disabled Heartbeats. Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 2 -- net/sctp/sm_statefuns.c | 5 +++-- net/sctp/transport.c | 7 ++----- 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/output.c b/net/sctp/output.c index 5cbda8f1ddfd..9e8e0ea844be 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -557,8 +557,6 @@ int sctp_packet_transmit(struct sctp_packet *packet) struct timer_list *timer; unsigned long timeout; - tp->last_time_used = jiffies; - /* Restart the AUTOCLOSE timer when sending data. */ if (sctp_state(asoc, ESTABLISHED) && asoc->autoclose) { timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 16a603527df2..1ef9de9bbae9 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -996,14 +996,15 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, sctp_sf_heartbeat(ep, asoc, type, arg, commands)) return SCTP_DISPOSITION_NOMEM; + /* Set transport error counter and association error counter * when sending heartbeat. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_IDLE, - SCTP_TRANSPORT(transport)); sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_HB_SENT, SCTP_TRANSPORT(transport)); } + sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_IDLE, + SCTP_TRANSPORT(transport)); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMER_UPDATE, SCTP_TRANSPORT(transport)); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 3b141bb32faf..2df29cbdaf5a 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -83,7 +83,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->fast_recovery = 0; peer->last_time_heard = jiffies; - peer->last_time_used = jiffies; peer->last_time_ecne_reduced = jiffies; peer->init_sent_count = 0; @@ -565,10 +564,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * to be done every RTO interval, we do it every hearbeat * interval. */ - if (time_after(jiffies, transport->last_time_used + - transport->rto)) - transport->cwnd = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + transport->cwnd = max(transport->cwnd/2, + 4*transport->asoc->pathmtu); break; } -- cgit v1.2.2 From 46d5a808558181e03a4760d2188cc9879445738a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:54:00 -0500 Subject: sctp: Update max.burst implementation Current implementation of max.burst ends up limiting new data during cwnd decay period. The decay is happening becuase the connection is idle and we are allowed to fill the congestion window. The point of max.burst is to limit micro-bursts in response to large acks. This still happens, as max.burst is still applied to each transmit opportunity. It will also apply if a very large send is made (greater then allowed by burst). Tested-by: Florian Niederbacher Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 1 + net/sctp/output.c | 23 ----------------------- net/sctp/outqueue.c | 15 +++++++++++++++ net/sctp/transport.c | 38 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 54 insertions(+), 23 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 37e982510bea..880dae2ca87b 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -738,6 +738,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, peer->partial_bytes_acked = 0; peer->flight_size = 0; + peer->burst_limited = 0; /* Set the transport's RTO.initial value */ peer->rto = asoc->rto_initial; diff --git a/net/sctp/output.c b/net/sctp/output.c index 9e8e0ea844be..b210d2077e28 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -615,7 +615,6 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, sctp_xmit_t retval = SCTP_XMIT_OK; size_t datasize, rwnd, inflight, flight_size; struct sctp_transport *transport = packet->transport; - __u32 max_burst_bytes; struct sctp_association *asoc = transport->asoc; struct sctp_outq *q = &asoc->outqueue; @@ -648,28 +647,6 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, } } - /* sctpimpguide-05 2.14.2 - * D) When the time comes for the sender to - * transmit new DATA chunks, the protocol parameter Max.Burst MUST - * first be applied to limit how many new DATA chunks may be sent. - * The limit is applied by adjusting cwnd as follows: - * if ((flightsize + Max.Burst * MTU) < cwnd) - * cwnd = flightsize + Max.Burst * MTU - */ - max_burst_bytes = asoc->max_burst * asoc->pathmtu; - if ((flight_size + max_burst_bytes) < transport->cwnd) { - transport->cwnd = flight_size + max_burst_bytes; - SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: " - "transport: %p, cwnd: %d, " - "ssthresh: %d, flight_size: %d, " - "pba: %d\n", - __func__, transport, - transport->cwnd, - transport->ssthresh, - transport->flight_size, - transport->partial_bytes_acked); - } - /* RFC 2960 6.1 Transmission of DATA Chunks * * B) At any given time, the sender MUST NOT transmit new data diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 5732661c87d3..2f2377369e2b 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -931,6 +931,14 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) goto sctp_flush_out; } + /* Apply Max.Burst limitation to the current transport in + * case it will be used for new data. We are going to + * rest it before we return, but we want to apply the limit + * to the currently queued data. + */ + if (transport) + sctp_transport_burst_limited(transport); + /* Finally, transmit new packets. */ while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { /* RFC 2960 6.5 Every DATA chunk MUST carry a valid @@ -976,6 +984,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) packet = &transport->packet; sctp_packet_config(packet, vtag, asoc->peer.ecn_capable); + /* We've switched transports, so apply the + * Burst limit to the new transport. + */ + sctp_transport_burst_limited(transport); } SCTP_DEBUG_PRINTK("sctp_outq_flush(%p, %p[%s]), ", @@ -1070,6 +1082,9 @@ sctp_flush_out: packet = &t->packet; if (!sctp_packet_empty(packet)) error = sctp_packet_transmit(packet); + + /* Clear the burst limited state, if any */ + sctp_transport_burst_reset(t); } return error; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 2df29cbdaf5a..9a6cb22d129f 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -576,6 +576,43 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, transport->cwnd, transport->ssthresh); } +/* Apply Max.Burst limit to the congestion window: + * sctpimpguide-05 2.14.2 + * D) When the time comes for the sender to + * transmit new DATA chunks, the protocol parameter Max.Burst MUST + * first be applied to limit how many new DATA chunks may be sent. + * The limit is applied by adjusting cwnd as follows: + * if ((flightsize+ Max.Burst * MTU) < cwnd) + * cwnd = flightsize + Max.Burst * MTU + */ + +void sctp_transport_burst_limited(struct sctp_transport *t) +{ + struct sctp_association *asoc = t->asoc; + u32 old_cwnd = t->cwnd; + u32 max_burst_bytes; + + if (t->burst_limited) + return; + + max_burst_bytes = t->flight_size + (asoc->max_burst * asoc->pathmtu); + if (max_burst_bytes < old_cwnd) { + t->cwnd = max_burst_bytes; + t->burst_limited = old_cwnd; + } +} + +/* Restore the old cwnd congestion window, after the burst had it's + * desired effect. + */ +void sctp_transport_burst_reset(struct sctp_transport *t) +{ + if (t->burst_limited) { + t->cwnd = t->burst_limited; + t->burst_limited = 0; + } +} + /* What is the next timeout value for this transport? */ unsigned long sctp_transport_timeout(struct sctp_transport *t) { @@ -598,6 +635,7 @@ void sctp_transport_reset(struct sctp_transport *t) * (see Section 6.2.1) */ t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); + t->burst_limited = 0; t->ssthresh = asoc->peer.i.a_rwnd; t->last_rto = t->rto = asoc->rto_initial; t->rtt = 0; -- cgit v1.2.2 From d8dd15781dd621c5ceab79083f4c5112787863f5 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 23 Nov 2009 15:54:00 -0500 Subject: sctp: Fix mis-ordering of user space data when multihoming in use Recently had a bug reported to me, in which the user was sending packets with a payload containing a sequence number. The packets were getting delivered in order according the chunk TSN values, but the sequence values in the payload were arriving out of order. At first I thought it must be an application error, but we eventually found it to be a problem on the transmit side in the sctp stack. The conditions for the error are that multihoming must be in use, and it helps if each transport has a different pmtu. The problem occurs in sctp_outq_flush. Basically we dequeue packets from the data queue, and attempt to append them to the orrered packet for a given transport. After we append a data chunk we add the trasport to the end of a list of transports to have their packets sent at the end of sctp_outq_flush. The problem occurs when a data chunks fills up a offered packet on a transport. The function that does the appending (sctp_packet_transmit_chunk), will try to call sctp_packet_transmit on the full packet, and then append the chunk to a new packet. This call to sctp_packet_transmit, sends that packet ahead of the others that may be queued in the transport_list in sctp_outq_flush. The result is that frames that were sent in one order from the user space sending application get re-ordered prior to tsn assignment in sctp_packet_transmit, resulting in mis-sequencing of data payloads, even though tsn ordering is correct. The fix is to change where we assign a tsn. By doing this earlier, we are then free to place chunks in packets, whatever way we see fit and the protocol will make sure to do all the appropriate re-ordering on receive as is needed. Signed-off-by: Neil Horman Reported-by: William Reich Signed-off-by: Vlad Yasevich --- net/sctp/output.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/output.c b/net/sctp/output.c index b210d2077e28..7c5589363433 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -429,23 +429,22 @@ int sctp_packet_transmit(struct sctp_packet *packet) list_del_init(&chunk->list); if (sctp_chunk_is_data(chunk)) { - if (!chunk->has_tsn) { - sctp_chunk_assign_ssn(chunk); - sctp_chunk_assign_tsn(chunk); - - /* 6.3.1 C4) When data is in flight and when allowed - * by rule C5, a new RTT measurement MUST be made each - * round trip. Furthermore, new RTT measurements - * SHOULD be made no more than once per round-trip - * for a given destination transport address. - */ + if (!chunk->resent) { + + /* 6.3.1 C4) When data is in flight and when allowed + * by rule C5, a new RTT measurement MUST be made each + * round trip. Furthermore, new RTT measurements + * SHOULD be made no more than once per round-trip + * for a given destination transport address. + */ if (!tp->rto_pending) { chunk->rtt_in_progress = 1; tp->rto_pending = 1; } - } else - chunk->resent = 1; + } + + chunk->resent = 1; has_data = 1; } @@ -722,6 +721,8 @@ static void sctp_packet_append_data(struct sctp_packet *packet, /* Has been accepted for transmission. */ if (!asoc->peer.prsctp_capable) chunk->msg->can_abandon = 0; + sctp_chunk_assign_tsn(chunk); + sctp_chunk_assign_ssn(chunk); } static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, -- cgit v1.2.2 From f6778aab6ccc4b510b4dcfa770d9949b696b4545 Mon Sep 17 00:00:00 2001 From: Andrei Pelinescu-Onciul Date: Mon, 23 Nov 2009 15:54:01 -0500 Subject: sctp: limit maximum autoclose setsockopt value To avoid overflowing the maximum timer interval when transforming the autoclose interval from seconds to jiffies, limit the maximum autoclose value to MAX_SCHEDULE_TIMEOUT/HZ. Signed-off-by: Andrei Pelinescu-Onciul Signed-off-by: Vlad Yasevich --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/sctp') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d2681a6bc6fa..71513b3926a5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2086,6 +2086,9 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, return -EINVAL; if (copy_from_user(&sp->autoclose, optval, optlen)) return -EFAULT; + /* make sure it won't exceed MAX_SCHEDULE_TIMEOUT */ + if (sp->autoclose > (MAX_SCHEDULE_TIMEOUT / HZ) ) + sp->autoclose = MAX_SCHEDULE_TIMEOUT / HZ ; return 0; } -- cgit v1.2.2 From da85b7396f3b6cb3fea7d77091498bfa1051ef7c Mon Sep 17 00:00:00 2001 From: Andrei Pelinescu-Onciul Date: Mon, 23 Nov 2009 15:54:01 -0500 Subject: sctp: fix integer overflow when setting the autoclose timer When setting the autoclose timeout in jiffies there is a possible integer overflow if the value in seconds is very large (e.g. for 2^22 s with HZ=1024). The problem appears even on 64-bit due to the integer promotion rules. The fix is just a cast to unsigned long. Signed-off-by: Andrei Pelinescu-Onciul Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 880dae2ca87b..6e96f83570c9 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -167,7 +167,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = - sp->autoclose * HZ; + (unsigned long)sp->autoclose * HZ; /* Initilizes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) -- cgit v1.2.2 From 4814326b59db0cfd18ac652626d955ad3f57fb0f Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 23 Nov 2009 15:54:01 -0500 Subject: sctp: prevent too-fast association id reuse We use the idr subsystem and always ask for an id at or above 1. This results in a id reuse when one association is terminated while another is created. To prevent re-use, we keep track of the last id returned and ask for that id + 1 as a base for each query. We let the idr spin lock protect this base id as well. Signed-off-by: Vlad Yasevich --- net/sctp/associola.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 6e96f83570c9..df5abbff63e2 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -63,6 +63,12 @@ static void sctp_assoc_bh_rcv(struct work_struct *work); static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc); +/* Keep track of the new idr low so that we don't re-use association id + * numbers too fast. It is protected by they idr spin lock is in the + * range of 1 - INT_MAX. + */ +static u32 idr_low = 1; + /* 1st Level Abstractions. */ @@ -1553,7 +1559,12 @@ retry: spin_lock_bh(&sctp_assocs_id_lock); error = idr_get_new_above(&sctp_assocs_id, (void *)asoc, - 1, &assoc_id); + idr_low, &assoc_id); + if (!error) { + idr_low = assoc_id + 1; + if (idr_low == INT_MAX) + idr_low = 1; + } spin_unlock_bh(&sctp_assocs_id_lock); if (error == -EAGAIN) goto retry; -- cgit v1.2.2 From f64f9e719261a87818dd192a3a2352e5b20fbd0f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 29 Nov 2009 16:55:45 -0800 Subject: net: Move && and || to end of previous line Not including net/atm/ Compiled tested x86 allyesconfig only Added a > 80 column line or two, which I ignored. Existing checkpatch plaints willfully, cheerfully ignored. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 4 ++-- net/sctp/socket.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e231a9831016..229690f02a1d 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -191,8 +191,8 @@ static inline int sctp_cacc_skip(struct sctp_transport *primary, __u32 tsn) { if (primary->cacc.changeover_active && - (sctp_cacc_skip_3_1(primary, transport, count_of_newacks) - || sctp_cacc_skip_3_2(primary, tsn))) + (sctp_cacc_skip_3_1(primary, transport, count_of_newacks) || + sctp_cacc_skip_3_2(primary, tsn))) return 1; return 0; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 71513b3926a5..5fa9ac52e137 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2356,8 +2356,8 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, pmtud_change == SPP_PMTUD || sackdelay_change == SPP_SACKDELAY || params.spp_sackdelay > 500 || - (params.spp_pathmtu - && params.spp_pathmtu < SCTP_DEFAULT_MINSEGMENT)) + (params.spp_pathmtu && + params.spp_pathmtu < SCTP_DEFAULT_MINSEGMENT)) return -EINVAL; /* If an address other than INADDR_ANY is specified, and -- cgit v1.2.2 From 810c07194f6ef541625e65b53392e9f605611a1a Mon Sep 17 00:00:00 2001 From: Andrei Pelinescu-Onciul Date: Wed, 2 Dec 2009 01:16:49 -0800 Subject: sctp: fix sctp_setsockopt_autoclose compile warning Fix the following warning, when building on 64 bits: net/sctp/socket.c:2091: warning: large integer implicitly truncated to unsigned type Signed-off-by: Andrei Pelinescu-Onciul Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5fa9ac52e137..89ab66e54740 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2088,7 +2088,7 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, return -EFAULT; /* make sure it won't exceed MAX_SCHEDULE_TIMEOUT */ if (sp->autoclose > (MAX_SCHEDULE_TIMEOUT / HZ) ) - sp->autoclose = MAX_SCHEDULE_TIMEOUT / HZ ; + sp->autoclose = (__u32)(MAX_SCHEDULE_TIMEOUT / HZ) ; return 0; } -- cgit v1.2.2 From 94e2bd688820aed72b4f8092f88c2ccf64e003de Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 16 Oct 2009 15:20:49 +0200 Subject: tree-wide: fix some typos and punctuation in comments fix some typos and punctuation in comments Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Jiri Kosina --- net/sctp/sm_statefuns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index c8fae1983dd1..ba2f66d5f0cd 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3569,7 +3569,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, * To do this properly, we'll set the destination address of the chunk * and at the transmit time, will try look up the transport to use. * Since ASCONFs may be bundled, the correct transport may not be - * created untill we process the entire packet, thus this workaround. + * created until we process the entire packet, thus this workaround. */ asconf_ack->dest = chunk->source; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack)); -- cgit v1.2.2 From af901ca181d92aac3a7dc265144a9081a86d8f39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Goddard=20Rosa?= Date: Sat, 14 Nov 2009 13:09:05 -0200 Subject: tree-wide: fix assorted typos all over the place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That is "success", "unknown", "through", "performance", "[re|un]mapping" , "access", "default", "reasonable", "[con]currently", "temperature" , "channel", "[un]used", "application", "example","hierarchy", "therefore" , "[over|under]flow", "contiguous", "threshold", "enough" and others. Signed-off-by: André Goddard Rosa Signed-off-by: Jiri Kosina --- net/sctp/sm_sideeffect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8674d4919556..29d8501bf156 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -719,7 +719,7 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, if (sctp_style(sk, TCP)) { /* Change the sk->sk_state of a TCP-style socket that has - * sucessfully completed a connect() call. + * successfully completed a connect() call. */ if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED)) sk->sk_state = SCTP_SS_ESTABLISHED; -- cgit v1.2.2 From a252e749f1ae17e43ccc5824f7b1b5854417c98b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 8 Dec 2009 12:51:22 -0800 Subject: sctp: fix compile error due to sysctl mismerge I messed up the merge in d7fc02c7bae7b1cf69269992cf880a43a350cdaa, where the conflict in question wasn't just about CTL_UNNUMBERED being removed, but the 'strategy' field is too (sysctl handling is now done through the /proc interface, with no duplicate protocols for reading the data). Reported-by: Larry Finger Reported-by: Ingo Molnar Signed-off-by: Linus Torvalds --- net/sctp/sysctl.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 419e1e962c55..832590bbe0c0 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -248,7 +248,6 @@ static ctl_table sctp_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, .extra1 = &one, .extra2 = &rwnd_scale_max, }, -- cgit v1.2.2 From 8ffd32083c849dcf476e56e6c5f728c80797ecdd Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 21 Dec 2009 14:25:06 +0000 Subject: net/sctp/socket.c: squish warning net/sctp/socket.c: In function 'sctp_setsockopt_autoclose': net/sctp/socket.c:2090: warning: comparison is always false due to limited range of data type Cc: Andrei Pelinescu-Onciul Cc: Vlad Yasevich Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 89ab66e54740..67fdac9d2d33 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2087,8 +2087,7 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, if (copy_from_user(&sp->autoclose, optval, optlen)) return -EFAULT; /* make sure it won't exceed MAX_SCHEDULE_TIMEOUT */ - if (sp->autoclose > (MAX_SCHEDULE_TIMEOUT / HZ) ) - sp->autoclose = (__u32)(MAX_SCHEDULE_TIMEOUT / HZ) ; + sp->autoclose = min_t(long, sp->autoclose, MAX_SCHEDULE_TIMEOUT / HZ); return 0; } -- cgit v1.2.2 From 09cb47a2c68f9596927bc05dab0453edb35cd32d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 21 Jan 2010 02:43:20 -0800 Subject: net/sctp: Eliminate useless code The variable newinet is initialized twice to the same (side effect-free) expression. Drop one initialization. A simplified version of the semantic match that finds this problem is: (http://coccinelle.lip6.fr/) // @forall@ idexpression *x; identifier f!=ERR_PTR; @@ x = f(...) ... when != x ( x = f(...,<+...x...+>,...) | * x = f(...) ) // Signed-off-by: Julia Lawall Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 67fdac9d2d33..f6d1e59c4151 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6359,7 +6359,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, struct sctp_association *asoc) { struct inet_sock *inet = inet_sk(sk); - struct inet_sock *newinet = inet_sk(newsk); + struct inet_sock *newinet; newsk->sk_type = sk->sk_type; newsk->sk_bound_dev_if = sk->sk_bound_dev_if; -- cgit v1.2.2 From 5833929cc2ad2b3064b4fac8c44e293972d240d8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 22 Jan 2010 10:17:26 +0000 Subject: net: constify MIB name tables Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/sctp/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/proc.c b/net/sctp/proc.c index d093cbfeaac4..a5ac6e0a8d9c 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -40,7 +40,7 @@ #include #include /* for snmp_fold_field */ -static struct snmp_mib sctp_snmp_list[] = { +static const struct snmp_mib sctp_snmp_list[] = { SNMP_MIB_ITEM("SctpCurrEstab", SCTP_MIB_CURRESTAB), SNMP_MIB_ITEM("SctpActiveEstabs", SCTP_MIB_ACTIVEESTABS), SNMP_MIB_ITEM("SctpPassiveEstabs", SCTP_MIB_PASSIVEESTABS), -- cgit v1.2.2 From 3ad2f3fbb961429d2aa627465ae4829758bc7e07 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 3 Feb 2010 08:01:28 +0800 Subject: tree-wide: Assorted spelling fixes In particular, several occurances of funny versions of 'success', 'unknown', 'therefore', 'acknowledge', 'argument', 'achieve', 'address', 'beginning', 'desirable', 'separate' and 'necessary' are fixed. Signed-off-by: Daniel Mack Cc: Joe Perches Cc: Junio C Hamano Signed-off-by: Jiri Kosina --- net/sctp/sm_sideeffect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 4e4ca65cd320..500886bda9b4 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -475,7 +475,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, * used to provide an upper bound to this doubling operation. * * Special Case: the first HB doesn't trigger exponential backoff. - * The first unacknowleged HB triggers it. We do this with a flag + * The first unacknowledged HB triggers it. We do this with a flag * that indicates that we have an outstanding HB. */ if (!is_hb || transport->hb_sent) { -- cgit v1.2.2 From 7d720c3e4f0c4fc152a6bf17e24244a3c85412d2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 16 Feb 2010 15:20:26 +0000 Subject: percpu: add __percpu sparse annotations to net Add __percpu sparse annotations to net. These annotations are to make sparse consider percpu variables to be in a different address space and warn if accessed without going through percpu accessors. This patch doesn't affect normal builds. The macro and type tricks around snmp stats make things a bit interesting. DEFINE/DECLARE_SNMP_STAT() macros mark the target field as __percpu and SNMP_UPD_PO_STATS() macro is updated accordingly. All snmp_mib_*() users which used to cast the argument to (void **) are updated to cast it to (void __percpu **). Signed-off-by: Tejun Heo Acked-by: David S. Miller Cc: Patrick McHardy Cc: Arnaldo Carvalho de Melo Cc: Vlad Yasevich Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/sctp/proc.c | 2 +- net/sctp/protocol.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/proc.c b/net/sctp/proc.c index a5ac6e0a8d9c..784bcc9a979d 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -83,7 +83,7 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field((void **)sctp_statistics, + snmp_fold_field((void __percpu **)sctp_statistics, sctp_snmp_list[i].entry)); return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index a3c8988758b1..9687177b026b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -996,12 +996,13 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family) static inline int init_sctp_mibs(void) { - return snmp_mib_init((void**)sctp_statistics, sizeof(struct sctp_mib)); + return snmp_mib_init((void __percpu **)sctp_statistics, + sizeof(struct sctp_mib)); } static inline void cleanup_sctp_mibs(void) { - snmp_mib_free((void**)sctp_statistics); + snmp_mib_free((void __percpu **)sctp_statistics); } static void sctp_v4_pf_init(void) -- cgit v1.2.2 From dc4c2c31053ba5bf685d273cd62ecca406dddb2d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 12 Feb 2010 11:41:39 +0000 Subject: net: remove INIT_RCU_HEAD() usage call_rcu() will unconditionally reinitialize RCU head anyway. Signed-off-by: Alexey Dobriyan Acked-by: Paul E. McKenney Signed-off-by: David S. Miller --- net/sctp/bind_addr.c | 1 - net/sctp/ipv6.c | 1 - net/sctp/protocol.c | 1 - 3 files changed, 3 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 13a6fba41077..bef133731683 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -186,7 +186,6 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, addr->valid = 1; INIT_LIST_HEAD(&addr->list); - INIT_RCU_HEAD(&addr->rcu); /* We always hold a socket lock when calling this function, * and that acts as a writer synchronizing lock. diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index cc50fbe99291..1d7ac70ba39f 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -381,7 +381,6 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, addr->a.v6.sin6_scope_id = dev->ifindex; addr->valid = 1; INIT_LIST_HEAD(&addr->list); - INIT_RCU_HEAD(&addr->rcu); list_add_tail(&addr->list, addrlist); } } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 9687177b026b..e771690f6d5d 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -188,7 +188,6 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, addr->a.v4.sin_addr.s_addr = ifa->ifa_local; addr->valid = 1; INIT_LIST_HEAD(&addr->list); - INIT_RCU_HEAD(&addr->rcu); list_add_tail(&addr->list, addrlist); } } -- cgit v1.2.2 From 50b1a782f845140f4138f14a1ce8a4a6dd0cc82f Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:44 +0000 Subject: sctp: use limited socket backlog Make sctp adapt to the limited socket backlog change. Cc: Vlad Yasevich Cc: Sridhar Samudrala Signed-off-by: Zhu Yi Signed-off-by: David S. Miller --- net/sctp/input.c | 42 +++++++++++++++++++++++++++--------------- net/sctp/socket.c | 3 +++ 2 files changed, 30 insertions(+), 15 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/input.c b/net/sctp/input.c index c0c973e67add..cbc063665e6b 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -75,7 +75,7 @@ static struct sctp_association *__sctp_lookup_association( const union sctp_addr *peer, struct sctp_transport **pt); -static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb); +static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb); /* Calculate the SCTP checksum of an SCTP packet. */ @@ -265,8 +265,13 @@ int sctp_rcv(struct sk_buff *skb) } if (sock_owned_by_user(sk)) { + if (sctp_add_backlog(sk, skb)) { + sctp_bh_unlock_sock(sk); + sctp_chunk_free(chunk); + skb = NULL; /* sctp_chunk_free already freed the skb */ + goto discard_release; + } SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); - sctp_add_backlog(sk, skb); } else { SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(&chunk->rcvr->inqueue, chunk); @@ -336,8 +341,10 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) sctp_bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - sk_add_backlog(sk, skb); - backloged = 1; + if (sk_add_backlog_limited(sk, skb)) + sctp_chunk_free(chunk); + else + backloged = 1; } else sctp_inq_push(inqueue, chunk); @@ -362,22 +369,27 @@ done: return 0; } -static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb) +static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; struct sctp_ep_common *rcvr = chunk->rcvr; + int ret; - /* Hold the assoc/ep while hanging on the backlog queue. - * This way, we know structures we need will not disappear from us - */ - if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) - sctp_association_hold(sctp_assoc(rcvr)); - else if (SCTP_EP_TYPE_SOCKET == rcvr->type) - sctp_endpoint_hold(sctp_ep(rcvr)); - else - BUG(); + ret = sk_add_backlog_limited(sk, skb); + if (!ret) { + /* Hold the assoc/ep while hanging on the backlog queue. + * This way, we know structures we need will not disappear + * from us + */ + if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) + sctp_association_hold(sctp_assoc(rcvr)); + else if (SCTP_EP_TYPE_SOCKET == rcvr->type) + sctp_endpoint_hold(sctp_ep(rcvr)); + else + BUG(); + } + return ret; - sk_add_backlog(sk, skb); } /* Handle icmp frag needed error. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f6d1e59c4151..dfc5c127efd4 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3720,6 +3720,9 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) SCTP_DBG_OBJCNT_INC(sock); percpu_counter_inc(&sctp_sockets_allocated); + /* Set socket backlog limit. */ + sk->sk_backlog.limit = sysctl_sctp_rmem[1]; + local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); local_bh_enable(); -- cgit v1.2.2 From a3a858ff18a72a8d388e31ab0d98f7e944841a62 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:47 +0000 Subject: net: backlog functions rename sk_add_backlog -> __sk_add_backlog sk_add_backlog_limited -> sk_add_backlog Signed-off-by: Zhu Yi Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sctp/input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/input.c b/net/sctp/input.c index cbc063665e6b..3d74b264ea22 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -341,7 +341,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) sctp_bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - if (sk_add_backlog_limited(sk, skb)) + if (sk_add_backlog(sk, skb)) sctp_chunk_free(chunk); else backloged = 1; @@ -375,7 +375,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) struct sctp_ep_common *rcvr = chunk->rcvr; int ret; - ret = sk_add_backlog_limited(sk, skb); + ret = sk_add_backlog(sk, skb); if (!ret) { /* Hold the assoc/ep while hanging on the backlog queue. * This way, we know structures we need will not disappear -- cgit v1.2.2 From 5a0e3ad6af8660be21ca98a971cd00f331318c05 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Mar 2010 17:04:11 +0900 Subject: include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo Guess-its-ok-by: Christoph Lameter Cc: Ingo Molnar Cc: Lee Schermerhorn --- net/sctp/auth.c | 1 + net/sctp/bind_addr.c | 1 + net/sctp/chunk.c | 1 + net/sctp/input.c | 1 + net/sctp/inqueue.c | 1 + net/sctp/ipv6.c | 1 + net/sctp/output.c | 1 + net/sctp/outqueue.c | 1 + net/sctp/primitive.c | 1 + net/sctp/protocol.c | 1 + net/sctp/sm_make_chunk.c | 1 + net/sctp/sm_sideeffect.c | 1 + net/sctp/sm_statefuns.c | 1 + net/sctp/socket.c | 1 + net/sctp/ssnmap.c | 1 + net/sctp/transport.c | 1 + net/sctp/tsnmap.c | 1 + net/sctp/ulpevent.c | 1 + net/sctp/ulpqueue.c | 1 + 19 files changed, 19 insertions(+) (limited to 'net/sctp') diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 56935bbc1496..86366390038a 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -34,6 +34,7 @@ * be incorporated into the next SCTP release. */ +#include #include #include #include diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index bef133731683..faf71d179e46 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -43,6 +43,7 @@ */ #include +#include #include #include #include diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 8e4320040f05..3eab6db59a37 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/input.c b/net/sctp/input.c index 3d74b264ea22..2a570184e5a9 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -53,6 +53,7 @@ #include #include #include /* For struct timeval */ +#include #include #include #include diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index bbf5dd2a97c4..ccb6dc48d15b 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -46,6 +46,7 @@ #include #include #include +#include /* Initialize an SCTP inqueue. */ void sctp_inq_init(struct sctp_inq *queue) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 1d7ac70ba39f..9fb5d37c37ad 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include diff --git a/net/sctp/output.c b/net/sctp/output.c index 7c5589363433..fad261d41ec2 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 229690f02a1d..abfc0b8dee74 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -50,6 +50,7 @@ #include /* For struct list_head */ #include #include +#include #include /* For skb_set_owner_w */ #include diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c index 8cb4f060bce6..534c7eae9d15 100644 --- a/net/sctp/primitive.c +++ b/net/sctp/primitive.c @@ -50,6 +50,7 @@ #include #include #include /* For struct timeval */ +#include #include #include #include diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e771690f6d5d..a56f98e82f92 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 9e732916b671..17cb400ecd6a 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 500886bda9b4..4c5bed9af4e3 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 47bc20d3a85b..abf601a1b847 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/socket.c b/net/sctp/socket.c index dfc5c127efd4..007e8baba089 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c index 737d330e5ffc..442ad4ed6315 100644 --- a/net/sctp/ssnmap.c +++ b/net/sctp/ssnmap.c @@ -37,6 +37,7 @@ */ #include +#include #include #include diff --git a/net/sctp/transport.c b/net/sctp/transport.c index b827d21dbe54..be4d63d5a5cc 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -48,6 +48,7 @@ * be incorporated into the next SCTP release. */ +#include #include #include #include diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 9bd64565021a..747d5412c463 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -42,6 +42,7 @@ * be incorporated into the next SCTP release. */ +#include #include #include #include diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 8b3560fd876d..aa72e89c3ee1 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -43,6 +43,7 @@ * be incorporated into the next SCTP release. */ +#include #include #include #include diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 7b23803343cc..3a448536f0b6 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -41,6 +41,7 @@ * be incorporated into the next SCTP release. */ +#include #include #include #include -- cgit v1.2.2 From 561b1733a465cf9677356b40c27653dd45f1ac56 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 28 Apr 2010 08:47:18 +0000 Subject: sctp: avoid irq lock inversion while call sk->sk_data_ready() sk->sk_data_ready() of sctp socket can be called from both BH and non-BH contexts, but the default sk->sk_data_ready(), sock_def_readable(), can not be used in this case. Therefore, we have to make a new function sctp_data_ready() to grab sk->sk_data_ready() with BH disabling. ========================================================= [ INFO: possible irq lock inversion dependency detected ] 2.6.33-rc6 #129 --------------------------------------------------------- sctp_darn/1517 just changed the state of lock: (clock-AF_INET){++.?..}, at: [] sock_def_readable+0x20/0x80 but this lock took another, SOFTIRQ-unsafe lock in the past: (slock-AF_INET){+.-...} and interrupts could create inverse lock ordering between them. other info that might help us debug this: 1 lock held by sctp_darn/1517: #0: (sk_lock-AF_INET){+.+.+.}, at: [] sctp_sendmsg+0x23d/0xc00 [sctp] Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/endpointola.c | 1 + net/sctp/socket.c | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'net/sctp') diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 905fda582b92..7ec09ba03a1c 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -144,6 +144,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Use SCTP specific send buffer space queues. */ ep->sndbuf_policy = sctp_sndbuf_policy; + sk->sk_data_ready = sctp_data_ready; sk->sk_write_space = sctp_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 007e8baba089..efa2bc3f0028 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6189,6 +6189,16 @@ do_nonblock: goto out; } +void sctp_data_ready(struct sock *sk, int len) +{ + read_lock_bh(&sk->sk_callback_lock); + if (sk_has_sleeper(sk)) + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | + POLLRDNORM | POLLRDBAND); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + read_unlock_bh(&sk->sk_callback_lock); +} + /* If socket sndbuf has changed, wake up all per association waiters. */ void sctp_write_space(struct sock *sk) { -- cgit v1.2.2 From 0c42749cffbb4a06be86c5e5db6c7ebad548781f Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 28 Apr 2010 08:47:19 +0000 Subject: sctp: fix potential reference of a freed pointer When sctp attempts to update an assocition, it removes any addresses that were not in the updated INITs. However, the loop may attempt to refrence a transport with address after removing it. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index df5abbff63e2..99c93ee98ad9 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1194,8 +1194,10 @@ void sctp_assoc_update(struct sctp_association *asoc, /* Remove any peer addresses not present in the new association. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { trans = list_entry(pos, struct sctp_transport, transports); - if (!sctp_assoc_lookup_paddr(new, &trans->ipaddr)) - sctp_assoc_del_peer(asoc, &trans->ipaddr); + if (!sctp_assoc_lookup_paddr(new, &trans->ipaddr)) { + sctp_assoc_rm_peer(asoc, trans); + continue; + } if (asoc->state >= SCTP_STATE_ESTABLISHED) sctp_transport_reset(trans); -- cgit v1.2.2 From 81419d862db743fe4450a021893f24bab4698c1d Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 28 Apr 2010 08:47:20 +0000 Subject: sctp: per_cpu variables should be in bh_disabled section Since the change of the atomics to percpu variables, we now have to disable BH in process context when touching percpu variables. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index efa2bc3f0028..44a1ab03a3f0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3719,12 +3719,12 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->hmac = NULL; SCTP_DBG_OBJCNT_INC(sock); - percpu_counter_inc(&sctp_sockets_allocated); /* Set socket backlog limit. */ sk->sk_backlog.limit = sysctl_sctp_rmem[1]; local_bh_disable(); + percpu_counter_inc(&sctp_sockets_allocated); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); local_bh_enable(); @@ -3741,8 +3741,8 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) /* Release our hold on the endpoint. */ ep = sctp_sk(sk)->ep; sctp_endpoint_free(ep); - percpu_counter_dec(&sctp_sockets_allocated); local_bh_disable(); + percpu_counter_dec(&sctp_sockets_allocated); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); } -- cgit v1.2.2 From a8170c35e738d62e9919ce5b109cf4ed66e95bde Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 28 Apr 2010 08:47:21 +0000 Subject: sctp: fix to calc the INIT/INIT-ACK chunk length correctly is set When calculating the INIT/INIT-ACK chunk length, we should not only account the length of parameters, but also the parameters zero padding length, such as AUTH HMACS parameter and CHUNKS parameter. Without the parameters zero padding length we may get following oops. skb_over_panic: text:ce2068d2 len:130 put:6 head:cac3fe00 data:cac3fe00 tail:0xcac3fe82 end:0xcac3fe80 dev: ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:127! invalid opcode: 0000 [#2] SMP last sysfs file: /sys/module/aes_generic/initstate Modules linked in: authenc ...... Pid: 4102, comm: sctp_darn Tainted: G D 2.6.34-rc2 #6 EIP: 0060:[] EFLAGS: 00010282 CPU: 0 EIP is at skb_over_panic+0x37/0x3e EAX: 00000078 EBX: c07c024b ECX: c07c02b9 EDX: cb607b78 ESI: 00000000 EDI: cac3fe7a EBP: 00000002 ESP: cb607b74 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 Process sctp_darn (pid: 4102, ti=cb607000 task=cabdc990 task.ti=cb607000) Stack: c07c02b9 ce2068d2 00000082 00000006 cac3fe00 cac3fe00 cac3fe82 cac3fe80 <0> c07c024b cac3fe7c cac3fe7a c0608dec ca986e80 ce2068d2 00000006 0000007a <0> cb8120ca ca986e80 cb812000 00000003 cb8120c4 ce208a25 cb8120ca cadd9400 Call Trace: [] ? sctp_addto_chunk+0x45/0x85 [sctp] [] ? skb_put+0x2e/0x32 [] ? sctp_addto_chunk+0x45/0x85 [sctp] [] ? sctp_make_init+0x279/0x28c [sctp] [] ? apic_timer_interrupt+0x2a/0x30 [] ? sctp_sf_do_prm_asoc+0x2b/0x7b [sctp] [] ? sctp_do_sm+0xa0/0x14a [sctp] [] ? sctp_pname+0x0/0x14 [sctp] [] ? sctp_primitive_ASSOCIATE+0x2b/0x31 [sctp] [] ? sctp_sendmsg+0x7a0/0x9eb [sctp] [] ? inet_sendmsg+0x3b/0x43 [] ? task_tick_fair+0x2d/0xd9 [] ? sock_sendmsg+0xa7/0xc1 [] ? smp_apic_timer_interrupt+0x6b/0x75 [] ? dequeue_task_fair+0x34/0x19b [] ? sched_clock_local+0x17/0x11e [] ? _copy_from_user+0x2b/0x10c [] ? verify_iovec+0x3c/0x6a [] ? sys_sendmsg+0x186/0x1e2 [] ? __wake_up_common+0x34/0x5b [] ? __wake_up+0x2c/0x3b [] ? tty_wakeup+0x43/0x47 [] ? remove_wait_queue+0x16/0x24 [] ? n_tty_read+0x5b8/0x65e [] ? default_wake_function+0x0/0x8 [] ? sys_socketcall+0x17f/0x1cd [] ? sysenter_do_call+0x12/0x22 Code: 0f 45 de 53 ff b0 98 00 00 00 ff b0 94 ...... EIP: [] skb_over_panic+0x37/0x3e SS:ESP 0068:cb607b74 To reproduce: # modprobe sctp # echo 1 > /proc/sys/net/sctp/addip_enable # echo 1 > /proc/sys/net/sctp/auth_enable # sctp_test -H 3ffe:501:ffff:100:20c:29ff:fe4d:f37e -P 800 -l # sctp_darn -H 3ffe:501:ffff:100:20c:29ff:fe4d:f37e -P 900 -h 192.168.0.21 -p 800 -I -s -t sctp_darn ready to send... 3ffe:501:ffff:100:20c:29ff:fe4d:f37e:900-192.168.0.21:800 Interactive mode> bindx-add=192.168.0.21 3ffe:501:ffff:100:20c:29ff:fe4d:f37e:900-192.168.0.21:800 Interactive mode> bindx-add=192.168.1.21 3ffe:501:ffff:100:20c:29ff:fe4d:f37e:900-192.168.0.21:800 Interactive mode> snd=10 ------------------------------------------------------------------ eth0 has addresses: 3ffe:501:ffff:100:20c:29ff:fe4d:f37e and 192.168.0.21 eth1 has addresses: 192.168.1.21 ------------------------------------------------------------------ Reported-by: George Cheimonidis Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 17cb400ecd6a..f6fc5c1a4078 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -208,7 +208,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sp = sctp_sk(asoc->base.sk); num_types = sp->pf->supported_addrs(sp, types); - chunksize = sizeof(init) + addrs_len + SCTP_SAT_LEN(num_types); + chunksize = sizeof(init) + addrs_len; + chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types)); chunksize += sizeof(ecap_param); if (sctp_prsctp_enable) @@ -238,14 +239,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* Add HMACS parameter length if any were defined */ auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += ntohs(auth_hmacs->length); + chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; /* Add CHUNKS parameter length */ auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += ntohs(auth_chunks->length); + chunksize += WORD_ROUND(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -255,7 +256,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* If we have any extensions to report, account for that */ if (num_ext) - chunksize += sizeof(sctp_supported_ext_param_t) + num_ext; + chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + + num_ext); /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -397,13 +399,13 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += ntohs(auth_hmacs->length); + chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += ntohs(auth_chunks->length); + chunksize += WORD_ROUND(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -412,7 +414,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, } if (num_ext) - chunksize += sizeof(sctp_supported_ext_param_t) + num_ext; + chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + + num_ext); /* Now allocate and fill out the chunk. */ retval = sctp_make_chunk(asoc, SCTP_CID_INIT_ACK, 0, chunksize); -- cgit v1.2.2 From c0786693404cffd80ca3cb6e75ee7b35186b2825 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 28 Apr 2010 08:47:22 +0000 Subject: sctp: Fix oops when sending queued ASCONF chunks When we finish processing ASCONF_ACK chunk, we try to send the next queued ASCONF. This action runs the sctp state machine recursively and it's not prepared to do so. kernel BUG at kernel/timer.c:790! invalid opcode: 0000 [#1] SMP last sysfs file: /sys/module/ipv6/initstate Modules linked in: sha256_generic sctp libcrc32c ipv6 dm_multipath uinput 8139too i2c_piix4 8139cp mii i2c_core pcspkr virtio_net joydev floppy virtio_blk virtio_pci [last unloaded: scsi_wait_scan] Pid: 0, comm: swapper Not tainted 2.6.34-rc4 #15 /Bochs EIP: 0060:[] EFLAGS: 00010286 CPU: 0 EIP is at add_timer+0xd/0x1b EAX: cecbab14 EBX: 000000f0 ECX: c0957b1c EDX: 03595cf4 ESI: cecba800 EDI: cf276f00 EBP: c0957aa0 ESP: c0957aa0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process swapper (pid: 0, ti=c0956000 task=c0988ba0 task.ti=c0956000) Stack: c0957ae0 d1851214 c0ab62e4 c0ab5f26 0500ffff 00000004 00000005 00000004 <0> 00000000 d18694fd 00000004 1666b892 cecba800 cecba800 c0957b14 00000004 <0> c0957b94 d1851b11 ceda8b00 cecba800 cf276f00 00000001 c0957b14 000000d0 Call Trace: [] ? sctp_side_effects+0x607/0xdfc [sctp] [] ? sctp_do_sm+0x108/0x159 [sctp] [] ? sctp_pname+0x0/0x1d [sctp] [] ? sctp_primitive_ASCONF+0x36/0x3b [sctp] [] ? sctp_process_asconf_ack+0x2a4/0x2d3 [sctp] [] ? sctp_sf_do_asconf_ack+0x1dd/0x2b4 [sctp] [] ? sctp_do_sm+0xb8/0x159 [sctp] [] ? sctp_cname+0x0/0x52 [sctp] [] ? sctp_assoc_bh_rcv+0xac/0xe1 [sctp] [] ? sctp_inq_push+0x2d/0x30 [sctp] [] ? sctp_rcv+0x797/0x82e [sctp] Tested-by: Wei Yongjun Signed-off-by: Yuansong Qiao Signed-off-by: Shuaijun Zhang Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 15 --------------- net/sctp/sm_sideeffect.c | 26 ++++++++++++++++++++++++++ net/sctp/sm_statefuns.c | 8 +++++++- 3 files changed, 33 insertions(+), 16 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index f6fc5c1a4078..0fd5b4c88358 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3318,21 +3318,6 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, sctp_chunk_free(asconf); asoc->addip_last_asconf = NULL; - /* Send the next asconf chunk from the addip chunk queue. */ - if (!list_empty(&asoc->addip_chunk_list)) { - struct list_head *entry = asoc->addip_chunk_list.next; - asconf = list_entry(entry, struct sctp_chunk, list); - - list_del_init(entry); - - /* Hold the chunk until an ASCONF_ACK is received. */ - sctp_chunk_hold(asconf); - if (sctp_primitive_ASCONF(asoc, asconf)) - sctp_chunk_free(asconf); - else - asoc->addip_last_asconf = asconf; - } - return retval; } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 4c5bed9af4e3..d5ae450b6f02 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -962,6 +962,29 @@ static int sctp_cmd_send_msg(struct sctp_association *asoc, } +/* Sent the next ASCONF packet currently stored in the association. + * This happens after the ASCONF_ACK was succeffully processed. + */ +static void sctp_cmd_send_asconf(struct sctp_association *asoc) +{ + /* Send the next asconf chunk from the addip chunk + * queue. + */ + if (!list_empty(&asoc->addip_chunk_list)) { + struct list_head *entry = asoc->addip_chunk_list.next; + struct sctp_chunk *asconf = list_entry(entry, + struct sctp_chunk, list); + list_del_init(entry); + + /* Hold the chunk until an ASCONF_ACK is received. */ + sctp_chunk_hold(asconf); + if (sctp_primitive_ASCONF(asoc, asconf)) + sctp_chunk_free(asconf); + else + asoc->addip_last_asconf = asconf; + } +} + /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real @@ -1617,6 +1640,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, } error = sctp_cmd_send_msg(asoc, cmd->obj.msg); break; + case SCTP_CMD_SEND_NEXT_ASCONF: + sctp_cmd_send_asconf(asoc); + break; default: printk(KERN_WARNING "Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index abf601a1b847..24b2cd555637 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3676,8 +3676,14 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); if (!sctp_process_asconf_ack((struct sctp_association *)asoc, - asconf_ack)) + asconf_ack)) { + /* Successfully processed ASCONF_ACK. We can + * release the next asconf if we have one. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF, + SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; + } abort = sctp_make_abort(asoc, asconf_ack, sizeof(sctp_errhdr_t)); -- cgit v1.2.2 From 5fa782c2f5ef6c2e4f04d3e228412c9b4a4c8809 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 28 Apr 2010 10:30:59 +0000 Subject: sctp: Fix skb_over_panic resulting from multiple invalid parameter errors (CVE-2010-1173) (v4) Ok, version 4 Change Notes: 1) Minor cleanups, from Vlads notes Summary: Hey- Recently, it was reported to me that the kernel could oops in the following way: <5> kernel BUG at net/core/skbuff.c:91! <5> invalid operand: 0000 [#1] <5> Modules linked in: sctp netconsole nls_utf8 autofs4 sunrpc iptable_filter ip_tables cpufreq_powersave parport_pc lp parport vmblock(U) vsock(U) vmci(U) vmxnet(U) vmmemctl(U) vmhgfs(U) acpiphp dm_mirror dm_mod button battery ac md5 ipv6 uhci_hcd ehci_hcd snd_ens1371 snd_rawmidi snd_seq_device snd_pcm_oss snd_mixer_oss snd_pcm snd_timer snd_page_alloc snd_ac97_codec snd soundcore pcnet32 mii floppy ext3 jbd ata_piix libata mptscsih mptsas mptspi mptscsi mptbase sd_mod scsi_mod <5> CPU: 0 <5> EIP: 0060:[] Not tainted VLI <5> EFLAGS: 00010216 (2.6.9-89.0.25.EL) <5> EIP is at skb_over_panic+0x1f/0x2d <5> eax: 0000002c ebx: c033f461 ecx: c0357d96 edx: c040fd44 <5> esi: c033f461 edi: df653280 ebp: 00000000 esp: c040fd40 <5> ds: 007b es: 007b ss: 0068 <5> Process swapper (pid: 0, threadinfo=c040f000 task=c0370be0) <5> Stack: c0357d96 e0c29478 00000084 00000004 c033f461 df653280 d7883180 e0c2947d <5> 00000000 00000080 df653490 00000004 de4f1ac0 de4f1ac0 00000004 df653490 <5> 00000001 e0c2877a 08000800 de4f1ac0 df653490 00000000 e0c29d2e 00000004 <5> Call Trace: <5> [] sctp_addto_chunk+0xb0/0x128 [sctp] <5> [] sctp_addto_chunk+0xb5/0x128 [sctp] <5> [] sctp_init_cause+0x3f/0x47 [sctp] <5> [] sctp_process_unk_param+0xac/0xb8 [sctp] <5> [] sctp_verify_init+0xcc/0x134 [sctp] <5> [] sctp_sf_do_5_1B_init+0x83/0x28e [sctp] <5> [] sctp_do_sm+0x41/0x77 [sctp] <5> [] cache_grow+0x140/0x233 <5> [] sctp_endpoint_bh_rcv+0xc5/0x108 [sctp] <5> [] sctp_inq_push+0xe/0x10 [sctp] <5> [] sctp_rcv+0x454/0x509 [sctp] <5> [] ipt_hook+0x17/0x1c [iptable_filter] <5> [] nf_iterate+0x40/0x81 <5> [] ip_local_deliver_finish+0x0/0x151 <5> [] ip_local_deliver_finish+0xc6/0x151 <5> [] nf_hook_slow+0x83/0xb5 <5> [] ip_local_deliver+0x1a2/0x1a9 <5> [] ip_local_deliver_finish+0x0/0x151 <5> [] ip_rcv+0x334/0x3b4 <5> [] netif_receive_skb+0x320/0x35b <5> [] init_stall_timer+0x67/0x6a [uhci_hcd] <5> [] process_backlog+0x6c/0xd9 <5> [] net_rx_action+0xfe/0x1f8 <5> [] __do_softirq+0x35/0x79 <5> [] handle_IRQ_event+0x0/0x4f <5> [] do_softirq+0x46/0x4d Its an skb_over_panic BUG halt that results from processing an init chunk in which too many of its variable length parameters are in some way malformed. The problem is in sctp_process_unk_param: if (NULL == *errp) *errp = sctp_make_op_error_space(asoc, chunk, ntohs(chunk->chunk_hdr->length)); if (*errp) { sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM, WORD_ROUND(ntohs(param.p->length))); sctp_addto_chunk(*errp, WORD_ROUND(ntohs(param.p->length)), param.v); When we allocate an error chunk, we assume that the worst case scenario requires that we have chunk_hdr->length data allocated, which would be correct nominally, given that we call sctp_addto_chunk for the violating parameter. Unfortunately, we also, in sctp_init_cause insert a sctp_errhdr_t structure into the error chunk, so the worst case situation in which all parameters are in violation requires chunk_hdr->length+(sizeof(sctp_errhdr_t)*param_count) bytes of data. The result of this error is that a deliberately malformed packet sent to a listening host can cause a remote DOS, described in CVE-2010-1173: http://cve.mitre.org/cgi-bin/cvename.cgi?name=2010-1173 I've tested the below fix and confirmed that it fixes the issue. We move to a strategy whereby we allocate a fixed size error chunk and ignore errors we don't have space to report. Tested by me successfully Signed-off-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 62 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 5 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 0fd5b4c88358..30c1767186b8 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -108,7 +108,7 @@ static const struct sctp_paramhdr prsctp_param = { cpu_to_be16(sizeof(struct sctp_paramhdr)), }; -/* A helper to initialize to initialize an op error inside a +/* A helper to initialize an op error inside a * provided chunk, as most cause codes will be embedded inside an * abort chunk. */ @@ -125,6 +125,29 @@ void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code, chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err); } +/* A helper to initialize an op error inside a + * provided chunk, as most cause codes will be embedded inside an + * abort chunk. Differs from sctp_init_cause in that it won't oops + * if there isn't enough space in the op error chunk + */ +int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code, + size_t paylen) +{ + sctp_errhdr_t err; + __u16 len; + + /* Cause code constants are now defined in network order. */ + err.cause = cause_code; + len = sizeof(sctp_errhdr_t) + paylen; + err.length = htons(len); + + if (skb_tailroom(chunk->skb) > len) + return -ENOSPC; + chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, + sizeof(sctp_errhdr_t), + &err); + return 0; +} /* 3.3.2 Initiation (INIT) (1) * * This chunk is used to initiate a SCTP association between two @@ -1132,6 +1155,24 @@ nodata: return retval; } +/* Create an Operation Error chunk of a fixed size, + * specifically, max(asoc->pathmtu, SCTP_DEFAULT_MAXSEGMENT) + * This is a helper function to allocate an error chunk for + * for those invalid parameter codes in which we may not want + * to report all the errors, if the incomming chunk is large + */ +static inline struct sctp_chunk *sctp_make_op_error_fixed( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk) +{ + size_t size = asoc ? asoc->pathmtu : 0; + + if (!size) + size = SCTP_DEFAULT_MAXSEGMENT; + + return sctp_make_op_error_space(asoc, chunk, size); +} + /* Create an Operation Error chunk. */ struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, const struct sctp_chunk *chunk, @@ -1374,6 +1415,18 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) return target; } +/* Append bytes to the end of a chunk. Returns NULL if there isn't sufficient + * space in the chunk + */ +void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk, + int len, const void *data) +{ + if (skb_tailroom(chunk->skb) > len) + return sctp_addto_chunk(chunk, len, data); + else + return NULL; +} + /* Append bytes from user space to the end of a chunk. Will panic if * chunk is not big enough. * Returns a kernel err value. @@ -1977,13 +2030,12 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, * returning multiple unknown parameters. */ if (NULL == *errp) - *errp = sctp_make_op_error_space(asoc, chunk, - ntohs(chunk->chunk_hdr->length)); + *errp = sctp_make_op_error_fixed(asoc, chunk); if (*errp) { - sctp_init_cause(*errp, SCTP_ERROR_UNKNOWN_PARAM, + sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, WORD_ROUND(ntohs(param.p->length))); - sctp_addto_chunk(*errp, + sctp_addto_chunk_fixed(*errp, WORD_ROUND(ntohs(param.p->length)), param.v); } else { -- cgit v1.2.2 From 50b5d6ad63821cea324a5a7a19854d4de1a0a819 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 6 May 2010 00:56:07 -0700 Subject: sctp: Fix a race between ICMP protocol unreachable and connect() ICMP protocol unreachable handling completely disregarded the fact that the user may have locked the socket. It proceeded to destroy the association, even though the user may have held the lock and had a ref on the association. This resulted in the following: Attempt to release alive inet socket f6afcc00 ========================= [ BUG: held lock freed! ] ------------------------- somenu/2672 is freeing memory f6afcc00-f6afcfff, with a lock still held there! (sk_lock-AF_INET){+.+.+.}, at: [] sctp_connect+0x13/0x4c 1 lock held by somenu/2672: #0: (sk_lock-AF_INET){+.+.+.}, at: [] sctp_connect+0x13/0x4c stack backtrace: Pid: 2672, comm: somenu Not tainted 2.6.32-telco #55 Call Trace: [] ? printk+0xf/0x11 [] debug_check_no_locks_freed+0xce/0xff [] kmem_cache_free+0x21/0x66 [] __sk_free+0x9d/0xab [] sk_free+0x1c/0x1e [] sctp_association_put+0x32/0x89 [] __sctp_connect+0x36d/0x3f4 [] ? sctp_connect+0x13/0x4c [] ? autoremove_wake_function+0x0/0x33 [] sctp_connect+0x31/0x4c [] inet_dgram_connect+0x4b/0x55 [] sys_connect+0x54/0x71 [] ? lock_release_non_nested+0x88/0x239 [] ? might_fault+0x42/0x7c [] ? might_fault+0x42/0x7c [] sys_socketcall+0x6d/0x178 [] ? trace_hardirqs_on_thunk+0xc/0x10 [] syscall_call+0x7/0xb This was because the sctp_wait_for_connect() would aqcure the socket lock and then proceed to release the last reference count on the association, thus cause the fully destruction path to finish freeing the socket. The simplest solution is to start a very short timer in case the socket is owned by user. When the timer expires, we can do some verification and be able to do the release properly. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/input.c | 22 ++++++++++++++++++---- net/sctp/sm_sideeffect.c | 35 +++++++++++++++++++++++++++++++++++ net/sctp/transport.c | 2 ++ 3 files changed, 55 insertions(+), 4 deletions(-) (limited to 'net/sctp') diff --git a/net/sctp/input.c b/net/sctp/input.c index 2a570184e5a9..ea2192444ce6 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -440,11 +440,25 @@ void sctp_icmp_proto_unreachable(struct sock *sk, { SCTP_DEBUG_PRINTK("%s\n", __func__); - sctp_do_sm(SCTP_EVENT_T_OTHER, - SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), - asoc->state, asoc->ep, asoc, t, - GFP_ATOMIC); + if (sock_owned_by_user(sk)) { + if (timer_pending(&t->proto_unreach_timer)) + return; + else { + if (!mod_timer(&t->proto_unreach_timer, + jiffies + (HZ/20))) + sctp_association_hold(asoc); + } + + } else { + if (timer_pending(&t->proto_unreach_timer) && + del_timer(&t->proto_unreach_timer)) + sctp_association_put(asoc); + sctp_do_sm(SCTP_EVENT_T_OTHER, + SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), + asoc->state, asoc->ep, asoc, t, + GFP_ATOMIC); + } } /* Common lookup code for icmp/icmpv6 error handler. */ diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index d5ae450b6f02..eb1f42f45fdd 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -397,6 +397,41 @@ out_unlock: sctp_transport_put(transport); } +/* Handle the timeout of the ICMP protocol unreachable timer. Trigger + * the correct state machine transition that will close the association. + */ +void sctp_generate_proto_unreach_event(unsigned long data) +{ + struct sctp_transport *transport = (struct sctp_transport *) data; + struct sctp_association *asoc = transport->asoc; + + sctp_bh_lock_sock(asoc->base.sk); + if (sock_owned_by_user(asoc->base.sk)) { + SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__); + + /* Try again later. */ + if (!mod_timer(&transport->proto_unreach_timer, + jiffies + (HZ/20))) + sctp_association_hold(asoc); + goto out_unlock; + } + + /* Is this structure just waiting around for us to actually + * get destroyed? + */ + if (asoc->base.dead) + goto out_unlock; + + sctp_do_sm(SCTP_EVENT_T_OTHER, + SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), + asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); + +out_unlock: + sctp_bh_unlock_sock(asoc->base.sk); + sctp_association_put(asoc); +} + + /* Inject a SACK Timeout event into the state machine. */ static void sctp_generate_sack_event(unsigned long data) { diff --git a/net/sctp/transport.c b/net/sctp/transport.c index be4d63d5a5cc..4a368038d46f 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -108,6 +108,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, (unsigned long)peer); setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event, (unsigned long)peer); + setup_timer(&peer->proto_unreach_timer, + sctp_generate_proto_unreach_event, (unsigned long)peer); /* Initialize the 64-bit random nonce sent with heartbeat. */ get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); -- cgit v1.2.2 From 55fa0cfd7c3ac2ae34cac7dca2e3fbcfe661e6c3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 9 May 2010 16:56:07 +0000 Subject: sctp: delete active ICMP proto unreachable timer when free transport transport may be free before ICMP proto unreachable timer expire, so we should delete active ICMP proto unreachable timer when transport is going away. Signed-off-by: Wei Yongjun Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/transport.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/sctp') diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 4a368038d46f..165d54e07fcd 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -173,6 +173,10 @@ void sctp_transport_free(struct sctp_transport *transport) del_timer(&transport->T3_rtx_timer)) sctp_transport_put(transport); + /* Delete the ICMP proto unreachable timer if it's active. */ + if (timer_pending(&transport->proto_unreach_timer) && + del_timer(&transport->proto_unreach_timer)) + sctp_association_put(transport->asoc); sctp_transport_put(transport); } -- cgit v1.2.2