From 7351a22a3ae005422488139365e9a80f560c80b9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 5 Nov 2007 20:33:46 -0800 Subject: [NETFILTER]: ip{,6}_queue: convert to seq_file interface I plan to kill ->get_info which means killing proc_net_create(). Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_queue.c | 37 ++++++++++++++++++++----------------- net/ipv6/netfilter/ip6_queue.c | 37 ++++++++++++++++++++----------------- 2 files changed, 40 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 10a2ce09fd8e..14d64a383db1 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -607,15 +608,11 @@ static ctl_table ipq_root_table[] = { { .ctl_name = 0 } }; -#ifdef CONFIG_PROC_FS -static int -ipq_get_info(char *buffer, char **start, off_t offset, int length) +static int ip_queue_show(struct seq_file *m, void *v) { - int len; - read_lock_bh(&queue_lock); - len = sprintf(buffer, + seq_printf(m, "Peer PID : %d\n" "Copy mode : %hu\n" "Copy range : %u\n" @@ -632,16 +629,21 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) queue_user_dropped); read_unlock_bh(&queue_lock); + return 0; +} - *start = buffer + offset; - len -= offset; - if (len > length) - len = length; - else if (len < 0) - len = 0; - return len; +static int ip_queue_open(struct inode *inode, struct file *file) +{ + return single_open(file, ip_queue_show, NULL); } -#endif /* CONFIG_PROC_FS */ + +static const struct file_operations ip_queue_proc_fops = { + .open = ip_queue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; static struct nf_queue_handler nfqh = { .name = "ip_queue", @@ -661,10 +663,11 @@ static int __init ip_queue_init(void) goto cleanup_netlink_notifier; } - proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); - if (proc) + proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net); + if (proc) { proc->owner = THIS_MODULE; - else { + proc->proc_fops = &ip_queue_proc_fops; + } else { printk(KERN_ERR "ip_queue: failed to create proc entry\n"); goto cleanup_ipqnl; } diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 6413a30d9f68..e273605eef85 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -596,15 +597,11 @@ static ctl_table ipq_root_table[] = { { .ctl_name = 0 } }; -#ifdef CONFIG_PROC_FS -static int -ipq_get_info(char *buffer, char **start, off_t offset, int length) +static int ip6_queue_show(struct seq_file *m, void *v) { - int len; - read_lock_bh(&queue_lock); - len = sprintf(buffer, + seq_printf(m, "Peer PID : %d\n" "Copy mode : %hu\n" "Copy range : %u\n" @@ -621,16 +618,21 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) queue_user_dropped); read_unlock_bh(&queue_lock); + return 0; +} - *start = buffer + offset; - len -= offset; - if (len > length) - len = length; - else if (len < 0) - len = 0; - return len; +static int ip6_queue_open(struct inode *inode, struct file *file) +{ + return single_open(file, ip6_queue_show, NULL); } -#endif /* CONFIG_PROC_FS */ + +static const struct file_operations ip6_queue_proc_fops = { + .open = ip6_queue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; static struct nf_queue_handler nfqh = { .name = "ip6_queue", @@ -650,10 +652,11 @@ static int __init ip6_queue_init(void) goto cleanup_netlink_notifier; } - proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); - if (proc) + proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net); + if (proc) { proc->owner = THIS_MODULE; - else { + proc->proc_fops = &ip6_queue_proc_fops; + } else { printk(KERN_ERR "ip6_queue: failed to create proc entry\n"); goto cleanup_ipqnl; } -- cgit v1.2.2 From ba5dc2756cc305c055dbb253b8fcdc459f0f8e73 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 5 Nov 2007 20:35:56 -0800 Subject: [NETFILTER]: Copyright/Email update Transfer all my copyright over to our company. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_connlimit.c | 5 +++-- net/netfilter/xt_time.c | 3 ++- net/netfilter/xt_u32.c | 5 +++-- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 06cff1d13690..d7becf08a93a 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -4,7 +4,8 @@ * (c) 2000 Gerd Knorr * Nov 2002: Martin Bene : * only ignore TIME_WAIT or gone connections - * Copyright © Jan Engelhardt , 2007 + * (C) CC Computer Consultants GmbH, 2007 + * Contact: * * based on ... * @@ -306,7 +307,7 @@ static void __exit xt_connlimit_exit(void) module_init(xt_connlimit_init); module_exit(xt_connlimit_exit); -MODULE_AUTHOR("Jan Engelhardt "); +MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("netfilter xt_connlimit match module"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_connlimit"); diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index ef48bbd93573..ff44f86c24ce 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -1,6 +1,7 @@ /* * xt_time - * Copyright © Jan Engelhardt , 2007 + * Copyright © CC Computer Consultants GmbH, 2007 + * Contact: * * based on ipt_time by Fabrice MARIE * This is a module which is used for time matching diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c index bec427915b30..af75b8c3f20b 100644 --- a/net/netfilter/xt_u32.c +++ b/net/netfilter/xt_u32.c @@ -2,7 +2,8 @@ * xt_u32 - kernel module to match u32 packet content * * Original author: Don Cohen - * © Jan Engelhardt , 2007 + * (C) CC Computer Consultants GmbH, 2007 + * Contact: */ #include @@ -129,7 +130,7 @@ static void __exit xt_u32_exit(void) module_init(xt_u32_init); module_exit(xt_u32_exit); -MODULE_AUTHOR("Jan Engelhardt "); +MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("netfilter u32 match module"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_u32"); -- cgit v1.2.2 From 0795c65d9f8de2bf9a62ae1f56e928c6b5ed75ab Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 5 Nov 2007 20:42:54 -0800 Subject: [NETFILTER]: Clean up Makefile Sort matches and targets in the NF makefiles. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/Makefile | 20 ++++++++++---------- net/ipv6/netfilter/Makefile | 28 ++++++++++++++++------------ net/netfilter/Makefile | 14 +++++++------- 3 files changed, 33 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 409d273f6f82..7456833d6ade 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -41,27 +41,27 @@ obj-$(CONFIG_NF_NAT) += iptable_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o # matches +obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o +obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o +obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o -obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o -obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o -obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o +obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o -obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o # targets -obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o -obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o +obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o +obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o -obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o +obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o +obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o -obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o -obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o -obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o +obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o +obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o # generic ARP tables obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 4513eab77397..e789ec44d23b 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -4,25 +4,29 @@ # Link order matters here. obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o -obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o -obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o -obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o -obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o -obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o -obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o -obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o -obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o -obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o -obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o -obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o -obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o # objects for l3 independent conntrack nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o # l3 independent conntrack obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o + +# matches +obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o +obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o +obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o +obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o +obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o +obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o +obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o +obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o +obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o + +# targets +obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o +obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o +obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 93c58f973831..ad0e36ebea3d 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -40,15 +40,15 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o # targets obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o -obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o +obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o -obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o -obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o # matches obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o @@ -59,22 +59,22 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o +obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o -obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o +obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o +obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o -obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o -obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o +obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o -obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o -- cgit v1.2.2 From d1332e0ab84479d941de5cf4a69c71dfd385a25e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 5 Nov 2007 20:43:30 -0800 Subject: [NETFILTER]: remove unneeded rcu_dereference() calls As noticed by Paul McKenney, the rcu_dereference calls in the init path of NAT modules are unneeded, remove them. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_nat_amanda.c | 2 +- net/ipv4/netfilter/nf_nat_ftp.c | 2 +- net/ipv4/netfilter/nf_nat_h323.c | 18 +++++++++--------- net/ipv4/netfilter/nf_nat_irc.c | 2 +- net/ipv4/netfilter/nf_nat_pptp.c | 8 ++++---- net/ipv4/netfilter/nf_nat_sip.c | 4 ++-- net/ipv4/netfilter/nf_nat_tftp.c | 2 +- 7 files changed, 19 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 35a5aa69cd92..c31b87668250 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -69,7 +69,7 @@ static void __exit nf_nat_amanda_fini(void) static int __init nf_nat_amanda_init(void) { - BUG_ON(rcu_dereference(nf_nat_amanda_hook)); + BUG_ON(nf_nat_amanda_hook != NULL); rcu_assign_pointer(nf_nat_amanda_hook, help); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index e1a16d3ea4cb..a1d5d58a58bf 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -147,7 +147,7 @@ static void __exit nf_nat_ftp_fini(void) static int __init nf_nat_ftp_init(void) { - BUG_ON(rcu_dereference(nf_nat_ftp_hook)); + BUG_ON(nf_nat_ftp_hook != NULL); rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index a868c8c41328..93e18ef114f2 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -544,15 +544,15 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int __init init(void) { - BUG_ON(rcu_dereference(set_h245_addr_hook) != NULL); - BUG_ON(rcu_dereference(set_h225_addr_hook) != NULL); - BUG_ON(rcu_dereference(set_sig_addr_hook) != NULL); - BUG_ON(rcu_dereference(set_ras_addr_hook) != NULL); - BUG_ON(rcu_dereference(nat_rtp_rtcp_hook) != NULL); - BUG_ON(rcu_dereference(nat_t120_hook) != NULL); - BUG_ON(rcu_dereference(nat_h245_hook) != NULL); - BUG_ON(rcu_dereference(nat_callforwarding_hook) != NULL); - BUG_ON(rcu_dereference(nat_q931_hook) != NULL); + BUG_ON(set_h245_addr_hook != NULL); + BUG_ON(set_h225_addr_hook != NULL); + BUG_ON(set_sig_addr_hook != NULL); + BUG_ON(set_ras_addr_hook != NULL); + BUG_ON(nat_rtp_rtcp_hook != NULL); + BUG_ON(nat_t120_hook != NULL); + BUG_ON(nat_h245_hook != NULL); + BUG_ON(nat_callforwarding_hook != NULL); + BUG_ON(nat_q931_hook != NULL); rcu_assign_pointer(set_h245_addr_hook, set_h245_addr); rcu_assign_pointer(set_h225_addr_hook, set_h225_addr); diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index 766e2c16c6b9..fe6f9cef6c85 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -74,7 +74,7 @@ static void __exit nf_nat_irc_fini(void) static int __init nf_nat_irc_init(void) { - BUG_ON(rcu_dereference(nf_nat_irc_hook)); + BUG_ON(nf_nat_irc_hook != NULL); rcu_assign_pointer(nf_nat_irc_hook, help); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index e1385a099079..6817e7995f35 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -281,16 +281,16 @@ static int __init nf_nat_helper_pptp_init(void) { nf_nat_need_gre(); - BUG_ON(rcu_dereference(nf_nat_pptp_hook_outbound)); + BUG_ON(nf_nat_pptp_hook_outbound != NULL); rcu_assign_pointer(nf_nat_pptp_hook_outbound, pptp_outbound_pkt); - BUG_ON(rcu_dereference(nf_nat_pptp_hook_inbound)); + BUG_ON(nf_nat_pptp_hook_inbound != NULL); rcu_assign_pointer(nf_nat_pptp_hook_inbound, pptp_inbound_pkt); - BUG_ON(rcu_dereference(nf_nat_pptp_hook_exp_gre)); + BUG_ON(nf_nat_pptp_hook_exp_gre != NULL); rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, pptp_exp_gre); - BUG_ON(rcu_dereference(nf_nat_pptp_hook_expectfn)); + BUG_ON(nf_nat_pptp_hook_expectfn != NULL); rcu_assign_pointer(nf_nat_pptp_hook_expectfn, pptp_nat_expected); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index ce9edbcc01e3..3ca98971a1e9 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -293,8 +293,8 @@ static void __exit nf_nat_sip_fini(void) static int __init nf_nat_sip_init(void) { - BUG_ON(rcu_dereference(nf_nat_sip_hook)); - BUG_ON(rcu_dereference(nf_nat_sdp_hook)); + BUG_ON(nf_nat_sip_hook != NULL); + BUG_ON(nf_nat_sdp_hook != NULL); rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); rcu_assign_pointer(nf_nat_sdp_hook, ip_nat_sdp); return 0; diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index 0ecec701cb44..1360a94766dd 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -43,7 +43,7 @@ static void __exit nf_nat_tftp_fini(void) static int __init nf_nat_tftp_init(void) { - BUG_ON(rcu_dereference(nf_nat_tftp_hook)); + BUG_ON(nf_nat_tftp_hook != NULL); rcu_assign_pointer(nf_nat_tftp_hook, help); return 0; } -- cgit v1.2.2 From 55d84acd366f08e11ff00139f32fe4394fb0016a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 5 Nov 2007 20:44:06 -0800 Subject: [NETFILTER]: nf_sockopts list head cleanup Code is using knowledge that nf_sockopt_ops::list list_head is first field in structure by using casts. Switch to list_for_each_entry() itetators while I am at it. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_sockopt.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index aa2831587b82..2dfac3253569 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -23,14 +23,13 @@ static inline int overlap(int min1, int max1, int min2, int max2) /* Functions to register sockopt ranges (exclusive). */ int nf_register_sockopt(struct nf_sockopt_ops *reg) { - struct list_head *i; + struct nf_sockopt_ops *ops; int ret = 0; if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) return -EINTR; - list_for_each(i, &nf_sockopts) { - struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; + list_for_each_entry(ops, &nf_sockopts, list) { if (ops->pf == reg->pf && (overlap(ops->set_optmin, ops->set_optmax, reg->set_optmin, reg->set_optmax) @@ -65,7 +64,6 @@ EXPORT_SYMBOL(nf_unregister_sockopt); static int nf_sockopt(struct sock *sk, int pf, int val, char __user *opt, int *len, int get) { - struct list_head *i; struct nf_sockopt_ops *ops; int ret; @@ -75,8 +73,7 @@ static int nf_sockopt(struct sock *sk, int pf, int val, if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) return -EINTR; - list_for_each(i, &nf_sockopts) { - ops = (struct nf_sockopt_ops *)i; + list_for_each_entry(ops, &nf_sockopts, list) { if (ops->pf == pf) { if (!try_module_get(ops->owner)) goto out_nosup; @@ -124,7 +121,6 @@ EXPORT_SYMBOL(nf_getsockopt); static int compat_nf_sockopt(struct sock *sk, int pf, int val, char __user *opt, int *len, int get) { - struct list_head *i; struct nf_sockopt_ops *ops; int ret; @@ -135,8 +131,7 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) return -EINTR; - list_for_each(i, &nf_sockopts) { - ops = (struct nf_sockopt_ops *)i; + list_for_each_entry(ops, &nf_sockopts, list) { if (ops->pf == pf) { if (!try_module_get(ops->owner)) goto out_nosup; -- cgit v1.2.2 From e011ff48abc1b0ee97cde26b7700d2cca689e7c3 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Mon, 5 Nov 2007 20:59:47 -0800 Subject: [NETFILTER]: ebt_arp: fix --arp-gratuitous matching dependence on --arp-ip-{src,dst} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix --arp-gratuitous matching dependence on --arp-ip-{src,dst} Signed-off-by: Bart De Schuymer Signed-off-by: Lutz Preßler Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/netfilter/ebt_arp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index 1a46952a56d9..18141392a9b4 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -34,7 +34,7 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in ah->ar_pro, EBT_ARP_PTYPE)) return EBT_NOMATCH; - if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP)) { + if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_GRAT)) { __be32 saddr, daddr, *sap, *dap; if (ah->ar_pln != sizeof(__be32) || ah->ar_pro != htons(ETH_P_IP)) -- cgit v1.2.2 From 429f08e950a88cd826b203ea898c2f2d0f7db9de Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 5 Nov 2007 21:03:24 -0800 Subject: [IPV4]: Consolidate the ip cork destruction in ip_output.c The ip_push_pending_frames and ip_flush_pending_frames do the same things to flush the sock's cork. Move this into a separate function and save ~80 bytes from the .text Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e5f7dc2de303..fd99fbd685ea 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1183,6 +1183,17 @@ error: return err; } +static void ip_cork_release(struct inet_sock *inet) +{ + inet->cork.flags &= ~IPCORK_OPT; + kfree(inet->cork.opt); + inet->cork.opt = NULL; + if (inet->cork.rt) { + ip_rt_put(inet->cork.rt); + inet->cork.rt = NULL; + } +} + /* * Combined all pending IP fragments on the socket as one IP datagram * and push them out. @@ -1276,13 +1287,7 @@ int ip_push_pending_frames(struct sock *sk) } out: - inet->cork.flags &= ~IPCORK_OPT; - kfree(inet->cork.opt); - inet->cork.opt = NULL; - if (inet->cork.rt) { - ip_rt_put(inet->cork.rt); - inet->cork.rt = NULL; - } + ip_cork_release(inet); return err; error: @@ -1295,19 +1300,12 @@ error: */ void ip_flush_pending_frames(struct sock *sk) { - struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) kfree_skb(skb); - inet->cork.flags &= ~IPCORK_OPT; - kfree(inet->cork.opt); - inet->cork.opt = NULL; - if (inet->cork.rt) { - ip_rt_put(inet->cork.rt); - inet->cork.rt = NULL; - } + ip_cork_release(inet_sk(sk)); } -- cgit v1.2.2 From bf138862b162b6eaf3d7336f759f6e6485e481df Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 5 Nov 2007 21:04:31 -0800 Subject: [IPV6]: Consolidate the ip cork destruction in ip6_output.c The ip6_push_pending_frames and ip6_flush_pending_frames do the same things to flush the sock's cork. Move this into a separate function and save ~100 bytes from the .text Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 653fc0a8235b..86e1835ce4e4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1339,6 +1339,19 @@ error: return err; } +static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) +{ + inet->cork.flags &= ~IPCORK_OPT; + kfree(np->cork.opt); + np->cork.opt = NULL; + if (np->cork.rt) { + dst_release(&np->cork.rt->u.dst); + np->cork.rt = NULL; + inet->cork.flags &= ~IPCORK_ALLFRAG; + } + memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); +} + int ip6_push_pending_frames(struct sock *sk) { struct sk_buff *skb, *tmp_skb; @@ -1415,15 +1428,7 @@ int ip6_push_pending_frames(struct sock *sk) } out: - inet->cork.flags &= ~IPCORK_OPT; - kfree(np->cork.opt); - np->cork.opt = NULL; - if (np->cork.rt) { - dst_release(&np->cork.rt->u.dst); - np->cork.rt = NULL; - inet->cork.flags &= ~IPCORK_ALLFRAG; - } - memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); + ip6_cork_release(inet, np); return err; error: goto out; @@ -1431,8 +1436,6 @@ error: void ip6_flush_pending_frames(struct sock *sk) { - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { @@ -1442,14 +1445,5 @@ void ip6_flush_pending_frames(struct sock *sk) kfree_skb(skb); } - inet->cork.flags &= ~IPCORK_OPT; - - kfree(np->cork.opt); - np->cork.opt = NULL; - if (np->cork.rt) { - dst_release(&np->cork.rt->u.dst); - np->cork.rt = NULL; - inet->cork.flags &= ~IPCORK_ALLFRAG; - } - memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); + ip6_cork_release(inet_sk(sk), inet6_sk(sk)); } -- cgit v1.2.2 From 3f192b5c584b8ecddc6069717aaf36d8fa244713 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 5 Nov 2007 21:28:13 -0800 Subject: [NET]: Remove /proc/net/stat/*_arp_cache upon module removal neigh_table_init_no_netlink() creates them, but they aren't removed anywhere. Steps to reproduce: modprobe clip rmmod clip cat /proc/net/stat/clip_arp_cache BUG: unable to handle kernel paging request at virtual address f89d7758 printing eip: c05a99da *pdpt = 0000000000004001 *pde = 0000000004408067 *pte = 0000000000000000 Oops: 0000 [#1] PREEMPT SMP Modules linked in: atm af_packet ipv6 binfmt_misc sbs sbshc fan dock battery backlight ac power_supply parport loop rtc_cmos rtc_core rtc_lib serio_raw button k8temp hwmon amd_rng sr_mod cdrom shpchp pci_hotplug ehci_hcd ohci_hcd uhci_hcd usbcore Pid: 2082, comm: cat Not tainted (2.6.24-rc1-b1d08ac064268d0ae2281e98bf5e82627e0f0c56-bloat #4) EIP: 0060:[] EFLAGS: 00210256 CPU: 0 EIP is at neigh_stat_seq_next+0x26/0x3f EAX: 00000001 EBX: f89d7600 ECX: c587bf40 EDX: 00000000 ESI: 00000000 EDI: 00000001 EBP: 00000400 ESP: c587bf1c DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 Process cat (pid: 2082, ti=c587b000 task=c5984e10 task.ti=c587b000) Stack: c06228cc c5313790 c049e5c0 0804f000 c45a7b00 c53137b0 00000000 00000000 00000082 00000001 00000000 00000000 00000000 fffffffb c58d6780 c049e437 c45a7b00 c04b1f93 c587bfa0 00000400 0804f000 00000400 0804f000 c04b1f2f Call Trace: [] seq_read+0x189/0x281 [] seq_read+0x0/0x281 [] proc_reg_read+0x64/0x77 [] proc_reg_read+0x0/0x77 [] vfs_read+0x80/0xd1 [] sys_read+0x41/0x67 [] sysenter_past_esp+0x6b/0xc1 ======================= Code: e9 ec 8d 05 00 56 8b 11 53 8b 40 70 8b 58 3c eb 29 0f a3 15 80 91 7b c0 19 c0 85 c0 8d 42 01 74 17 89 c6 c1 fe 1f 89 01 89 71 04 <8b> 83 58 01 00 00 f7 d0 8b 04 90 eb 09 89 c2 83 fa 01 7e d2 31 EIP: [] neigh_stat_seq_next+0x26/0x3f SS:ESP 0068:c587bf1c Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/core/neighbour.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 05979e356963..29b8ee4e35d6 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1435,6 +1435,8 @@ int neigh_table_clear(struct neigh_table *tbl) kfree(tbl->phash_buckets); tbl->phash_buckets = NULL; + remove_proc_entry(tbl->id, init_net.proc_net_stat); + free_percpu(tbl->stats); tbl->stats = NULL; -- cgit v1.2.2 From 7a0ff716c2282f4b8d89c65850a4f17399628154 Mon Sep 17 00:00:00 2001 From: Mitsuru Chinen Date: Mon, 5 Nov 2007 21:29:17 -0800 Subject: [IPv6] SNMP: Restore Udp6InErrors incrementation As the checksum verification is postponed till user calls recv or poll, the inrementation of Udp6InErrors counter should be also postponed. Currently, it is postponed in non-blocking operation case. However it should be postponed in all case like the IPv4 code. Signed-off-by: Mitsuru Chinen Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/udp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index caebad6ee510..8344d8c87219 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -205,12 +205,11 @@ out: return err; csum_copy_err: + UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); skb_kill_datagram(sk, skb, flags); - if (flags & MSG_DONTWAIT) { - UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + if (flags & MSG_DONTWAIT) return -EAGAIN; - } goto try_again; } -- cgit v1.2.2 From 4e058063f49f53f6d75f707e36c82edee6d2e919 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 5 Nov 2007 21:30:11 -0800 Subject: [DECNET]: "addr" module param can't be __initdata sysfs keeps references to module parameters via /sys/module/*/parameters, so marking them as __initdata can't work. Steps to reproduce: modprobe decnet cat /sys/module/decnet/parameters/addr BUG: unable to handle kernel paging request at virtual address f88cd410 printing eip: c043dfd1 *pdpt = 0000000000004001 *pde = 0000000004408067 *pte = 0000000000000000 Oops: 0000 [#1] PREEMPT SMP Modules linked in: decnet sunrpc af_packet ipv6 binfmt_misc dm_mirror dm_multipath dm_mod sbs sbshc fan dock battery backlight ac power_supply parport loop rtc_cmos serio_raw rtc_core rtc_lib button amd_rng sr_mod cdrom shpchp pci_hotplug ehci_hcd ohci_hcd uhci_hcd usbcore Pid: 2099, comm: cat Not tainted (2.6.24-rc1-b1d08ac064268d0ae2281e98bf5e82627e0f0c56-bloat #6) EIP: 0060:[] EFLAGS: 00210286 CPU: 1 EIP is at param_get_int+0x6/0x20 EAX: c5c87000 EBX: 00000000 ECX: 000080d0 EDX: f88cd410 ESI: f8a108f8 EDI: c5c87000 EBP: 00000000 ESP: c5c97f00 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 Process cat (pid: 2099, ti=c5c97000 task=c641ee10 task.ti=c5c97000) Stack: 00000000 f8a108f8 c5c87000 c043db6b f8a108f1 00000124 c043de1a c043db2f f88cd410 ffffffff c5c87000 f8a16bc8 f8a16bc8 c043dd69 c043dd54 c5dd5078 c043dbc8 c5cc7580 c06ee64c c5d679f8 c04c431f c641f480 c641f484 00001000 Call Trace: [] param_array_get+0x3c/0x62 [] param_array_set+0x0/0xdf [] param_array_get+0x0/0x62 [] param_attr_show+0x15/0x2d [] param_attr_show+0x0/0x2d [] module_attr_show+0x1a/0x1e [] sysfs_read_file+0x7c/0xd9 [] sysfs_read_file+0x0/0xd9 [] vfs_read+0x88/0x134 [] do_page_fault+0x0/0x7d5 [] sys_read+0x41/0x67 [] sysenter_past_esp+0x6b/0xc1 ======================= Code: 00 83 c4 0c c3 83 ec 0c 8b 52 10 8b 12 c7 44 24 04 27 dd 6c c0 89 04 24 89 54 24 08 e8 ea 01 0c 00 83 c4 0c c3 83 ec 0c 8b 52 10 <8b> 12 c7 44 24 04 58 8c 6a c0 89 04 24 89 54 24 08 e8 ca 01 0c EIP: [] param_get_int+0x6/0x20 SS:ESP 0068:c5c97f00 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/decnet/dn_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 26130afd8029..66e266fb5908 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1439,7 +1439,7 @@ static const struct file_operations dn_dev_seq_fops = { #endif /* CONFIG_PROC_FS */ -static int __initdata addr[2]; +static int addr[2]; module_param_array(addr, int, NULL, 0444); MODULE_PARM_DESC(addr, "The DECnet address of this machine: area,node"); -- cgit v1.2.2 From 6a9fb9479f2672fa392711735de9e642395c9a14 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 5 Nov 2007 21:32:31 -0800 Subject: [IPV4]: Clean the ip_sockglue.c from some ugly ifdefs The #idfed CONFIG_IP_MROUTE is sometimes places inside the if-s, which looks completely bad. Similar ifdefs inside the functions looks a bit better, but they are also not recommended to be used. Provide an ifdef-ed ip_mroute_opt() helper to cleanup the code. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 39 ++++++++++++--------------------------- 1 file changed, 12 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index f51f20e487c8..82817e554363 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -437,10 +437,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, /* If optlen==0, it is equivalent to val == 0 */ -#ifdef CONFIG_IP_MROUTE - if (optname >= MRT_BASE && optname <= (MRT_BASE + 10)) + if (ip_mroute_opt(optname)) return ip_mroute_setsockopt(sk,optname,optval,optlen); -#endif err = 0; lock_sock(sk); @@ -909,11 +907,9 @@ int ip_setsockopt(struct sock *sk, int level, #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_HDRINCL && - optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY -#ifdef CONFIG_IP_MROUTE - && (optname < MRT_BASE || optname > (MRT_BASE + 10)) -#endif - ) { + optname != IP_IPSEC_POLICY && + optname != IP_XFRM_POLICY && + !ip_mroute_opt(optname)) { lock_sock(sk); err = nf_setsockopt(sk, PF_INET, optname, optval, optlen); release_sock(sk); @@ -935,11 +931,9 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_HDRINCL && - optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY -#ifdef CONFIG_IP_MROUTE - && (optname < MRT_BASE || optname > (MRT_BASE + 10)) -#endif - ) { + optname != IP_IPSEC_POLICY && + optname != IP_XFRM_POLICY && + !ip_mroute_opt(optname)) { lock_sock(sk); err = compat_nf_setsockopt(sk, PF_INET, optname, optval, optlen); @@ -967,11 +961,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, if (level != SOL_IP) return -EOPNOTSUPP; -#ifdef CONFIG_IP_MROUTE - if (optname >= MRT_BASE && optname <= MRT_BASE+10) { + if (ip_mroute_opt(optname)) return ip_mroute_getsockopt(sk,optname,optval,optlen); - } -#endif if (get_user(len,optlen)) return -EFAULT; @@ -1171,11 +1162,8 @@ int ip_getsockopt(struct sock *sk, int level, err = do_ip_getsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ - if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS -#ifdef CONFIG_IP_MROUTE - && (optname < MRT_BASE || optname > MRT_BASE+10) -#endif - ) { + if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && + !ip_mroute_opt(optname)) { int len; if (get_user(len,optlen)) @@ -1200,11 +1188,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, int err = do_ip_getsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ - if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS -#ifdef CONFIG_IP_MROUTE - && (optname < MRT_BASE || optname > MRT_BASE+10) -#endif - ) { + if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && + !ip_mroute_opt(optname)) { int len; if (get_user(len, optlen)) -- cgit v1.2.2 From 286ab3d46058840d68e5d7d52e316c1f7e98c59f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Nov 2007 23:38:39 -0800 Subject: [NET]: Define infrastructure to keep 'inuse' changes in an efficent SMP/NUMA way. "struct proto" currently uses an array stats[NR_CPUS] to track change on 'inuse' sockets per protocol. If NR_CPUS is big, this means we use a big memory area for this. Moreover, all this memory area is located on a single node on NUMA machines, increasing memory pressure on the boot node. In this patch, I tried to : - Keep a fast !CONFIG_SMP implementation - Keep a fast CONFIG_SMP implementation for often used protocols (tcp,udp,raw,...) - Introduce a NUMA efficient implementation Some helper macros are defined in include/net/sock.h These macros take into account CONFIG_SMP If a "struct proto" is declared without using DEFINE_PROTO_INUSE / REF_PROTO_INUSE macros, it will automatically use a default implementation, using a dynamically allocated percpu zone. This default implementation will be NUMA efficient, but might use 32/64 bytes per possible cpu because of current alloc_percpu() implementation. However it still should be better than previous implementation based on stats[NR_CPUS] field. When a "struct proto" is changed to use the new macros, we use a single static "int" percpu variable, lowering the memory and cpu costs, still preserving NUMA efficiency. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/proc.c | 19 ++++--------------- net/ipv6/proc.c | 19 ++++--------------- 3 files changed, 55 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 12ad2067a988..e077f263b730 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1801,12 +1801,41 @@ EXPORT_SYMBOL(sk_common_release); static DEFINE_RWLOCK(proto_list_lock); static LIST_HEAD(proto_list); +#ifdef CONFIG_SMP +/* + * Define default functions to keep track of inuse sockets per protocol + * Note that often used protocols use dedicated functions to get a speed increase. + * (see DEFINE_PROTO_INUSE/REF_PROTO_INUSE) + */ +static void inuse_add(struct proto *prot, int inc) +{ + per_cpu_ptr(prot->inuse_ptr, smp_processor_id())[0] += inc; +} + +static int inuse_get(const struct proto *prot) +{ + int res = 0, cpu; + for_each_possible_cpu(cpu) + res += per_cpu_ptr(prot->inuse_ptr, cpu)[0]; + return res; +} +#endif + int proto_register(struct proto *prot, int alloc_slab) { char *request_sock_slab_name = NULL; char *timewait_sock_slab_name; int rc = -ENOBUFS; +#ifdef CONFIG_SMP + if (!prot->inuse_getval || !prot->inuse_add) { + prot->inuse_ptr = alloc_percpu(int); + if (prot->inuse_ptr == NULL) + goto out; + prot->inuse_getval = inuse_get; + prot->inuse_add = inuse_add; + } +#endif if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -1814,7 +1843,7 @@ int proto_register(struct proto *prot, int alloc_slab) if (prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", prot->name); - goto out; + goto out_free_inuse; } if (prot->rsk_prot != NULL) { @@ -1873,6 +1902,15 @@ out_free_request_sock_slab_name: out_free_sock_slab: kmem_cache_destroy(prot->slab); prot->slab = NULL; +out_free_inuse: +#ifdef CONFIG_SMP + if (prot->inuse_ptr != NULL) { + free_percpu(prot->inuse_ptr); + prot->inuse_ptr = NULL; + prot->inuse_getval = NULL; + prot->inuse_add = NULL; + } +#endif goto out; } @@ -1884,6 +1922,14 @@ void proto_unregister(struct proto *prot) list_del(&prot->node); write_unlock(&proto_list_lock); +#ifdef CONFIG_SMP + if (prot->inuse_ptr != NULL) { + free_percpu(prot->inuse_ptr); + prot->inuse_ptr = NULL; + prot->inuse_getval = NULL; + prot->inuse_add = NULL; + } +#endif if (prot->slab != NULL) { kmem_cache_destroy(prot->slab); prot->slab = NULL; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index ffdccc0972e0..ce34b281803f 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -46,17 +46,6 @@ #include #include -static int fold_prot_inuse(struct proto *proto) -{ - int res = 0; - int cpu; - - for_each_possible_cpu(cpu) - res += proto->stats[cpu].inuse; - - return res; -} - /* * Report socket allocation statistics [mea@utu.fi] */ @@ -64,12 +53,12 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) { socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", - fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), + sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), atomic_read(&tcp_memory_allocated)); - seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); - seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); - seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); + seq_printf(seq, "UDP: inuse %d\n", sock_prot_inuse(&udp_prot)); + seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot)); + seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot)); seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues(), ip_frag_mem()); return 0; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index be526ad92543..8631ed7fe8a9 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -32,27 +32,16 @@ static struct proc_dir_entry *proc_net_devsnmp6; -static int fold_prot_inuse(struct proto *proto) -{ - int res = 0; - int cpu; - - for_each_possible_cpu(cpu) - res += proto->stats[cpu].inuse; - - return res; -} - static int sockstat6_seq_show(struct seq_file *seq, void *v) { seq_printf(seq, "TCP6: inuse %d\n", - fold_prot_inuse(&tcpv6_prot)); + sock_prot_inuse(&tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", - fold_prot_inuse(&udpv6_prot)); + sock_prot_inuse(&udpv6_prot)); seq_printf(seq, "UDPLITE6: inuse %d\n", - fold_prot_inuse(&udplitev6_prot)); + sock_prot_inuse(&udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", - fold_prot_inuse(&rawv6_prot)); + sock_prot_inuse(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", ip6_frag_nqueues(), ip6_frag_mem()); return 0; -- cgit v1.2.2 From 47a31a6ffcca3b55149bccd5b99763e5eea60ac4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Nov 2007 23:39:16 -0800 Subject: [IPV4]: Use the {DEFINE|REF}_PROTO_INUSE infrastructure Trivial patch to make "tcp,udp,udplite,raw" protocols uses the fast "inuse sockets" infrastructure Each protocol use then a static percpu var, instead of a dynamic one. This saves some ram and some cpu cycles Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/raw.c | 3 +++ net/ipv4/tcp_ipv4.c | 3 +++ net/ipv4/udp.c | 3 +++ net/ipv4/udplite.c | 3 +++ 4 files changed, 12 insertions(+) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 3916faca3afe..66b42f547bf9 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -760,6 +760,8 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) } } +DEFINE_PROTO_INUSE(raw) + struct proto raw_prot = { .name = "RAW", .owner = THIS_MODULE, @@ -781,6 +783,7 @@ struct proto raw_prot = { .compat_setsockopt = compat_raw_setsockopt, .compat_getsockopt = compat_raw_getsockopt, #endif + REF_PROTO_INUSE(raw) }; #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d438dfb0c8f3..e9127cdced20 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2417,6 +2417,8 @@ void tcp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +DEFINE_PROTO_INUSE(tcp) + struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, @@ -2451,6 +2453,7 @@ struct proto tcp_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif + REF_PROTO_INUSE(tcp) }; void __init tcp_v4_init(struct net_proto_family *ops) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4bc25b46f33f..03c400ca14c5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1430,6 +1430,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) } +DEFINE_PROTO_INUSE(udp) + struct proto udp_prot = { .name = "UDP", .owner = THIS_MODULE, @@ -1452,6 +1454,7 @@ struct proto udp_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif + REF_PROTO_INUSE(udp) }; /* ------------------------------------------------------------------------ */ diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 94977205abb4..f5baeb3e8b85 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -44,6 +44,8 @@ static struct net_protocol udplite_protocol = { .no_policy = 1, }; +DEFINE_PROTO_INUSE(udplite) + struct proto udplite_prot = { .name = "UDP-Lite", .owner = THIS_MODULE, @@ -67,6 +69,7 @@ struct proto udplite_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif + REF_PROTO_INUSE(udplite) }; static struct inet_protosw udplite4_protosw = { -- cgit v1.2.2 From c5a432f1a18b4b2efe691dd6bbb30d86a281f783 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Nov 2007 23:39:51 -0800 Subject: [IPV6]: Use the {DEFINE|REF}_PROTO_INUSE infrastructure Trivial patch to make "tcpv6,udpv6,udplitev6,rawv6" protocols uses the fast "inuse sockets" infrastructure Each protocol use then a static percpu var, instead of a dynamic one. This saves some ram and some cpu cycles Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/raw.c | 3 +++ net/ipv6/tcp_ipv6.c | 3 +++ net/ipv6/udp.c | 3 +++ net/ipv6/udplite.c | 3 +++ 4 files changed, 12 insertions(+) (limited to 'net') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ca24ef19cd8f..807260d03586 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1144,6 +1144,8 @@ static int rawv6_init_sk(struct sock *sk) return(0); } +DEFINE_PROTO_INUSE(rawv6) + struct proto rawv6_prot = { .name = "RAWv6", .owner = THIS_MODULE, @@ -1166,6 +1168,7 @@ struct proto rawv6_prot = { .compat_setsockopt = compat_rawv6_setsockopt, .compat_getsockopt = compat_rawv6_getsockopt, #endif + REF_PROTO_INUSE(rawv6) }; #ifdef CONFIG_PROC_FS diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 06be2a1f2730..3aad861975a0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2107,6 +2107,8 @@ void tcp6_proc_exit(void) } #endif +DEFINE_PROTO_INUSE(tcpv6) + struct proto tcpv6_prot = { .name = "TCPv6", .owner = THIS_MODULE, @@ -2141,6 +2143,7 @@ struct proto tcpv6_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif + REF_PROTO_INUSE(tcpv6) }; static struct inet6_protocol tcpv6_protocol = { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8344d8c87219..ee1cc3f8599f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -970,6 +970,8 @@ void udp6_proc_exit(void) { /* ------------------------------------------------------------------------ */ +DEFINE_PROTO_INUSE(udpv6) + struct proto udpv6_prot = { .name = "UDPv6", .owner = THIS_MODULE, @@ -991,6 +993,7 @@ struct proto udpv6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif + REF_PROTO_INUSE(udpv6) }; static struct inet_protosw udpv6_protosw = { diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 766566f7de47..5a0379f71415 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -40,6 +40,8 @@ static int udplite_v6_get_port(struct sock *sk, unsigned short snum) return udplite_get_port(sk, snum, ipv6_rcv_saddr_equal); } +DEFINE_PROTO_INUSE(udplitev6) + struct proto udplitev6_prot = { .name = "UDPLITEv6", .owner = THIS_MODULE, @@ -62,6 +64,7 @@ struct proto udplitev6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif + REF_PROTO_INUSE(udplitev6) }; static struct inet_protosw udplite6_protosw = { -- cgit v1.2.2 From 8295b6d9e623879344ed0ca7565336e4fd698e42 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Nov 2007 23:40:28 -0800 Subject: [SCTP]: Use the {DEFINE|REF}_PROTO_INUSE infrastructure Trivial patch to make "sctcp,sctpv6" protocols uses the fast "inuse sockets" infrastructure Each protocol use then a static percpu var, instead of a dynamic one. This saves some ram and some cpu cycles Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sctp/socket.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index bd6f42a15a4b..a7ecf3159e53 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6455,6 +6455,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, } +DEFINE_PROTO_INUSE(sctp) + /* This proto struct describes the ULP interface for SCTP. */ struct proto sctp_prot = { .name = "SCTP", @@ -6483,9 +6485,12 @@ struct proto sctp_prot = { .memory_pressure = &sctp_memory_pressure, .enter_memory_pressure = sctp_enter_memory_pressure, .memory_allocated = &sctp_memory_allocated, + REF_PROTO_INUSE(sctp) }; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +DEFINE_PROTO_INUSE(sctpv6) + struct proto sctpv6_prot = { .name = "SCTPv6", .owner = THIS_MODULE, @@ -6513,5 +6518,6 @@ struct proto sctpv6_prot = { .memory_pressure = &sctp_memory_pressure, .enter_memory_pressure = sctp_enter_memory_pressure, .memory_allocated = &sctp_memory_allocated, + REF_PROTO_INUSE(sctpv6) }; #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ -- cgit v1.2.2 From c62cf5cb173a5b8446e513a14448460cad435db2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 5 Nov 2007 23:42:25 -0800 Subject: [DCCP]: Use DEFINE_PROTO_INUSE infrastructure. Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 3 +++ net/dccp/ipv6.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 01a6a808bdb7..db17b83e8d3e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -922,6 +922,8 @@ static struct timewait_sock_ops dccp_timewait_sock_ops = { .twsk_obj_size = sizeof(struct inet_timewait_sock), }; +DEFINE_PROTO_INUSE(dccp_v4) + static struct proto dccp_v4_prot = { .name = "DCCP", .owner = THIS_MODULE, @@ -950,6 +952,7 @@ static struct proto dccp_v4_prot = { .compat_setsockopt = compat_dccp_setsockopt, .compat_getsockopt = compat_dccp_getsockopt, #endif + REF_PROTO_INUSE(dccp_v4) }; static struct net_protocol dccp_v4_protocol = { diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 62428ff137dd..87c98fb86fa8 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1107,6 +1107,8 @@ static struct timewait_sock_ops dccp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct dccp6_timewait_sock), }; +DEFINE_PROTO_INUSE(dccp_v6) + static struct proto dccp_v6_prot = { .name = "DCCPv6", .owner = THIS_MODULE, @@ -1135,6 +1137,7 @@ static struct proto dccp_v6_prot = { .compat_setsockopt = compat_dccp_setsockopt, .compat_getsockopt = compat_dccp_getsockopt, #endif + REF_PROTO_INUSE(dccp_v6) }; static struct inet6_protocol dccp_v6_protocol = { -- cgit v1.2.2 From 4f9f8311a08c0d95c70261264a2b47f2ae99683a Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Tue, 6 Nov 2007 03:08:09 -0800 Subject: [PKT_SCHED]: Fix OOPS when removing devices from a teql queuing discipline tecl_reset() is called from deactivate and qdisc is set to noop already, but subsequent teql_xmit does not know about it and dereference private data as teql qdisc and thus oopses. not catch it first :) Signed-off-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- net/sched/sch_teql.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 421281d9dd1d..c0ed06d4a504 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -252,6 +252,9 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * static inline int teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) { + if (dev->qdisc == &noop_qdisc) + return -ENODEV; + if (dev->header_ops == NULL || skb->dst == NULL || skb->dst->neighbour == NULL) -- cgit v1.2.2 From 33120b30cc3b8665204d4fcde7288638b0dd04d5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Nov 2007 05:27:11 -0800 Subject: [IPV6]: Convert /proc/net/ipv6_route to seq_file interface This removes last proc_net_create() user. Kudos to Benjamin Thery and Stephen Hemminger for comments on previous version. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv6/route.c | 91 ++++++++++++++++++-------------------------------------- 1 file changed, 29 insertions(+), 62 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 95f8e4a62f68..973a97abc446 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -38,12 +38,8 @@ #include #include #include - -#ifdef CONFIG_PROC_FS #include #include -#endif - #include #include #include @@ -2288,71 +2284,50 @@ struct rt6_proc_arg static int rt6_info_route(struct rt6_info *rt, void *p_arg) { - struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg; + struct seq_file *m = p_arg; - if (arg->skip < arg->offset / RT6_INFO_LEN) { - arg->skip++; - return 0; - } - - if (arg->len >= arg->length) - return 0; - - arg->len += sprintf(arg->buffer + arg->len, - NIP6_SEQFMT " %02x ", - NIP6(rt->rt6i_dst.addr), - rt->rt6i_dst.plen); + seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr), + rt->rt6i_dst.plen); #ifdef CONFIG_IPV6_SUBTREES - arg->len += sprintf(arg->buffer + arg->len, - NIP6_SEQFMT " %02x ", - NIP6(rt->rt6i_src.addr), - rt->rt6i_src.plen); + seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr), + rt->rt6i_src.plen); #else - arg->len += sprintf(arg->buffer + arg->len, - "00000000000000000000000000000000 00 "); + seq_puts(m, "00000000000000000000000000000000 00 "); #endif if (rt->rt6i_nexthop) { - arg->len += sprintf(arg->buffer + arg->len, - NIP6_SEQFMT, - NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key))); + seq_printf(m, NIP6_SEQFMT, + NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key))); } else { - arg->len += sprintf(arg->buffer + arg->len, - "00000000000000000000000000000000"); + seq_puts(m, "00000000000000000000000000000000"); } - arg->len += sprintf(arg->buffer + arg->len, - " %08x %08x %08x %08x %8s\n", - rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), - rt->u.dst.__use, rt->rt6i_flags, - rt->rt6i_dev ? rt->rt6i_dev->name : ""); + seq_printf(m, " %08x %08x %08x %08x %8s\n", + rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), + rt->u.dst.__use, rt->rt6i_flags, + rt->rt6i_dev ? rt->rt6i_dev->name : ""); return 0; } -static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) +static int ipv6_route_show(struct seq_file *m, void *v) { - struct rt6_proc_arg arg = { - .buffer = buffer, - .offset = offset, - .length = length, - }; - - fib6_clean_all(rt6_info_route, 0, &arg); - - *start = buffer; - if (offset) - *start += offset % RT6_INFO_LEN; - - arg.len -= offset % RT6_INFO_LEN; - - if (arg.len > length) - arg.len = length; - if (arg.len < 0) - arg.len = 0; + fib6_clean_all(rt6_info_route, 0, m); + return 0; +} - return arg.len; +static int ipv6_route_open(struct inode *inode, struct file *file) +{ + return single_open(file, ipv6_route_show, NULL); } +static const struct file_operations ipv6_route_proc_fops = { + .owner = THIS_MODULE, + .open = ipv6_route_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int rt6_stats_seq_show(struct seq_file *seq, void *v) { seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", @@ -2489,22 +2464,14 @@ ctl_table ipv6_route_table[] = { void __init ip6_route_init(void) { -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *p; -#endif ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; fib6_init(); -#ifdef CONFIG_PROC_FS - p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info); - if (p) - p->owner = THIS_MODULE; - + proc_net_fops_create(&init_net, "ipv6_route", 0, &ipv6_route_proc_fops); proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); -#endif #ifdef CONFIG_XFRM xfrm6_init(); #endif -- cgit v1.2.2 From c3e9a353d8fc64a82ab11a07e21902e25e1e96d1 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 6 Nov 2007 23:34:04 -0800 Subject: [IPV4]: Compact some ifdefs in the fib code. There are places that check for CONFIG_IP_MULTIPLE_TABLES twice in the same file, but the internals of these #ifdefs can be merged. As a side effect - remove one ifdef from inside a function. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 60123905dbbf..732d8f088b13 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -59,6 +59,13 @@ struct fib_table *ip_fib_main_table; #define FIB_TABLE_HASHSZ 1 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; +static void __init fib4_rules_init(void) +{ + ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); + hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); + ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); + hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); +} #else #define FIB_TABLE_HASHSZ 256 @@ -905,14 +912,8 @@ void __init ip_fib_init(void) for (i = 0; i < FIB_TABLE_HASHSZ; i++) INIT_HLIST_HEAD(&fib_table_hash[i]); -#ifndef CONFIG_IP_MULTIPLE_TABLES - ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); - hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); - ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); - hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); -#else + fib4_rules_init(); -#endif register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); -- cgit v1.2.2 From 40208d71e0c6b5f912b185e637272b6481fcef3f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 7 Nov 2007 00:49:04 -0800 Subject: [NET]: Removing duplicit #includes Removing duplicit #includes for net/ Signed-off-by: Jiri Olsa Signed-off-by: David S. Miller --- net/core/dst.c | 1 - net/ieee80211/ieee80211_crypt_tkip.c | 1 - net/ieee80211/ieee80211_crypt_wep.c | 1 - 3 files changed, 3 deletions(-) (limited to 'net') diff --git a/net/core/dst.c b/net/core/dst.c index 16958e64e577..03daead3592a 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -18,7 +18,6 @@ #include #include -#include #include /* diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c index 4cce3534e408..58b22619ab15 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/ieee80211/ieee80211_crypt_tkip.c @@ -25,7 +25,6 @@ #include #include -#include #include MODULE_AUTHOR("Jouni Malinen"); diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c index 866fc04c44f9..3fa30c40779f 100644 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ b/net/ieee80211/ieee80211_crypt_wep.c @@ -22,7 +22,6 @@ #include #include -#include #include MODULE_AUTHOR("Jouni Malinen"); -- cgit v1.2.2 From 543821c6f5dea5221426eaf1eac98b100249c7ac Mon Sep 17 00:00:00 2001 From: Radu Rendec Date: Wed, 7 Nov 2007 01:20:12 -0800 Subject: [PKT_SCHED] CLS_U32: Fix endianness problem with u32 classifier hash masks. While trying to implement u32 hashes in my shaping machine I ran into a possible bug in the u32 hash/bucket computing algorithm (net/sched/cls_u32.c). The problem occurs only with hash masks that extend over the octet boundary, on little endian machines (where htonl() actually does something). Let's say that I would like to use 0x3fc0 as the hash mask. This means 8 contiguous "1" bits starting at b6. With such a mask, the expected (and logical) behavior is to hash any address in, for instance, 192.168.0.0/26 in bucket 0, then any address in 192.168.0.64/26 in bucket 1, then 192.168.0.128/26 in bucket 2 and so on. This is exactly what would happen on a big endian machine, but on little endian machines, what would actually happen with current implementation is 0x3fc0 being reversed (into 0xc03f0000) by htonl() in the userspace tool and then applied to 192.168.x.x in the u32 classifier. When shifting right by 16 bits (rank of first "1" bit in the reversed mask) and applying the divisor mask (0xff for divisor 256), what would actually remain is 0x3f applied on the "168" octet of the address. One could say is this can be easily worked around by taking endianness into account in userspace and supplying an appropriate mask (0xfc03) that would be turned into contiguous "1" bits when reversed (0x03fc0000). But the actual problem is the network address (inside the packet) not being converted to host order, but used as a host-order value when computing the bucket. Let's say the network address is written as n31 n30 ... n0, with n0 being the least significant bit. When used directly (without any conversion) on a little endian machine, it becomes n7 ... n0 n8 ..n15 etc in the machine's registers. Thus bits n7 and n8 would no longer be adjacent and 192.168.64.0/26 and 192.168.128.0/26 would no longer be consecutive. The fix is to apply ntohl() on the hmask before computing fshift, and in u32_hash_fold() convert the packet data to host order before shifting down by fshift. With helpful feedback from Jamal Hadi Salim and Jarek Poplawski. Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 9e98c6e567dd..53171029439f 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -91,7 +91,7 @@ static struct tc_u_common *u32_list; static __inline__ unsigned u32_hash_fold(u32 key, struct tc_u32_sel *sel, u8 fshift) { - unsigned h = (key & sel->hmask)>>fshift; + unsigned h = ntohl(key & sel->hmask)>>fshift; return h; } @@ -615,7 +615,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, n->handle = handle; { u8 i = 0; - u32 mask = s->hmask; + u32 mask = ntohl(s->hmask); if (mask) { while (!(mask & 1)) { i++; -- cgit v1.2.2 From 45a19b0a725a04f3255d9d3da1fca30bb97f1481 Mon Sep 17 00:00:00 2001 From: Johann Felix Soden Date: Wed, 7 Nov 2007 01:30:30 -0800 Subject: [NETNS]: Fix compiler error in net_namespace.c Because net_free is called by copy_net_ns before its declaration, the compiler gives an error. This patch puts net_free before copy_net_ns to fix this. The compiler error: net/core/net_namespace.c: In function 'copy_net_ns': net/core/net_namespace.c:97: error: implicit declaration of function 'net_free' net/core/net_namespace.c: At top level: net/core/net_namespace.c:104: warning: conflicting types for 'net_free' net/core/net_namespace.c:104: error: static declaration of 'net_free' follows non-static declaration net/core/net_namespace.c:97: error: previous implicit declaration of 'net_free' was here The error was introduced by the '[NET]: Hide the dead code in the net_namespace.c' patch (6a1a3b9f686bb04820a232cc1657ef2c45670709). Signed-off-by: Johann Felix Soden Signed-off-by: David S. Miller --- net/core/net_namespace.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index e9f0964ce70b..3f6d37deac45 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -64,6 +64,20 @@ static struct net *net_alloc(void) return kmem_cache_zalloc(net_cachep, GFP_KERNEL); } +static void net_free(struct net *net) +{ + if (!net) + return; + + if (unlikely(atomic_read(&net->use_count) != 0)) { + printk(KERN_EMERG "network namespace not free! Usage: %d\n", + atomic_read(&net->use_count)); + return; + } + + kmem_cache_free(net_cachep, net); +} + struct net *copy_net_ns(unsigned long flags, struct net *old_net) { struct net *new_net = NULL; @@ -100,20 +114,6 @@ out: return new_net; } -static void net_free(struct net *net) -{ - if (!net) - return; - - if (unlikely(atomic_read(&net->use_count) != 0)) { - printk(KERN_EMERG "network namespace not free! Usage: %d\n", - atomic_read(&net->use_count)); - return; - } - - kmem_cache_free(net_cachep, net); -} - static void cleanup_net(struct work_struct *work) { struct pernet_operations *ops; -- cgit v1.2.2 From fffe470a803e7f7b74c016291e542a0162761209 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 7 Nov 2007 01:31:32 -0800 Subject: [VLAN]: Fix SET_VLAN_INGRESS_PRIORITY_CMD ioctl Based on report and patch by Doug Kehn : vconfig returns the following error when attempting to execute the set_ingress_map command: vconfig: socket or ioctl error for set_ingress_map: Operation not permitted In vlan.c, vlan_ioctl_handler for SET_VLAN_INGRESS_PRIORITY_CMD sets err = -EPERM and calls vlan_dev_set_ingress_priority. vlan_dev_set_ingress_priority is a void function so err remains at -EPERM and results in the vconfig error (even though the ingress map was set). Fix by setting err = 0 after the vlan_dev_set_ingress_priority call. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 3fe4fc86055f..1037748c14db 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -747,6 +747,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg) vlan_dev_set_ingress_priority(dev, args.u.skb_priority, args.vlan_qos); + err = 0; break; case SET_VLAN_EGRESS_PRIORITY_CMD: -- cgit v1.2.2 From 4999f3621f4da622e77931b3d33ada6c7083c705 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 7 Nov 2007 02:21:47 -0800 Subject: [IPSEC]: Fix crypto_alloc_comp error checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function crypto_alloc_comp returns an errno instead of NULL to indicate error. So it needs to be tested with IS_ERR. This is based on a patch by Vicenç Beltran Querol. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ipcomp.c | 3 ++- net/ipv6/ipcomp6.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index ca1b5fdb8d31..2c44a94c2135 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -344,7 +345,7 @@ static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) for_each_possible_cpu(cpu) { struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, CRYPTO_ALG_ASYNC); - if (!tfm) + if (IS_ERR(tfm)) goto error; *per_cpu_ptr(tfms, cpu) = tfm; } diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 85eb4798d8d2..0cd4056f9127 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -358,7 +359,7 @@ static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name) for_each_possible_cpu(cpu) { struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, CRYPTO_ALG_ASYNC); - if (!tfm) + if (IS_ERR(tfm)) goto error; *per_cpu_ptr(tfms, cpu) = tfm; } -- cgit v1.2.2 From b733c007edad6f3e05109951bacc6f87dd807917 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 7 Nov 2007 02:23:38 -0800 Subject: [NET]: Clean proto_(un)register from in-code ifdefs The struct proto has the per-cpu "inuse" counter, which is handled with a special care. All the handling code hides under the ifdef CONFIG_SMP and it introduces some code duplication and makes it look worse than it could. Clean this. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/sock.c | 67 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index e077f263b730..8fc2f84209e4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1819,23 +1819,48 @@ static int inuse_get(const struct proto *prot) res += per_cpu_ptr(prot->inuse_ptr, cpu)[0]; return res; } -#endif -int proto_register(struct proto *prot, int alloc_slab) +static int inuse_init(struct proto *prot) { - char *request_sock_slab_name = NULL; - char *timewait_sock_slab_name; - int rc = -ENOBUFS; - -#ifdef CONFIG_SMP if (!prot->inuse_getval || !prot->inuse_add) { prot->inuse_ptr = alloc_percpu(int); if (prot->inuse_ptr == NULL) - goto out; + return -ENOBUFS; + prot->inuse_getval = inuse_get; prot->inuse_add = inuse_add; } + return 0; +} + +static void inuse_fini(struct proto *prot) +{ + if (prot->inuse_ptr != NULL) { + free_percpu(prot->inuse_ptr); + prot->inuse_ptr = NULL; + prot->inuse_getval = NULL; + prot->inuse_add = NULL; + } +} +#else +static inline int inuse_init(struct proto *prot) +{ + return 0; +} + +static inline void inuse_fini(struct proto *prot) +{ +} #endif + +int proto_register(struct proto *prot, int alloc_slab) +{ + char *request_sock_slab_name = NULL; + char *timewait_sock_slab_name; + + if (inuse_init(prot)) + goto out; + if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -1887,9 +1912,8 @@ int proto_register(struct proto *prot, int alloc_slab) write_lock(&proto_list_lock); list_add(&prot->node, &proto_list); write_unlock(&proto_list_lock); - rc = 0; -out: - return rc; + return 0; + out_free_timewait_sock_slab_name: kfree(timewait_sock_slab_name); out_free_request_sock_slab: @@ -1903,15 +1927,9 @@ out_free_sock_slab: kmem_cache_destroy(prot->slab); prot->slab = NULL; out_free_inuse: -#ifdef CONFIG_SMP - if (prot->inuse_ptr != NULL) { - free_percpu(prot->inuse_ptr); - prot->inuse_ptr = NULL; - prot->inuse_getval = NULL; - prot->inuse_add = NULL; - } -#endif - goto out; + inuse_fini(prot); +out: + return -ENOBUFS; } EXPORT_SYMBOL(proto_register); @@ -1922,14 +1940,7 @@ void proto_unregister(struct proto *prot) list_del(&prot->node); write_unlock(&proto_list_lock); -#ifdef CONFIG_SMP - if (prot->inuse_ptr != NULL) { - free_percpu(prot->inuse_ptr); - prot->inuse_ptr = NULL; - prot->inuse_getval = NULL; - prot->inuse_add = NULL; - } -#endif + inuse_fini(prot); if (prot->slab != NULL) { kmem_cache_destroy(prot->slab); prot->slab = NULL; -- cgit v1.2.2 From 1e356f9cdfa885c78791d5d6e5d2baef22f01853 Mon Sep 17 00:00:00 2001 From: "Rumen G. Bogdanovski" Date: Wed, 7 Nov 2007 02:35:54 -0800 Subject: [IPVS]: Bind connections on stanby if the destination exists This patch fixes the problem with node overload on director fail-over. Given the scenario: 2 nodes each accepting 3 connections at a time and 2 directors, director failover occurs when the nodes are fully loaded (6 connections to the cluster) in this case the new director will assign another 6 connections to the cluster, If the same real servers exist there. The problem turned to be in not binding the inherited connections to the real servers (destinations) on the backup director. Therefore: "ipvsadm -l" reports 0 connections: root@test2:~# ipvsadm -l IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP test2.local:5999 wlc -> node473.local:5999 Route 1000 0 0 -> node484.local:5999 Route 1000 0 0 while "ipvs -lnc" is right root@test2:~# ipvsadm -lnc IPVS connection entries pro expire state source virtual destination TCP 14:56 ESTABLISHED 192.168.0.10:39164 192.168.0.222:5999 192.168.0.51:5999 TCP 14:59 ESTABLISHED 192.168.0.10:39165 192.168.0.222:5999 192.168.0.52:5999 So the patch I am sending fixes the problem by binding the received connections to the appropriate service on the backup director, if it exists, else the connection will be handled the old way. So if the master and the backup directors are synchronized in terms of real services there will be no problem with server over-committing since new connections will not be created on the nonexistent real services on the backup. However if the service is created later on the backup, the binding will be performed when the next connection update is received. With this patch the inherited connections will show as inactive on the backup: root@test2:~# ipvsadm -l IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP test2.local:5999 wlc -> node473.local:5999 Route 1000 0 1 -> node484.local:5999 Route 1000 0 1 rumen@test2:~$ cat /proc/net/ip_vs IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP C0A800DE:176F wlc -> C0A80033:176F Route 1000 0 1 -> C0A80032:176F Route 1000 0 1 Regards, Rumen Bogdanovski Acked-by: Julian Anastasov Signed-off-by: Rumen G. Bogdanovski Signed-off-by: Simon Horman --- net/ipv4/ipvs/ip_vs_conn.c | 19 +++++++++++++++++++ net/ipv4/ipvs/ip_vs_ctl.c | 26 ++++++++++++++++++++++++++ net/ipv4/ipvs/ip_vs_sync.c | 24 ++++++++++++++++++++---- 3 files changed, 65 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 4b702f708d30..b7eeae622d9b 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -425,6 +425,25 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) } +/* + * Check if there is a destination for the connection, if so + * bind the connection to the destination. + */ +struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) +{ + struct ip_vs_dest *dest; + + if ((cp) && (!cp->dest)) { + dest = ip_vs_find_dest(cp->daddr, cp->dport, + cp->vaddr, cp->vport, cp->protocol); + ip_vs_bind_dest(cp, dest); + return dest; + } else + return NULL; +} +EXPORT_SYMBOL(ip_vs_try_bind_dest); + + /* * Unbind a connection entry with its VS destination * Called by the ip_vs_conn_expire function. diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 7345fc252a23..3c4d22a468ec 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -579,6 +579,32 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) return NULL; } +/* + * Find destination by {daddr,dport,vaddr,protocol} + * Cretaed to be used in ip_vs_process_message() in + * the backup synchronization daemon. It finds the + * destination to be bound to the received connection + * on the backup. + * + * ip_vs_lookup_real_service() looked promissing, but + * seems not working as expected. + */ +struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, + __be32 vaddr, __be16 vport, __u16 protocol) +{ + struct ip_vs_dest *dest; + struct ip_vs_service *svc; + + svc = ip_vs_service_get(0, protocol, vaddr, vport); + if (!svc) + return NULL; + dest = ip_vs_lookup_dest(svc, daddr, dport); + if (dest) + atomic_inc(&dest->refcnt); + ip_vs_service_put(svc); + return dest; +} +EXPORT_SYMBOL(ip_vs_find_dest); /* * Lookup dest by {svc,addr,port} in the destination trash. diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 0d4d9721cbd4..b1694d67abb9 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -284,6 +284,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) struct ip_vs_sync_conn_options *opt; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; + struct ip_vs_dest *dest; char *p; int i; @@ -317,20 +318,35 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) s->caddr, s->cport, s->vaddr, s->vport); if (!cp) { + /* + * Find the appropriate destination for the connection. + * If it is not found the connection will remain unbound + * but still handled. + */ + dest = ip_vs_find_dest(s->daddr, s->dport, + s->vaddr, s->vport, + s->protocol); cp = ip_vs_conn_new(s->protocol, s->caddr, s->cport, s->vaddr, s->vport, s->daddr, s->dport, - flags, NULL); + flags, dest); + if (dest) + atomic_dec(&dest->refcnt); if (!cp) { IP_VS_ERR("ip_vs_conn_new failed\n"); return; } cp->state = ntohs(s->state); } else if (!cp->dest) { - /* it is an entry created by the synchronization */ - cp->state = ntohs(s->state); - cp->flags = flags | IP_VS_CONN_F_HASHED; + dest = ip_vs_try_bind_dest(cp); + if (!dest) { + /* it is an unbound entry created by + * synchronization */ + cp->state = ntohs(s->state); + cp->flags = flags | IP_VS_CONN_F_HASHED; + } else + atomic_dec(&dest->refcnt); } /* Note that we don't touch its state and flags if it is a normal entry. */ -- cgit v1.2.2 From efac52762b1e3fe3035d29e82d8ee1aebc45e4a7 Mon Sep 17 00:00:00 2001 From: "Rumen G. Bogdanovski" Date: Wed, 7 Nov 2007 02:36:55 -0800 Subject: [IPVS]: Synchronize closing of Connections This patch makes the master daemon to sync the connection when it is about to close. This makes the connections on the backup to close or timeout according their state. Before the sync was performed only if the connection is in ESTABLISHED state which always made the connections to timeout in the hard coded 3 minutes. However the Andy Gospodarek's patch ([IPVS]: use proper timeout instead of fixed value) effectively did nothing more than increasing this to 15 minutes (Established state timeout). So this patch makes use of proper timeout since it syncs the connections on status changes to FIN_WAIT (2min timeout) and CLOSE (10sec timeout). However if the backup misses CLOSE hopefully it did not miss FIN_WAIT. Otherwise we will just have to wait for the ESTABLISHED state timeout. As it is without this patch. This way the number of the hanging connections on the backup is kept to minimum. And very few of them will be left to timeout with a long timeout. This is important if we want to make use of the fix for the real server overcommit on master/backup fail-over. Signed-off-by: Rumen G. Bogdanovski Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/ipv4/ipvs/ip_vs_core.c | 20 ++++++++++++++------ net/ipv4/ipvs/ip_vs_sync.c | 2 +- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index c6ed7654e839..20c884a57721 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -979,15 +979,23 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, ret = NF_ACCEPT; } - /* increase its packet counter and check if it is needed - to be synchronized */ + /* Increase its packet counter and check if it is needed + * to be synchronized + * + * Sync connection if it is about to close to + * encorage the standby servers to update the connections timeout + */ atomic_inc(&cp->in_pkts); if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && - (cp->protocol != IPPROTO_TCP || - cp->state == IP_VS_TCP_S_ESTABLISHED) && - (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1] - == sysctl_ip_vs_sync_threshold[0])) + (((cp->protocol != IPPROTO_TCP || + cp->state == IP_VS_TCP_S_ESTABLISHED) && + (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1] + == sysctl_ip_vs_sync_threshold[0])) || + ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && + ((cp->state == IP_VS_TCP_S_FIN_WAIT) || + (cp->state == IP_VS_TCP_S_CLOSE))))) ip_vs_sync_conn(cp); + cp->old_state = cp->state; ip_vs_conn_put(cp); return ret; diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index b1694d67abb9..bd930efc18da 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -343,7 +343,6 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) if (!dest) { /* it is an unbound entry created by * synchronization */ - cp->state = ntohs(s->state); cp->flags = flags | IP_VS_CONN_F_HASHED; } else atomic_dec(&dest->refcnt); @@ -358,6 +357,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) p += SIMPLE_CONN_SIZE; atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); + cp->state = ntohs(s->state); pp = ip_vs_proto_get(s->protocol); cp->timeout = pp->timeout_table[cp->state]; ip_vs_conn_put(cp); -- cgit v1.2.2 From 230140cffa7feae90ad50bf259db1fa07674f3a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Nov 2007 02:40:20 -0800 Subject: [INET]: Remove per bucket rwlock in tcp/dccp ehash table. As done two years ago on IP route cache table (commit 22c047ccbc68fa8f3fa57f0e8f906479a062c426) , we can avoid using one lock per hash bucket for the huge TCP/DCCP hash tables. On a typical x86_64 platform, this saves about 2MB or 4MB of ram, for litle performance differences. (we hit a different cache line for the rwlock, but then the bucket cache line have a better sharing factor among cpus, since we dirty it less often). For netstat or ss commands that want a full scan of hash table, we perform fewer memory accesses. Using a 'small' table of hashed rwlocks should be more than enough to provide correct SMP concurrency between different buckets, without using too much memory. Sizing of this table depends on num_possible_cpus() and various CONFIG settings. This patch provides some locking abstraction that may ease a future work using a different model for TCP/DCCP table. Signed-off-by: Eric Dumazet Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/proto.c | 9 +++++++-- net/ipv4/inet_diag.c | 9 +++++---- net/ipv4/inet_hashtables.c | 7 ++++--- net/ipv4/inet_timewait_sock.c | 13 +++++++------ net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_ipv4.c | 11 ++++++----- net/ipv6/inet6_hashtables.c | 19 ++++++++++--------- 7 files changed, 41 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/dccp/proto.c b/net/dccp/proto.c index d84973928033..7a3bea9c28c1 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1072,11 +1072,13 @@ static int __init dccp_init(void) } for (i = 0; i < dccp_hashinfo.ehash_size; i++) { - rwlock_init(&dccp_hashinfo.ehash[i].lock); INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain); } + if (inet_ehash_locks_alloc(&dccp_hashinfo)) + goto out_free_dccp_ehash; + bhash_order = ehash_order; do { @@ -1091,7 +1093,7 @@ static int __init dccp_init(void) if (!dccp_hashinfo.bhash) { DCCP_CRIT("Failed to allocate DCCP bind hash table"); - goto out_free_dccp_ehash; + goto out_free_dccp_locks; } for (i = 0; i < dccp_hashinfo.bhash_size; i++) { @@ -1121,6 +1123,8 @@ out_free_dccp_mib: out_free_dccp_bhash: free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); dccp_hashinfo.bhash = NULL; +out_free_dccp_locks: + inet_ehash_locks_free(&dccp_hashinfo); out_free_dccp_ehash: free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); dccp_hashinfo.ehash = NULL; @@ -1139,6 +1143,7 @@ static void __exit dccp_fini(void) free_pages((unsigned long)dccp_hashinfo.ehash, get_order(dccp_hashinfo.ehash_size * sizeof(struct inet_ehash_bucket))); + inet_ehash_locks_free(&dccp_hashinfo); kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); dccp_ackvec_exit(); dccp_sysctl_exit(); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index dc429b6b0ba6..b0170732b5e9 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -747,13 +747,14 @@ skip_listen_ht: for (i = s_i; i < hashinfo->ehash_size; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; + rwlock_t *lock = inet_ehash_lockp(hashinfo, i); struct sock *sk; struct hlist_node *node; if (i > s_i) s_num = 0; - read_lock_bh(&head->lock); + read_lock_bh(lock); num = 0; sk_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); @@ -769,7 +770,7 @@ skip_listen_ht: r->id.idiag_dport) goto next_normal; if (inet_csk_diag_dump(sk, skb, cb) < 0) { - read_unlock_bh(&head->lock); + read_unlock_bh(lock); goto done; } next_normal: @@ -791,14 +792,14 @@ next_normal: r->id.idiag_dport) goto next_dying; if (inet_twsk_diag_dump(tw, skb, cb) < 0) { - read_unlock_bh(&head->lock); + read_unlock_bh(lock); goto done; } next_dying: ++num; } } - read_unlock_bh(&head->lock); + read_unlock_bh(lock); } done: diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 16eecc7046a3..67704da04fc4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -204,12 +204,13 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); + rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; prefetch(head->chain.first); - write_lock(&head->lock); + write_lock(lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &head->twchain) { @@ -239,7 +240,7 @@ unique: BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); + write_unlock(lock); if (twp) { *twp = tw; @@ -255,7 +256,7 @@ unique: return 0; not_unique: - write_unlock(&head->lock); + write_unlock(lock); return -EADDRNOTAVAIL; } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 4e189e28f306..a60b99e0ebdc 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; /* Unlink from established hashes. */ - struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, tw->tw_hash); + rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); - write_lock(&ehead->lock); + write_lock(lock); if (hlist_unhashed(&tw->tw_node)) { - write_unlock(&ehead->lock); + write_unlock(lock); return; } __hlist_del(&tw->tw_node); sk_node_init(&tw->tw_node); - write_unlock(&ehead->lock); + write_unlock(lock); /* Disassociate with bind bucket. */ bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; @@ -59,6 +59,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); + rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in @@ -71,7 +72,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); - write_lock(&ehead->lock); + write_lock(lock); /* Step 2: Remove SK from established hash. */ if (__sk_del_node_init(sk)) @@ -81,7 +82,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, inet_twsk_add_node(tw, &ehead->twchain); atomic_inc(&tw->tw_refcnt); - write_unlock(&ehead->lock); + write_unlock(lock); } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c64072bb504b..8e65182f7af1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2456,11 +2456,11 @@ void __init tcp_init(void) thash_entries ? 0 : 512 * 1024); tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; for (i = 0; i < tcp_hashinfo.ehash_size; i++) { - rwlock_init(&tcp_hashinfo.ehash[i].lock); INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain); } - + if (inet_ehash_locks_alloc(&tcp_hashinfo)) + panic("TCP: failed to alloc ehash_locks"); tcp_hashinfo.bhash = alloc_large_system_hash("TCP bind", sizeof(struct inet_bind_hashbucket), diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e9127cdced20..e566f3c67677 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2049,8 +2049,9 @@ static void *established_get_first(struct seq_file *seq) struct sock *sk; struct hlist_node *node; struct inet_timewait_sock *tw; + rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); - read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + read_lock_bh(lock); sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family) { continue; @@ -2067,7 +2068,7 @@ static void *established_get_first(struct seq_file *seq) rc = tw; goto out; } - read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + read_unlock_bh(lock); st->state = TCP_SEQ_STATE_ESTABLISHED; } out: @@ -2094,11 +2095,11 @@ get_tw: cur = tw; goto out; } - read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); st->state = TCP_SEQ_STATE_ESTABLISHED; if (++st->bucket < tcp_hashinfo.ehash_size) { - read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); } else { cur = NULL; @@ -2206,7 +2207,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: if (v) - read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); break; } } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index d6f1026f1943..adc73adadfae 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -37,9 +37,8 @@ void __inet6_hash(struct inet_hashinfo *hashinfo, } else { unsigned int hash; sk->sk_hash = hash = inet6_sk_ehashfn(sk); - hash &= (hashinfo->ehash_size - 1); - list = &hashinfo->ehash[hash].chain; - lock = &hashinfo->ehash[hash].lock; + list = &inet_ehash_bucket(hashinfo, hash)->chain; + lock = inet_ehash_lockp(hashinfo, hash); write_lock(lock); } @@ -70,9 +69,10 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, */ unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); + rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); prefetch(head->chain.first); - read_lock(&head->lock); + read_lock(lock); sk_for_each(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) @@ -92,12 +92,12 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, goto hit; } } - read_unlock(&head->lock); + read_unlock(lock); return NULL; hit: sock_hold(sk); - read_unlock(&head->lock); + read_unlock(lock); return sk; } EXPORT_SYMBOL(__inet6_lookup_established); @@ -175,12 +175,13 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const unsigned int hash = inet6_ehashfn(daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); + rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; prefetch(head->chain.first); - write_lock(&head->lock); + write_lock(lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &head->twchain) { @@ -216,7 +217,7 @@ unique: __sk_add_node(sk, &head->chain); sk->sk_hash = hash; sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); + write_unlock(lock); if (twp != NULL) { *twp = tw; @@ -231,7 +232,7 @@ unique: return 0; not_unique: - write_unlock(&head->lock); + write_unlock(lock); return -EADDRNOTAVAIL; } -- cgit v1.2.2 From c3d8d1e30cace31fed6186a4b8c6b1401836d89c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 7 Nov 2007 02:42:09 -0800 Subject: [NETLINK]: Fix unicast timeouts Commit ed6dcf4a in the history.git tree broke netlink_unicast timeouts by moving the schedule_timeout() call to a new function that doesn't propagate the remaining timeout back to the caller. This means on each retry we start with the full timeout again. ipc/mqueue.c seems to actually want to wait indefinitely so this behaviour is retained. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 260171255576..415c97236f63 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -752,7 +752,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp) * 1: repeat lookup - reference dropped while waiting for socket memory. */ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, - long timeo, struct sock *ssk) + long *timeo, struct sock *ssk) { struct netlink_sock *nlk; @@ -761,7 +761,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || test_bit(0, &nlk->state)) { DECLARE_WAITQUEUE(wait, current); - if (!timeo) { + if (!*timeo) { if (!ssk || netlink_is_kernel(ssk)) netlink_overrun(sk); sock_put(sk); @@ -775,7 +775,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || test_bit(0, &nlk->state)) && !sock_flag(sk, SOCK_DEAD)) - timeo = schedule_timeout(timeo); + *timeo = schedule_timeout(*timeo); __set_current_state(TASK_RUNNING); remove_wait_queue(&nlk->wait, &wait); @@ -783,7 +783,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, if (signal_pending(current)) { kfree_skb(skb); - return sock_intr_errno(timeo); + return sock_intr_errno(*timeo); } return 1; } @@ -877,7 +877,7 @@ retry: if (netlink_is_kernel(sk)) return netlink_unicast_kernel(sk, skb); - err = netlink_attachskb(sk, skb, nonblock, timeo, ssk); + err = netlink_attachskb(sk, skb, nonblock, &timeo, ssk); if (err == 1) goto retry; if (err) -- cgit v1.2.2