From cdac4e07748934e37e415437055ed591aed9eb21 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 13 Jun 2005 15:12:33 -0700 Subject: [SCTP] Add support for ip_nonlocal_bind sysctl & IP_FREEBIND socket option Signed-off-by: Neil Horman Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/ip.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 3f63992eb712..32360bbe143f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -163,6 +163,7 @@ DECLARE_SNMP_STAT(struct linux_mib, net_statistics); extern int sysctl_local_port_range[2]; extern int sysctl_ip_default_ttl; +extern int sysctl_ip_nonlocal_bind; #ifdef CONFIG_INET /* The function in 2.2 was invalid, producing wrong result for -- cgit v1.2.2 From 26b15dad9f1c19d6d4f7b999b07eaa6d98e4b375 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 18 Jun 2005 22:42:13 -0700 Subject: [IPSEC] Add complete xfrm event notification Heres the final patch. What this patch provides - netlink xfrm events - ability to have events generated by netlink propagated to pfkey and vice versa. - fixes the acquire lets-be-happy-with-one-success issue Signed-off-by: Jamal Hadi Salim Signed-off-by: Herbert Xu --- include/net/xfrm.h | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d675836ba6c3..a159655ebede 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -158,6 +158,27 @@ enum { XFRM_STATE_DEAD }; +/* events that could be sent by kernel */ +enum { + XFRM_SAP_INVALID, + XFRM_SAP_EXPIRED, + XFRM_SAP_ADDED, + XFRM_SAP_UPDATED, + XFRM_SAP_DELETED, + XFRM_SAP_FLUSHED, + __XFRM_SAP_MAX +}; +#define XFRM_SAP_MAX (__XFRM_SAP_MAX - 1) + +/* callback structure passed from either netlink or pfkey */ +struct km_event +{ + u32 data; + u32 seq; + u32 pid; + u32 event; +}; + struct xfrm_type; struct xfrm_dst; struct xfrm_policy_afinfo { @@ -179,6 +200,8 @@ struct xfrm_policy_afinfo { extern int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo); extern int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo); +extern void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c); +extern void km_state_notify(struct xfrm_state *x, struct km_event *c); #define XFRM_ACQ_EXPIRES 30 @@ -290,11 +313,11 @@ struct xfrm_mgr { struct list_head list; char *id; - int (*notify)(struct xfrm_state *x, int event); + int (*notify)(struct xfrm_state *x, struct km_event *c); int (*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp, int dir); struct xfrm_policy *(*compile_policy)(u16 family, int opt, u8 *data, int len, int *dir); int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); - int (*notify_policy)(struct xfrm_policy *x, int dir, int event); + int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); }; extern int xfrm_register_km(struct xfrm_mgr *km); @@ -817,7 +840,7 @@ extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family); extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); -extern void xfrm_state_delete(struct xfrm_state *x); +extern int xfrm_state_delete(struct xfrm_state *x); extern void xfrm_state_flush(u8 proto); extern int xfrm_replay_check(struct xfrm_state *x, u32 seq); extern void xfrm_replay_advance(struct xfrm_state *x, u32 seq); -- cgit v1.2.2 From 4666faab095230ec8aa62da6c33391287f281154 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 18 Jun 2005 22:43:22 -0700 Subject: [IPSEC] Kill spurious hard expire messages This patch ensures that the hard state/policy expire notifications are only sent when the state/policy is successfully removed from their respective tables. As it is, it's possible for a state/policy to both expire through reaching a hard limit, as well as being deleted by the user. Note that this behaviour isn't actually forbidden by RFC 2367. However, it is a quality of implementation issue. As an added bonus, the restructuring in this patch will help eventually in moving the expire notifications from softirq context into process context, thus improving their reliability. One important side-effect from this change is that SAs reaching their hard byte/packet limits are now deleted immediately, just like SAs that have reached their hard time limits. Previously they were announced immediately but only deleted after 30 seconds. This is bad because it prevents the system from issuing an ACQUIRE command until the existing state was deleted by the user or expires after the time is up. In the scenario where the expire notification was lost this introduces a 30 second delay into the system for no good reason. Signed-off-by: Herbert Xu --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a159655ebede..aaa0f5f330e2 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -679,7 +679,7 @@ static inline int xfrm_sk_clone_policy(struct sock *sk) return 0; } -extern void xfrm_policy_delete(struct xfrm_policy *pol, int dir); +extern int xfrm_policy_delete(struct xfrm_policy *pol, int dir); static inline void xfrm_sk_free_policy(struct sock *sk) { -- cgit v1.2.2 From bf08867f91a43aa3ba2e4598c06c4769a6cdddf6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 18 Jun 2005 22:44:00 -0700 Subject: [IPSEC] Turn km_event.data into a union This patch turns km_event.data into a union. This makes code that uses it clearer. Signed-off-by: Herbert Xu --- include/net/xfrm.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index aaa0f5f330e2..fda356e81014 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -173,7 +173,12 @@ enum { /* callback structure passed from either netlink or pfkey */ struct km_event { - u32 data; + union { + u32 hard; + u32 proto; + u32 byid; + } data; + u32 seq; u32 pid; u32 event; -- cgit v1.2.2 From f60f6b8f70c756fc786d68f02ec17a1e84db645f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 18 Jun 2005 22:44:37 -0700 Subject: [IPSEC] Use XFRM_MSG_* instead of XFRM_SAP_* This patch removes XFRM_SAP_* and converts them over to XFRM_MSG_*. The netlink interface is meant to map directly onto the underlying xfrm subsystem. Therefore rather than using a new independent representation for the events we can simply use the existing ones from xfrm_user. Signed-off-by: Herbert Xu --- include/net/xfrm.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index fda356e81014..0e65e02b7a1d 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -158,18 +158,6 @@ enum { XFRM_STATE_DEAD }; -/* events that could be sent by kernel */ -enum { - XFRM_SAP_INVALID, - XFRM_SAP_EXPIRED, - XFRM_SAP_ADDED, - XFRM_SAP_UPDATED, - XFRM_SAP_DELETED, - XFRM_SAP_FLUSHED, - __XFRM_SAP_MAX -}; -#define XFRM_SAP_MAX (__XFRM_SAP_MAX - 1) - /* callback structure passed from either netlink or pfkey */ struct km_event { -- cgit v1.2.2 From 2e6599cb899ba4b133f42cbf9d2b1883d2dc583a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 18 Jun 2005 22:46:52 -0700 Subject: [NET] Generalise TCP's struct open_request minisock infrastructure Kept this first changeset minimal, without changing existing names to ease peer review. Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn has two new members: ->slab, that replaces tcp_openreq_cachep ->obj_size, to inform the size of the openreq descendant for a specific protocol The protocol specific fields in struct open_request were moved to a class hierarchy, with the things that are common to all connection oriented PF_INET protocols in struct inet_request_sock, the TCP ones in tcp_request_sock, that is an inet_request_sock, that is an open_request. I.e. this uses the same approach used for the struct sock class hierarchy, with sk_prot indicating if the protocol wants to use the open_request infrastructure by filling in sk_prot->rsk_prot with an or_calltable. Results? Performance is improved and TCP v4 now uses only 64 bytes per open request minisock, down from 96 without this patch :-) Next changeset will rename some of the structs, fields and functions mentioned above, struct or_calltable is way unclear, better name it struct request_sock_ops, s/struct open_request/struct request_sock/g, etc. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/request_sock.h | 77 ++++++++++++++++++++++++++++++++++++++++ include/net/sock.h | 4 +++ include/net/tcp.h | 87 ++++++---------------------------------------- include/net/tcp_ecn.h | 7 ++-- 4 files changed, 96 insertions(+), 79 deletions(-) create mode 100644 include/net/request_sock.h (limited to 'include/net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h new file mode 100644 index 000000000000..9502f5587931 --- /dev/null +++ b/include/net/request_sock.h @@ -0,0 +1,77 @@ +/* + * NET Generic infrastructure for Network protocols. + * + * Definitions for request_sock + * + * Authors: Arnaldo Carvalho de Melo + * + * From code originally in include/net/tcp.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _REQUEST_SOCK_H +#define _REQUEST_SOCK_H + +#include +#include +#include + +struct open_request; +struct sk_buff; +struct dst_entry; +struct proto; + +struct or_calltable { + int family; + kmem_cache_t *slab; + int obj_size; + int (*rtx_syn_ack)(struct sock *sk, + struct open_request *req, + struct dst_entry *dst); + void (*send_ack)(struct sk_buff *skb, + struct open_request *req); + void (*send_reset)(struct sk_buff *skb); + void (*destructor)(struct open_request *req); +}; + +/* struct open_request - mini sock to represent a connection request + */ +struct open_request { + struct open_request *dl_next; /* Must be first member! */ + u16 mss; + u8 retrans; + u8 __pad; + /* The following two fields can be easily recomputed I think -AK */ + u32 window_clamp; /* window clamp at creation time */ + u32 rcv_wnd; /* rcv_wnd offered first time */ + u32 ts_recent; + unsigned long expires; + struct or_calltable *class; + struct sock *sk; +}; + +static inline struct open_request *tcp_openreq_alloc(struct or_calltable *class) +{ + struct open_request *req = kmem_cache_alloc(class->slab, SLAB_ATOMIC); + + if (req != NULL) + req->class = class; + + return req; +} + +static inline void tcp_openreq_fastfree(struct open_request *req) +{ + kmem_cache_free(req->class->slab, req); +} + +static inline void tcp_openreq_free(struct open_request *req) +{ + req->class->destructor(req); + tcp_openreq_fastfree(req); +} + +#endif /* _REQUEST_SOCK_H */ diff --git a/include/net/sock.h b/include/net/sock.h index a9ef3a6a13f3..6919276af8af 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -484,6 +484,8 @@ extern void sk_stream_kill_queues(struct sock *sk); extern int sk_wait_data(struct sock *sk, long *timeo); +struct or_calltable; + /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface * transport -> network interface is defined by struct inet_proto @@ -547,6 +549,8 @@ struct proto { kmem_cache_t *slab; unsigned int obj_size; + struct or_calltable *rsk_prot; + struct module *owner; char name[32]; diff --git a/include/net/tcp.h b/include/net/tcp.h index e71f8ba3e101..d438ba566b89 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -613,74 +614,6 @@ extern atomic_t tcp_memory_allocated; extern atomic_t tcp_sockets_allocated; extern int tcp_memory_pressure; -struct open_request; - -struct or_calltable { - int family; - int (*rtx_syn_ack) (struct sock *sk, struct open_request *req, struct dst_entry*); - void (*send_ack) (struct sk_buff *skb, struct open_request *req); - void (*destructor) (struct open_request *req); - void (*send_reset) (struct sk_buff *skb); -}; - -struct tcp_v4_open_req { - __u32 loc_addr; - __u32 rmt_addr; - struct ip_options *opt; -}; - -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) -struct tcp_v6_open_req { - struct in6_addr loc_addr; - struct in6_addr rmt_addr; - struct sk_buff *pktopts; - int iif; -}; -#endif - -/* this structure is too big */ -struct open_request { - struct open_request *dl_next; /* Must be first member! */ - __u32 rcv_isn; - __u32 snt_isn; - __u16 rmt_port; - __u16 mss; - __u8 retrans; - __u8 __pad; - __u16 snd_wscale : 4, - rcv_wscale : 4, - tstamp_ok : 1, - sack_ok : 1, - wscale_ok : 1, - ecn_ok : 1, - acked : 1; - /* The following two fields can be easily recomputed I think -AK */ - __u32 window_clamp; /* window clamp at creation time */ - __u32 rcv_wnd; /* rcv_wnd offered first time */ - __u32 ts_recent; - unsigned long expires; - struct or_calltable *class; - struct sock *sk; - union { - struct tcp_v4_open_req v4_req; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - struct tcp_v6_open_req v6_req; -#endif - } af; -}; - -/* SLAB cache for open requests. */ -extern kmem_cache_t *tcp_openreq_cachep; - -#define tcp_openreq_alloc() kmem_cache_alloc(tcp_openreq_cachep, SLAB_ATOMIC) -#define tcp_openreq_fastfree(req) kmem_cache_free(tcp_openreq_cachep, req) - -static inline void tcp_openreq_free(struct open_request *req) -{ - req->class->destructor(req); - tcp_openreq_fastfree(req); -} - #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #define TCP_INET_FAMILY(fam) ((fam) == AF_INET) #else @@ -1832,17 +1765,19 @@ static __inline__ void tcp_openreq_init(struct open_request *req, struct tcp_options_received *rx_opt, struct sk_buff *skb) { + struct inet_request_sock *ireq = inet_rsk(req); + req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ - req->rcv_isn = TCP_SKB_CB(skb)->seq; + tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; - req->tstamp_ok = rx_opt->tstamp_ok; - req->sack_ok = rx_opt->sack_ok; - req->snd_wscale = rx_opt->snd_wscale; - req->wscale_ok = rx_opt->wscale_ok; - req->acked = 0; - req->ecn_ok = 0; - req->rmt_port = skb->h.th->source; + ireq->tstamp_ok = rx_opt->tstamp_ok; + ireq->sack_ok = rx_opt->sack_ok; + ireq->snd_wscale = rx_opt->snd_wscale; + ireq->wscale_ok = rx_opt->wscale_ok; + ireq->acked = 0; + ireq->ecn_ok = 0; + ireq->rmt_port = skb->h.th->source; } extern void tcp_enter_memory_pressure(void); diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index dc1456389a97..94ad970e844a 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -2,6 +2,7 @@ #define _NET_TCP_ECN_H_ 1 #include +#include #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) @@ -40,7 +41,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct tcp_sock *tp, static __inline__ void TCP_ECN_make_synack(struct open_request *req, struct tcphdr *th) { - if (req->ecn_ok) + if (inet_rsk(req)->ecn_ok) th->ece = 1; } @@ -113,14 +114,14 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, struct open_request *req) { - tp->ecn_flags = req->ecn_ok ? TCP_ECN_OK : 0; + tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; } static __inline__ void TCP_ECN_create_request(struct open_request *req, struct tcphdr *th) { if (sysctl_tcp_ecn && th->ece && th->cwr) - req->ecn_ok = 1; + inet_rsk(req)->ecn_ok = 1; } #endif -- cgit v1.2.2 From 60236fdd08b2169045a3bbfc5ffe1576e6c3c17b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 18 Jun 2005 22:47:21 -0700 Subject: [NET] Rename open_request to request_sock Ok, this one just renames some stuff to have a better namespace and to dissassociate it from TCP: struct open_request -> struct request_sock tcp_openreq_alloc -> reqsk_alloc tcp_openreq_free -> reqsk_free tcp_openreq_fastfree -> __reqsk_free With this most of the infrastructure closely resembles a struct sock methods subset. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/request_sock.h | 34 +++++++++++++++++----------------- include/net/sock.h | 4 ++-- include/net/tcp.h | 30 +++++++++++++++--------------- include/net/tcp_ecn.h | 6 +++--- 4 files changed, 37 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 9502f5587931..08a8fd1d1610 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -19,28 +19,28 @@ #include #include -struct open_request; +struct request_sock; struct sk_buff; struct dst_entry; struct proto; -struct or_calltable { +struct request_sock_ops { int family; kmem_cache_t *slab; int obj_size; int (*rtx_syn_ack)(struct sock *sk, - struct open_request *req, + struct request_sock *req, struct dst_entry *dst); void (*send_ack)(struct sk_buff *skb, - struct open_request *req); + struct request_sock *req); void (*send_reset)(struct sk_buff *skb); - void (*destructor)(struct open_request *req); + void (*destructor)(struct request_sock *req); }; -/* struct open_request - mini sock to represent a connection request +/* struct request_sock - mini sock to represent a connection request */ -struct open_request { - struct open_request *dl_next; /* Must be first member! */ +struct request_sock { + struct request_sock *dl_next; /* Must be first member! */ u16 mss; u8 retrans; u8 __pad; @@ -49,29 +49,29 @@ struct open_request { u32 rcv_wnd; /* rcv_wnd offered first time */ u32 ts_recent; unsigned long expires; - struct or_calltable *class; + struct request_sock_ops *rsk_ops; struct sock *sk; }; -static inline struct open_request *tcp_openreq_alloc(struct or_calltable *class) +static inline struct request_sock *reqsk_alloc(struct request_sock_ops *ops) { - struct open_request *req = kmem_cache_alloc(class->slab, SLAB_ATOMIC); + struct request_sock *req = kmem_cache_alloc(ops->slab, SLAB_ATOMIC); if (req != NULL) - req->class = class; + req->rsk_ops = ops; return req; } -static inline void tcp_openreq_fastfree(struct open_request *req) +static inline void __reqsk_free(struct request_sock *req) { - kmem_cache_free(req->class->slab, req); + kmem_cache_free(req->rsk_ops->slab, req); } -static inline void tcp_openreq_free(struct open_request *req) +static inline void reqsk_free(struct request_sock *req) { - req->class->destructor(req); - tcp_openreq_fastfree(req); + req->rsk_ops->destructor(req); + __reqsk_free(req); } #endif /* _REQUEST_SOCK_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 6919276af8af..e593af5b1ecc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -484,7 +484,7 @@ extern void sk_stream_kill_queues(struct sock *sk); extern int sk_wait_data(struct sock *sk, long *timeo); -struct or_calltable; +struct request_sock_ops; /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface @@ -549,7 +549,7 @@ struct proto { kmem_cache_t *slab; unsigned int obj_size; - struct or_calltable *rsk_prot; + struct request_sock_ops *rsk_prot; struct module *owner; diff --git a/include/net/tcp.h b/include/net/tcp.h index d438ba566b89..6663086a5e35 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -641,7 +641,7 @@ struct tcp_func { struct sock * (*syn_recv_sock) (struct sock *sk, struct sk_buff *skb, - struct open_request *req, + struct request_sock *req, struct dst_entry *dst); int (*remember_stamp) (struct sock *sk); @@ -785,8 +785,8 @@ extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw, unsigned len); extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, - struct open_request *req, - struct open_request **prev); + struct request_sock *req, + struct request_sock **prev); extern int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); @@ -836,12 +836,12 @@ extern int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); extern struct sock * tcp_create_openreq_child(struct sock *sk, - struct open_request *req, + struct request_sock *req, struct sk_buff *skb); extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, - struct open_request *req, + struct request_sock *req, struct dst_entry *dst); extern int tcp_v4_do_rcv(struct sock *sk, @@ -855,7 +855,7 @@ extern int tcp_connect(struct sock *sk); extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, - struct open_request *req); + struct request_sock *req); extern int tcp_disconnect(struct sock *sk, int flags); @@ -1683,7 +1683,7 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk->sk_rcvbuf); } -static inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req, +static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req, struct sock *child) { struct tcp_sock *tp = tcp_sk(sk); @@ -1707,11 +1707,11 @@ struct tcp_listen_opt int qlen_young; int clock_hand; u32 hash_rnd; - struct open_request *syn_table[TCP_SYNQ_HSIZE]; + struct request_sock *syn_table[TCP_SYNQ_HSIZE]; }; static inline void -tcp_synq_removed(struct sock *sk, struct open_request *req) +tcp_synq_removed(struct sock *sk, struct request_sock *req) { struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt; @@ -1745,23 +1745,23 @@ static inline int tcp_synq_is_full(struct sock *sk) return tcp_synq_len(sk) >> tcp_sk(sk)->listen_opt->max_qlen_log; } -static inline void tcp_synq_unlink(struct tcp_sock *tp, struct open_request *req, - struct open_request **prev) +static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req, + struct request_sock **prev) { write_lock(&tp->syn_wait_lock); *prev = req->dl_next; write_unlock(&tp->syn_wait_lock); } -static inline void tcp_synq_drop(struct sock *sk, struct open_request *req, - struct open_request **prev) +static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req, + struct request_sock **prev) { tcp_synq_unlink(tcp_sk(sk), req, prev); tcp_synq_removed(sk, req); - tcp_openreq_free(req); + reqsk_free(req); } -static __inline__ void tcp_openreq_init(struct open_request *req, +static __inline__ void tcp_openreq_init(struct request_sock *req, struct tcp_options_received *rx_opt, struct sk_buff *skb) { diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 94ad970e844a..64980ee8c92a 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -39,7 +39,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct tcp_sock *tp, } static __inline__ void -TCP_ECN_make_synack(struct open_request *req, struct tcphdr *th) +TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th) { if (inet_rsk(req)->ecn_ok) th->ece = 1; @@ -112,13 +112,13 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) } static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, - struct open_request *req) + struct request_sock *req) { tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; } static __inline__ void -TCP_ECN_create_request(struct open_request *req, struct tcphdr *th) +TCP_ECN_create_request(struct request_sock *req, struct tcphdr *th) { if (sysctl_tcp_ecn && th->ece && th->cwr) inet_rsk(req)->ecn_ok = 1; -- cgit v1.2.2 From 0e87506fcc734647c7b2497eee4eb81e785c857a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 18 Jun 2005 22:47:59 -0700 Subject: [NET] Generalise tcp_listen_opt This chunks out the accept_queue and tcp_listen_opt code and moves them to net/core/request_sock.c and include/net/request_sock.h, to make it useful for other transport protocols, DCCP being the first one to use it. Next patches will rename tcp_listen_opt to accept_sock and remove the inline tcp functions that just call a reqsk_queue_ function. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/request_sock.h | 178 +++++++++++++++++++++++++++++++++++++++++++++ include/net/tcp.h | 46 ++---------- 2 files changed, 186 insertions(+), 38 deletions(-) (limited to 'include/net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 08a8fd1d1610..38943ed04e73 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -16,7 +16,9 @@ #define _REQUEST_SOCK_H #include +#include #include + #include struct request_sock; @@ -74,4 +76,180 @@ static inline void reqsk_free(struct request_sock *req) __reqsk_free(req); } +extern int sysctl_max_syn_backlog; + +/** struct tcp_listen_opt - listen state + * + * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs + */ +struct tcp_listen_opt { + u8 max_qlen_log; + /* 3 bytes hole, try to use */ + int qlen; + int qlen_young; + int clock_hand; + u32 hash_rnd; + struct request_sock *syn_table[0]; +}; + +/** struct request_sock_queue - queue of request_socks + * + * @rskq_accept_head - FIFO head of established children + * @rskq_accept_tail - FIFO tail of established children + * @syn_wait_lock - serializer + * + * %syn_wait_lock is necessary only to avoid proc interface having to grab the main + * lock sock while browsing the listening hash (otherwise it's deadlock prone). + * + * This lock is acquired in read mode only from listening_get_next() seq_file + * op and it's acquired in write mode _only_ from code that is actively + * changing rskq_accept_head. All readers that are holding the master sock lock + * don't need to grab this lock in read mode too as rskq_accept_head. writes + * are always protected from the main sock lock. + */ +struct request_sock_queue { + struct request_sock *rskq_accept_head; + struct request_sock *rskq_accept_tail; + rwlock_t syn_wait_lock; + struct tcp_listen_opt *listen_opt; +}; + +extern int reqsk_queue_alloc(struct request_sock_queue *queue, + const int nr_table_entries); + +static inline struct tcp_listen_opt *reqsk_queue_yank_listen_sk(struct request_sock_queue *queue) +{ + struct tcp_listen_opt *lopt; + + write_lock_bh(&queue->syn_wait_lock); + lopt = queue->listen_opt; + queue->listen_opt = NULL; + write_unlock_bh(&queue->syn_wait_lock); + + return lopt; +} + +static inline void reqsk_queue_destroy(struct request_sock_queue *queue) +{ + kfree(reqsk_queue_yank_listen_sk(queue)); +} + +static inline struct request_sock * + reqsk_queue_yank_acceptq(struct request_sock_queue *queue) +{ + struct request_sock *req = queue->rskq_accept_head; + + queue->rskq_accept_head = queue->rskq_accept_head = NULL; + return req; +} + +static inline int reqsk_queue_empty(struct request_sock_queue *queue) +{ + return queue->rskq_accept_head == NULL; +} + +static inline void reqsk_queue_unlink(struct request_sock_queue *queue, + struct request_sock *req, + struct request_sock **prev_req) +{ + write_lock(&queue->syn_wait_lock); + *prev_req = req->dl_next; + write_unlock(&queue->syn_wait_lock); +} + +static inline void reqsk_queue_add(struct request_sock_queue *queue, + struct request_sock *req, + struct sock *parent, + struct sock *child) +{ + req->sk = child; + sk_acceptq_added(parent); + + if (queue->rskq_accept_head == NULL) + queue->rskq_accept_head = req; + else + queue->rskq_accept_tail->dl_next = req; + + queue->rskq_accept_tail = req; + req->dl_next = NULL; +} + +static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue) +{ + struct request_sock *req = queue->rskq_accept_head; + + BUG_TRAP(req != NULL); + + queue->rskq_accept_head = req->dl_next; + if (queue->rskq_accept_head == NULL) + queue->rskq_accept_tail = NULL; + + return req; +} + +static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue, + struct sock *parent) +{ + struct request_sock *req = reqsk_queue_remove(queue); + struct sock *child = req->sk; + + BUG_TRAP(child != NULL); + + sk_acceptq_removed(parent); + __reqsk_free(req); + return child; +} + +static inline int reqsk_queue_removed(struct request_sock_queue *queue, + struct request_sock *req) +{ + struct tcp_listen_opt *lopt = queue->listen_opt; + + if (req->retrans == 0) + --lopt->qlen_young; + + return --lopt->qlen; +} + +static inline int reqsk_queue_added(struct request_sock_queue *queue) +{ + struct tcp_listen_opt *lopt = queue->listen_opt; + const int prev_qlen = lopt->qlen; + + lopt->qlen_young++; + lopt->qlen++; + return prev_qlen; +} + +static inline int reqsk_queue_len(struct request_sock_queue *queue) +{ + return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; +} + +static inline int reqsk_queue_len_young(struct request_sock_queue *queue) +{ + return queue->listen_opt->qlen_young; +} + +static inline int reqsk_queue_is_full(struct request_sock_queue *queue) +{ + return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; +} + +static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, + u32 hash, struct request_sock *req, + unsigned timeout) +{ + struct tcp_listen_opt *lopt = queue->listen_opt; + + req->expires = jiffies + timeout; + req->retrans = 0; + req->sk = NULL; + req->dl_next = lopt->syn_table[hash]; + + write_lock(&queue->syn_wait_lock); + lopt->syn_table[hash] = req; + write_unlock(&queue->syn_wait_lock); +} + #endif /* _REQUEST_SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 6663086a5e35..a2e323c54457 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1686,71 +1686,41 @@ static inline int tcp_full_space(const struct sock *sk) static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req, struct sock *child) { - struct tcp_sock *tp = tcp_sk(sk); - - req->sk = child; - sk_acceptq_added(sk); - - if (!tp->accept_queue_tail) { - tp->accept_queue = req; - } else { - tp->accept_queue_tail->dl_next = req; - } - tp->accept_queue_tail = req; - req->dl_next = NULL; + reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child); } -struct tcp_listen_opt -{ - u8 max_qlen_log; /* log_2 of maximal queued SYNs */ - int qlen; - int qlen_young; - int clock_hand; - u32 hash_rnd; - struct request_sock *syn_table[TCP_SYNQ_HSIZE]; -}; - static inline void tcp_synq_removed(struct sock *sk, struct request_sock *req) { - struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt; - - if (--lopt->qlen == 0) + if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0) tcp_delete_keepalive_timer(sk); - if (req->retrans == 0) - lopt->qlen_young--; } static inline void tcp_synq_added(struct sock *sk) { - struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt; - - if (lopt->qlen++ == 0) + if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0) tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); - lopt->qlen_young++; } static inline int tcp_synq_len(struct sock *sk) { - return tcp_sk(sk)->listen_opt->qlen; + return reqsk_queue_len(&tcp_sk(sk)->accept_queue); } static inline int tcp_synq_young(struct sock *sk) { - return tcp_sk(sk)->listen_opt->qlen_young; + return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue); } static inline int tcp_synq_is_full(struct sock *sk) { - return tcp_synq_len(sk) >> tcp_sk(sk)->listen_opt->max_qlen_log; + return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue); } static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req, - struct request_sock **prev) + struct request_sock **prev) { - write_lock(&tp->syn_wait_lock); - *prev = req->dl_next; - write_unlock(&tp->syn_wait_lock); + reqsk_queue_unlink(&tp->accept_queue, req, prev); } static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req, -- cgit v1.2.2 From 2ad69c55a282315e6119cf7fd744f26a925bdfd2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 18 Jun 2005 22:48:55 -0700 Subject: [NET] rename struct tcp_listen_opt to struct listen_sock Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/request_sock.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 38943ed04e73..72fd6f5e86b1 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -78,11 +78,11 @@ static inline void reqsk_free(struct request_sock *req) extern int sysctl_max_syn_backlog; -/** struct tcp_listen_opt - listen state +/** struct listen_sock - listen state * * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs */ -struct tcp_listen_opt { +struct listen_sock { u8 max_qlen_log; /* 3 bytes hole, try to use */ int qlen; @@ -111,15 +111,15 @@ struct request_sock_queue { struct request_sock *rskq_accept_head; struct request_sock *rskq_accept_tail; rwlock_t syn_wait_lock; - struct tcp_listen_opt *listen_opt; + struct listen_sock *listen_opt; }; extern int reqsk_queue_alloc(struct request_sock_queue *queue, const int nr_table_entries); -static inline struct tcp_listen_opt *reqsk_queue_yank_listen_sk(struct request_sock_queue *queue) +static inline struct listen_sock *reqsk_queue_yank_listen_sk(struct request_sock_queue *queue) { - struct tcp_listen_opt *lopt; + struct listen_sock *lopt; write_lock_bh(&queue->syn_wait_lock); lopt = queue->listen_opt; @@ -203,7 +203,7 @@ static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queu static inline int reqsk_queue_removed(struct request_sock_queue *queue, struct request_sock *req) { - struct tcp_listen_opt *lopt = queue->listen_opt; + struct listen_sock *lopt = queue->listen_opt; if (req->retrans == 0) --lopt->qlen_young; @@ -213,7 +213,7 @@ static inline int reqsk_queue_removed(struct request_sock_queue *queue, static inline int reqsk_queue_added(struct request_sock_queue *queue) { - struct tcp_listen_opt *lopt = queue->listen_opt; + struct listen_sock *lopt = queue->listen_opt; const int prev_qlen = lopt->qlen; lopt->qlen_young++; @@ -240,7 +240,7 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, u32 hash, struct request_sock *req, unsigned timeout) { - struct tcp_listen_opt *lopt = queue->listen_opt; + struct listen_sock *lopt = queue->listen_opt; req->expires = jiffies + timeout; req->retrans = 0; -- cgit v1.2.2 From e52c1f17e4ea8e61bd26eb25f1a184202693c2b9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 18 Jun 2005 22:49:40 -0700 Subject: [NET]: Move sysctl_max_syn_backlog into request_sock.c This fixes the CONFIG_INET=n build failure noticed by Andrew Morton. Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index a2e323c54457..f730935b824a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -564,7 +564,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) #define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ /* sysctl variables for tcp */ -extern int sysctl_max_syn_backlog; extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; -- cgit v1.2.2 From c7fb64db001f83ece669c76a02d8ec2fdb1dd307 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 18 Jun 2005 22:50:55 -0700 Subject: [NETLINK]: Neighbour table configuration and statistics via rtnetlink To retrieve the neighbour tables send RTM_GETNEIGHTBL with the NLM_F_DUMP flag set. Every neighbour table configuration is spread over multiple messages to avoid running into message size limits on systems with many interfaces. The first message in the sequence transports all not device specific data such as statistics, configuration, and the default parameter set. This message is followed by 0..n messages carrying device specific parameter sets. Although the ordering should be sufficient, NDTA_NAME can be used to identify sequences. The initial message can be identified by checking for NDTA_CONFIG. The device specific messages do not contain this TLV but have NDTPA_IFINDEX set to the corresponding interface index. To change neighbour table attributes, send RTM_SETNEIGHTBL with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked otherwise. Device specific parameter sets can be changed by setting NDTPA_IFINDEX to the interface index of the corresponding device. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/neighbour.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 4f33bbc21e7f..17191ac9be70 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -65,6 +65,7 @@ struct neighbour; struct neigh_parms { + struct net_device *dev; struct neigh_parms *next; int (*neigh_setup)(struct neighbour *); struct neigh_table *tbl; @@ -252,6 +253,9 @@ extern int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); extern int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); extern void neigh_app_ns(struct neighbour *n); +extern int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb); +extern int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); + extern void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie); extern void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)); extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *)); -- cgit v1.2.2 From 88121aea7bdb5fdc527388e262381829c4e1db16 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 18 Jun 2005 22:51:12 -0700 Subject: [NEIGHBOUR]: Remove unused fields in struct neigh_parms and neigh_table Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/neighbour.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 17191ac9be70..89809891e5ab 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -69,8 +69,6 @@ struct neigh_parms struct neigh_parms *next; int (*neigh_setup)(struct neighbour *); struct neigh_table *tbl; - int entries; - void *priv; void *sysctl_table; @@ -193,7 +191,6 @@ struct neigh_table atomic_t entries; rwlock_t lock; unsigned long last_rand; - struct neigh_parms *parms_list; kmem_cache_t *kmem_cachep; struct neigh_statistics *stats; struct neighbour **hash_buckets; -- cgit v1.2.2 From 9972b25d0c6e7f8f893eb3444dea37b42b1201de Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 18 Jun 2005 22:57:26 -0700 Subject: [PKT_SCHED]: Generic queue management interface for qdiscs using internal skb queues Implements an interface to be used by leaf qdiscs maintaining an internal skb queue. The interface maintains a backlog in bytes additionaly to the skb_queue_len() maintained by the queue itself. Relevant statistics get incremented automatically. Every function comes in two variants, one assuming Qdisc->q is used as queue and the second taking a sk_buff_head as argument. Be aware that, if you use multiple queues, you still have to maintain the Qdisc->q.qlen counter yourself. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/sch_generic.h | 122 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c57504b3b518..7b97405e2dbf 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -172,4 +172,126 @@ tcf_destroy(struct tcf_proto *tp) kfree(tp); } +static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff_head *list) +{ + __skb_queue_tail(list, skb); + sch->qstats.backlog += skb->len; + sch->bstats.bytes += skb->len; + sch->bstats.packets++; + + return NET_XMIT_SUCCESS; +} + +static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) +{ + return __qdisc_enqueue_tail(skb, sch, &sch->q); +} + +static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch, + struct sk_buff_head *list) +{ + struct sk_buff *skb = __skb_dequeue(list); + + if (likely(skb != NULL)) + sch->qstats.backlog -= skb->len; + + return skb; +} + +static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) +{ + return __qdisc_dequeue_head(sch, &sch->q); +} + +static inline struct sk_buff *__qdisc_dequeue_tail(struct Qdisc *sch, + struct sk_buff_head *list) +{ + struct sk_buff *skb = __skb_dequeue_tail(list); + + if (likely(skb != NULL)) + sch->qstats.backlog -= skb->len; + + return skb; +} + +static inline struct sk_buff *qdisc_dequeue_tail(struct Qdisc *sch) +{ + return __qdisc_dequeue_tail(sch, &sch->q); +} + +static inline int __qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff_head *list) +{ + __skb_queue_head(list, skb); + sch->qstats.backlog += skb->len; + sch->qstats.requeues++; + + return NET_XMIT_SUCCESS; +} + +static inline int qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch) +{ + return __qdisc_requeue(skb, sch, &sch->q); +} + +static inline void __qdisc_reset_queue(struct Qdisc *sch, + struct sk_buff_head *list) +{ + /* + * We do not know the backlog in bytes of this list, it + * is up to the caller to correct it + */ + skb_queue_purge(list); +} + +static inline void qdisc_reset_queue(struct Qdisc *sch) +{ + __qdisc_reset_queue(sch, &sch->q); + sch->qstats.backlog = 0; +} + +static inline unsigned int __qdisc_queue_drop(struct Qdisc *sch, + struct sk_buff_head *list) +{ + struct sk_buff *skb = __qdisc_dequeue_tail(sch, list); + + if (likely(skb != NULL)) { + unsigned int len = skb->len; + kfree_skb(skb); + return len; + } + + return 0; +} + +static inline unsigned int qdisc_queue_drop(struct Qdisc *sch) +{ + return __qdisc_queue_drop(sch, &sch->q); +} + +static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) +{ + kfree_skb(skb); + sch->qstats.drops++; + + return NET_XMIT_DROP; +} + +static inline int qdisc_reshape_fail(struct sk_buff *skb, struct Qdisc *sch) +{ + sch->qstats.drops++; + +#ifdef CONFIG_NET_CLS_POLICE + if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) + goto drop; + + return NET_XMIT_SUCCESS; + +drop: +#endif + kfree_skb(skb); + return NET_XMIT_DROP; +} + #endif -- cgit v1.2.2