diff options
Diffstat (limited to 'include/net')
40 files changed, 1483 insertions, 704 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h index ed00a995f576..b55eb7c7f033 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h | |||
@@ -63,7 +63,7 @@ struct tc_action_ops | |||
63 | __u32 type; /* TBD to match kind */ | 63 | __u32 type; /* TBD to match kind */ |
64 | __u32 capab; /* capabilities includes 4 bit version */ | 64 | __u32 capab; /* capabilities includes 4 bit version */ |
65 | struct module *owner; | 65 | struct module *owner; |
66 | int (*act)(struct sk_buff **, struct tc_action *); | 66 | int (*act)(struct sk_buff **, struct tc_action *, struct tcf_result *); |
67 | int (*get_stats)(struct sk_buff *, struct tc_action *); | 67 | int (*get_stats)(struct sk_buff *, struct tc_action *); |
68 | int (*dump)(struct sk_buff *, struct tc_action *,int , int); | 68 | int (*dump)(struct sk_buff *, struct tc_action *,int , int); |
69 | int (*cleanup)(struct tc_action *, int bind); | 69 | int (*cleanup)(struct tc_action *, int bind); |
diff --git a/include/net/addrconf.h b/include/net/addrconf.h index a0ed93672176..750e2508dd90 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h | |||
@@ -45,6 +45,7 @@ struct prefix_info { | |||
45 | 45 | ||
46 | #ifdef __KERNEL__ | 46 | #ifdef __KERNEL__ |
47 | 47 | ||
48 | #include <linux/config.h> | ||
48 | #include <linux/netdevice.h> | 49 | #include <linux/netdevice.h> |
49 | #include <net/if_inet6.h> | 50 | #include <net/if_inet6.h> |
50 | #include <net/ipv6.h> | 51 | #include <net/ipv6.h> |
@@ -238,5 +239,10 @@ static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) | |||
238 | addr->s6_addr32[3] == htonl(0x00000002)); | 239 | addr->s6_addr32[3] == htonl(0x00000002)); |
239 | } | 240 | } |
240 | 241 | ||
242 | #ifdef CONFIG_PROC_FS | ||
243 | extern int if6_proc_init(void); | ||
244 | extern void if6_proc_exit(void); | ||
245 | #endif | ||
246 | |||
241 | #endif | 247 | #endif |
242 | #endif | 248 | #endif |
diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b60b3846b9d1..b5d785ab4a0e 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h | |||
@@ -1,5 +1,11 @@ | |||
1 | #ifndef __LINUX_NET_AFUNIX_H | 1 | #ifndef __LINUX_NET_AFUNIX_H |
2 | #define __LINUX_NET_AFUNIX_H | 2 | #define __LINUX_NET_AFUNIX_H |
3 | |||
4 | #include <linux/config.h> | ||
5 | #include <linux/socket.h> | ||
6 | #include <linux/un.h> | ||
7 | #include <net/sock.h> | ||
8 | |||
3 | extern void unix_inflight(struct file *fp); | 9 | extern void unix_inflight(struct file *fp); |
4 | extern void unix_notinflight(struct file *fp); | 10 | extern void unix_notinflight(struct file *fp); |
5 | extern void unix_gc(void); | 11 | extern void unix_gc(void); |
@@ -74,5 +80,14 @@ struct unix_sock { | |||
74 | wait_queue_head_t peer_wait; | 80 | wait_queue_head_t peer_wait; |
75 | }; | 81 | }; |
76 | #define unix_sk(__sk) ((struct unix_sock *)__sk) | 82 | #define unix_sk(__sk) ((struct unix_sock *)__sk) |
83 | |||
84 | #ifdef CONFIG_SYSCTL | ||
85 | extern int sysctl_unix_max_dgram_qlen; | ||
86 | extern void unix_sysctl_register(void); | ||
87 | extern void unix_sysctl_unregister(void); | ||
88 | #else | ||
89 | static inline void unix_sysctl_register(void) {} | ||
90 | static inline void unix_sysctl_unregister(void) {} | ||
91 | #endif | ||
77 | #endif | 92 | #endif |
78 | #endif | 93 | #endif |
diff --git a/include/net/arp.h b/include/net/arp.h index a1f09fad6a52..a13e30c35f42 100644 --- a/include/net/arp.h +++ b/include/net/arp.h | |||
@@ -11,7 +11,7 @@ extern struct neigh_table arp_tbl; | |||
11 | 11 | ||
12 | extern void arp_init(void); | 12 | extern void arp_init(void); |
13 | extern int arp_rcv(struct sk_buff *skb, struct net_device *dev, | 13 | extern int arp_rcv(struct sk_buff *skb, struct net_device *dev, |
14 | struct packet_type *pt); | 14 | struct packet_type *pt, struct net_device *orig_dev); |
15 | extern int arp_find(unsigned char *haddr, struct sk_buff *skb); | 15 | extern int arp_find(unsigned char *haddr, struct sk_buff *skb); |
16 | extern int arp_ioctl(unsigned int cmd, void __user *arg); | 16 | extern int arp_ioctl(unsigned int cmd, void __user *arg); |
17 | extern void arp_send(int type, int ptype, u32 dest_ip, | 17 | extern void arp_send(int type, int ptype, u32 dest_ip, |
diff --git a/include/net/ax25.h b/include/net/ax25.h index 828a3a93dda1..926eed543023 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h | |||
@@ -139,11 +139,25 @@ enum { | |||
139 | #define AX25_DEF_DS_TIMEOUT (3 * 60 * HZ) /* DAMA timeout 3 minutes */ | 139 | #define AX25_DEF_DS_TIMEOUT (3 * 60 * HZ) /* DAMA timeout 3 minutes */ |
140 | 140 | ||
141 | typedef struct ax25_uid_assoc { | 141 | typedef struct ax25_uid_assoc { |
142 | struct ax25_uid_assoc *next; | 142 | struct hlist_node uid_node; |
143 | atomic_t refcount; | ||
143 | uid_t uid; | 144 | uid_t uid; |
144 | ax25_address call; | 145 | ax25_address call; |
145 | } ax25_uid_assoc; | 146 | } ax25_uid_assoc; |
146 | 147 | ||
148 | #define ax25_uid_for_each(__ax25, node, list) \ | ||
149 | hlist_for_each_entry(__ax25, node, list, uid_node) | ||
150 | |||
151 | #define ax25_uid_hold(ax25) \ | ||
152 | atomic_inc(&((ax25)->refcount)) | ||
153 | |||
154 | static inline void ax25_uid_put(ax25_uid_assoc *assoc) | ||
155 | { | ||
156 | if (atomic_dec_and_test(&assoc->refcount)) { | ||
157 | kfree(assoc); | ||
158 | } | ||
159 | } | ||
160 | |||
147 | typedef struct { | 161 | typedef struct { |
148 | ax25_address calls[AX25_MAX_DIGIS]; | 162 | ax25_address calls[AX25_MAX_DIGIS]; |
149 | unsigned char repeated[AX25_MAX_DIGIS]; | 163 | unsigned char repeated[AX25_MAX_DIGIS]; |
@@ -302,7 +316,7 @@ extern int ax25_protocol_is_registered(unsigned int); | |||
302 | 316 | ||
303 | /* ax25_in.c */ | 317 | /* ax25_in.c */ |
304 | extern int ax25_rx_iframe(ax25_cb *, struct sk_buff *); | 318 | extern int ax25_rx_iframe(ax25_cb *, struct sk_buff *); |
305 | extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *); | 319 | extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); |
306 | 320 | ||
307 | /* ax25_ip.c */ | 321 | /* ax25_ip.c */ |
308 | extern int ax25_encapsulate(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int); | 322 | extern int ax25_encapsulate(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int); |
@@ -376,7 +390,7 @@ extern unsigned long ax25_display_timer(struct timer_list *); | |||
376 | 390 | ||
377 | /* ax25_uid.c */ | 391 | /* ax25_uid.c */ |
378 | extern int ax25_uid_policy; | 392 | extern int ax25_uid_policy; |
379 | extern ax25_address *ax25_findbyuid(uid_t); | 393 | extern ax25_uid_assoc *ax25_findbyuid(uid_t); |
380 | extern int ax25_uid_ioctl(int, struct sockaddr_ax25 *); | 394 | extern int ax25_uid_ioctl(int, struct sockaddr_ax25 *); |
381 | extern struct file_operations ax25_uid_fops; | 395 | extern struct file_operations ax25_uid_fops; |
382 | extern void ax25_uid_free(void); | 396 | extern void ax25_uid_free(void); |
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 42a84c53678b..6dfa4a61ffd0 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h | |||
@@ -57,12 +57,6 @@ | |||
57 | #define BT_DBG(fmt, arg...) printk(KERN_INFO "%s: " fmt "\n" , __FUNCTION__ , ## arg) | 57 | #define BT_DBG(fmt, arg...) printk(KERN_INFO "%s: " fmt "\n" , __FUNCTION__ , ## arg) |
58 | #define BT_ERR(fmt, arg...) printk(KERN_ERR "%s: " fmt "\n" , __FUNCTION__ , ## arg) | 58 | #define BT_ERR(fmt, arg...) printk(KERN_ERR "%s: " fmt "\n" , __FUNCTION__ , ## arg) |
59 | 59 | ||
60 | #ifdef HCI_DATA_DUMP | ||
61 | #define BT_DMP(buf, len) bt_dump(__FUNCTION__, buf, len) | ||
62 | #else | ||
63 | #define BT_DMP(D...) | ||
64 | #endif | ||
65 | |||
66 | extern struct proc_dir_entry *proc_bt; | 60 | extern struct proc_dir_entry *proc_bt; |
67 | 61 | ||
68 | /* Connection and socket states */ | 62 | /* Connection and socket states */ |
@@ -137,11 +131,12 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock); | |||
137 | 131 | ||
138 | /* Skb helpers */ | 132 | /* Skb helpers */ |
139 | struct bt_skb_cb { | 133 | struct bt_skb_cb { |
140 | int incoming; | 134 | __u8 pkt_type; |
135 | __u8 incoming; | ||
141 | }; | 136 | }; |
142 | #define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb)) | 137 | #define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb)) |
143 | 138 | ||
144 | static inline struct sk_buff *bt_skb_alloc(unsigned int len, int how) | 139 | static inline struct sk_buff *bt_skb_alloc(unsigned int len, unsigned int __nocast how) |
145 | { | 140 | { |
146 | struct sk_buff *skb; | 141 | struct sk_buff *skb; |
147 | 142 | ||
@@ -174,8 +169,6 @@ static inline int skb_frags_no(struct sk_buff *skb) | |||
174 | return n; | 169 | return n; |
175 | } | 170 | } |
176 | 171 | ||
177 | void bt_dump(char *pref, __u8 *buf, int count); | ||
178 | |||
179 | int bt_err(__u16 code); | 172 | int bt_err(__u16 code); |
180 | 173 | ||
181 | #endif /* __BLUETOOTH_H */ | 174 | #endif /* __BLUETOOTH_H */ |
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 6f0706f4af68..371e7d3f2e6f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h | |||
@@ -453,6 +453,15 @@ struct inquiry_info_with_rssi { | |||
453 | __u16 clock_offset; | 453 | __u16 clock_offset; |
454 | __s8 rssi; | 454 | __s8 rssi; |
455 | } __attribute__ ((packed)); | 455 | } __attribute__ ((packed)); |
456 | struct inquiry_info_with_rssi_and_pscan_mode { | ||
457 | bdaddr_t bdaddr; | ||
458 | __u8 pscan_rep_mode; | ||
459 | __u8 pscan_period_mode; | ||
460 | __u8 pscan_mode; | ||
461 | __u8 dev_class[3]; | ||
462 | __u16 clock_offset; | ||
463 | __s8 rssi; | ||
464 | } __attribute__ ((packed)); | ||
456 | 465 | ||
457 | #define HCI_EV_CONN_COMPLETE 0x03 | 466 | #define HCI_EV_CONN_COMPLETE 0x03 |
458 | struct hci_ev_conn_complete { | 467 | struct hci_ev_conn_complete { |
@@ -584,6 +593,12 @@ struct hci_ev_clock_offset { | |||
584 | __u16 clock_offset; | 593 | __u16 clock_offset; |
585 | } __attribute__ ((packed)); | 594 | } __attribute__ ((packed)); |
586 | 595 | ||
596 | #define HCI_EV_PSCAN_REP_MODE 0x20 | ||
597 | struct hci_ev_pscan_rep_mode { | ||
598 | bdaddr_t bdaddr; | ||
599 | __u8 pscan_rep_mode; | ||
600 | } __attribute__ ((packed)); | ||
601 | |||
587 | /* Internal events generated by Bluetooth stack */ | 602 | /* Internal events generated by Bluetooth stack */ |
588 | #define HCI_EV_STACK_INTERNAL 0xFD | 603 | #define HCI_EV_STACK_INTERNAL 0xFD |
589 | struct hci_ev_stack_internal { | 604 | struct hci_ev_stack_internal { |
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6d63a47c731b..7f933f302078 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h | |||
@@ -404,7 +404,7 @@ static inline int hci_recv_frame(struct sk_buff *skb) | |||
404 | bt_cb(skb)->incoming = 1; | 404 | bt_cb(skb)->incoming = 1; |
405 | 405 | ||
406 | /* Time stamp */ | 406 | /* Time stamp */ |
407 | do_gettimeofday(&skb->stamp); | 407 | __net_timestamp(skb); |
408 | 408 | ||
409 | /* Queue frame for rx task */ | 409 | /* Queue frame for rx task */ |
410 | skb_queue_tail(&hdev->rx_q, skb); | 410 | skb_queue_tail(&hdev->rx_q, skb); |
diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 13669bad00b3..ffea9d54071f 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h | |||
@@ -80,9 +80,9 @@ | |||
80 | #define RFCOMM_RPN_STOP_15 1 | 80 | #define RFCOMM_RPN_STOP_15 1 |
81 | 81 | ||
82 | #define RFCOMM_RPN_PARITY_NONE 0x0 | 82 | #define RFCOMM_RPN_PARITY_NONE 0x0 |
83 | #define RFCOMM_RPN_PARITY_ODD 0x4 | 83 | #define RFCOMM_RPN_PARITY_ODD 0x1 |
84 | #define RFCOMM_RPN_PARITY_EVEN 0x5 | 84 | #define RFCOMM_RPN_PARITY_EVEN 0x3 |
85 | #define RFCOMM_RPN_PARITY_MARK 0x6 | 85 | #define RFCOMM_RPN_PARITY_MARK 0x5 |
86 | #define RFCOMM_RPN_PARITY_SPACE 0x7 | 86 | #define RFCOMM_RPN_PARITY_SPACE 0x7 |
87 | 87 | ||
88 | #define RFCOMM_RPN_FLOW_NONE 0x00 | 88 | #define RFCOMM_RPN_FLOW_NONE 0x00 |
@@ -223,8 +223,14 @@ struct rfcomm_dlc { | |||
223 | #define RFCOMM_CFC_DISABLED 0 | 223 | #define RFCOMM_CFC_DISABLED 0 |
224 | #define RFCOMM_CFC_ENABLED RFCOMM_MAX_CREDITS | 224 | #define RFCOMM_CFC_ENABLED RFCOMM_MAX_CREDITS |
225 | 225 | ||
226 | /* ---- RFCOMM SEND RPN ---- */ | ||
227 | int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, | ||
228 | u8 bit_rate, u8 data_bits, u8 stop_bits, | ||
229 | u8 parity, u8 flow_ctrl_settings, | ||
230 | u8 xon_char, u8 xoff_char, u16 param_mask); | ||
231 | |||
226 | /* ---- RFCOMM DLCs (channels) ---- */ | 232 | /* ---- RFCOMM DLCs (channels) ---- */ |
227 | struct rfcomm_dlc *rfcomm_dlc_alloc(int prio); | 233 | struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio); |
228 | void rfcomm_dlc_free(struct rfcomm_dlc *d); | 234 | void rfcomm_dlc_free(struct rfcomm_dlc *d); |
229 | int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel); | 235 | int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel); |
230 | int rfcomm_dlc_close(struct rfcomm_dlc *d, int reason); | 236 | int rfcomm_dlc_close(struct rfcomm_dlc *d, int reason); |
diff --git a/include/net/datalink.h b/include/net/datalink.h index 5797ba3d2eb5..deb7ca75db48 100644 --- a/include/net/datalink.h +++ b/include/net/datalink.h | |||
@@ -9,7 +9,7 @@ struct datalink_proto { | |||
9 | unsigned short header_length; | 9 | unsigned short header_length; |
10 | 10 | ||
11 | int (*rcvfunc)(struct sk_buff *, struct net_device *, | 11 | int (*rcvfunc)(struct sk_buff *, struct net_device *, |
12 | struct packet_type *); | 12 | struct packet_type *, struct net_device *); |
13 | int (*request)(struct datalink_proto *, struct sk_buff *, | 13 | int (*request)(struct datalink_proto *, struct sk_buff *, |
14 | unsigned char *); | 14 | unsigned char *); |
15 | struct list_head node; | 15 | struct list_head node; |
diff --git a/include/net/dn.h b/include/net/dn.h index 5551c46db397..c1dbbd222793 100644 --- a/include/net/dn.h +++ b/include/net/dn.h | |||
@@ -3,6 +3,7 @@ | |||
3 | 3 | ||
4 | #include <linux/dn.h> | 4 | #include <linux/dn.h> |
5 | #include <net/sock.h> | 5 | #include <net/sock.h> |
6 | #include <net/tcp.h> | ||
6 | #include <asm/byteorder.h> | 7 | #include <asm/byteorder.h> |
7 | 8 | ||
8 | typedef unsigned short dn_address; | 9 | typedef unsigned short dn_address; |
diff --git a/include/net/icmp.h b/include/net/icmp.h index e5ef0d15fb45..6cdebeee5f96 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h | |||
@@ -57,4 +57,11 @@ static inline struct raw_sock *raw_sk(const struct sock *sk) | |||
57 | return (struct raw_sock *)sk; | 57 | return (struct raw_sock *)sk; |
58 | } | 58 | } |
59 | 59 | ||
60 | extern int sysctl_icmp_echo_ignore_all; | ||
61 | extern int sysctl_icmp_echo_ignore_broadcasts; | ||
62 | extern int sysctl_icmp_ignore_bogus_error_responses; | ||
63 | extern int sysctl_icmp_errors_use_inbound_ifaddr; | ||
64 | extern int sysctl_icmp_ratelimit; | ||
65 | extern int sysctl_icmp_ratemask; | ||
66 | |||
60 | #endif /* _ICMP_H */ | 67 | #endif /* _ICMP_H */ |
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h new file mode 100644 index 000000000000..03df3b157960 --- /dev/null +++ b/include/net/inet6_hashtables.h | |||
@@ -0,0 +1,130 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Authors: Lotsa people, from code originally in tcp | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #ifndef _INET6_HASHTABLES_H | ||
15 | #define _INET6_HASHTABLES_H | ||
16 | |||
17 | #include <linux/config.h> | ||
18 | |||
19 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | ||
20 | #include <linux/in6.h> | ||
21 | #include <linux/ipv6.h> | ||
22 | #include <linux/types.h> | ||
23 | |||
24 | #include <net/ipv6.h> | ||
25 | |||
26 | struct inet_hashinfo; | ||
27 | |||
28 | /* I have no idea if this is a good hash for v6 or not. -DaveM */ | ||
29 | static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, | ||
30 | const struct in6_addr *faddr, const u16 fport, | ||
31 | const int ehash_size) | ||
32 | { | ||
33 | int hashent = (lport ^ fport); | ||
34 | |||
35 | hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); | ||
36 | hashent ^= hashent >> 16; | ||
37 | hashent ^= hashent >> 8; | ||
38 | return (hashent & (ehash_size - 1)); | ||
39 | } | ||
40 | |||
41 | static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) | ||
42 | { | ||
43 | const struct inet_sock *inet = inet_sk(sk); | ||
44 | const struct ipv6_pinfo *np = inet6_sk(sk); | ||
45 | const struct in6_addr *laddr = &np->rcv_saddr; | ||
46 | const struct in6_addr *faddr = &np->daddr; | ||
47 | const __u16 lport = inet->num; | ||
48 | const __u16 fport = inet->dport; | ||
49 | return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size); | ||
50 | } | ||
51 | |||
52 | /* | ||
53 | * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so | ||
54 | * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM | ||
55 | * | ||
56 | * The sockhash lock must be held as a reader here. | ||
57 | */ | ||
58 | static inline struct sock * | ||
59 | __inet6_lookup_established(struct inet_hashinfo *hashinfo, | ||
60 | const struct in6_addr *saddr, | ||
61 | const u16 sport, | ||
62 | const struct in6_addr *daddr, | ||
63 | const u16 hnum, | ||
64 | const int dif) | ||
65 | { | ||
66 | struct sock *sk; | ||
67 | const struct hlist_node *node; | ||
68 | const __u32 ports = INET_COMBINED_PORTS(sport, hnum); | ||
69 | /* Optimize here for direct hit, only listening connections can | ||
70 | * have wildcards anyways. | ||
71 | */ | ||
72 | const int hash = inet6_ehashfn(daddr, hnum, saddr, sport, | ||
73 | hashinfo->ehash_size); | ||
74 | struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; | ||
75 | |||
76 | read_lock(&head->lock); | ||
77 | sk_for_each(sk, node, &head->chain) { | ||
78 | /* For IPV6 do the cheaper port and family tests first. */ | ||
79 | if (INET6_MATCH(sk, saddr, daddr, ports, dif)) | ||
80 | goto hit; /* You sunk my battleship! */ | ||
81 | } | ||
82 | /* Must check for a TIME_WAIT'er before going to listener hash. */ | ||
83 | sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { | ||
84 | const struct inet_timewait_sock *tw = inet_twsk(sk); | ||
85 | |||
86 | if(*((__u32 *)&(tw->tw_dport)) == ports && | ||
87 | sk->sk_family == PF_INET6) { | ||
88 | const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); | ||
89 | |||
90 | if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && | ||
91 | ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && | ||
92 | (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) | ||
93 | goto hit; | ||
94 | } | ||
95 | } | ||
96 | read_unlock(&head->lock); | ||
97 | return NULL; | ||
98 | |||
99 | hit: | ||
100 | sock_hold(sk); | ||
101 | read_unlock(&head->lock); | ||
102 | return sk; | ||
103 | } | ||
104 | |||
105 | extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, | ||
106 | const struct in6_addr *daddr, | ||
107 | const unsigned short hnum, | ||
108 | const int dif); | ||
109 | |||
110 | static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo, | ||
111 | const struct in6_addr *saddr, | ||
112 | const u16 sport, | ||
113 | const struct in6_addr *daddr, | ||
114 | const u16 hnum, | ||
115 | const int dif) | ||
116 | { | ||
117 | struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport, | ||
118 | daddr, hnum, dif); | ||
119 | if (sk) | ||
120 | return sk; | ||
121 | |||
122 | return inet6_lookup_listener(hashinfo, daddr, hnum, dif); | ||
123 | } | ||
124 | |||
125 | extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, | ||
126 | const struct in6_addr *saddr, const u16 sport, | ||
127 | const struct in6_addr *daddr, const u16 dport, | ||
128 | const int dif); | ||
129 | #endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */ | ||
130 | #endif /* _INET6_HASHTABLES_H */ | ||
diff --git a/include/net/inet_common.h b/include/net/inet_common.h index fbc1f4d140d8..f943306ce5ff 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h | |||
@@ -8,6 +8,11 @@ extern struct proto_ops inet_dgram_ops; | |||
8 | * INET4 prototypes used by INET6 | 8 | * INET4 prototypes used by INET6 |
9 | */ | 9 | */ |
10 | 10 | ||
11 | struct msghdr; | ||
12 | struct sock; | ||
13 | struct sockaddr; | ||
14 | struct socket; | ||
15 | |||
11 | extern void inet_remove_sock(struct sock *sk1); | 16 | extern void inet_remove_sock(struct sock *sk1); |
12 | extern void inet_put_sock(unsigned short num, | 17 | extern void inet_put_sock(unsigned short num, |
13 | struct sock *sk); | 18 | struct sock *sk); |
@@ -29,7 +34,6 @@ extern unsigned int inet_poll(struct file * file, struct socket *sock, struct p | |||
29 | extern int inet_listen(struct socket *sock, int backlog); | 34 | extern int inet_listen(struct socket *sock, int backlog); |
30 | 35 | ||
31 | extern void inet_sock_destruct(struct sock *sk); | 36 | extern void inet_sock_destruct(struct sock *sk); |
32 | extern atomic_t inet_sock_nr; | ||
33 | 37 | ||
34 | extern int inet_bind(struct socket *sock, | 38 | extern int inet_bind(struct socket *sock, |
35 | struct sockaddr *uaddr, int addr_len); | 39 | struct sockaddr *uaddr, int addr_len); |
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h new file mode 100644 index 000000000000..651f824c1008 --- /dev/null +++ b/include/net/inet_connection_sock.h | |||
@@ -0,0 +1,276 @@ | |||
1 | /* | ||
2 | * NET Generic infrastructure for INET connection oriented protocols. | ||
3 | * | ||
4 | * Definitions for inet_connection_sock | ||
5 | * | ||
6 | * Authors: Many people, see the TCP sources | ||
7 | * | ||
8 | * From code originally in TCP | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | */ | ||
15 | #ifndef _INET_CONNECTION_SOCK_H | ||
16 | #define _INET_CONNECTION_SOCK_H | ||
17 | |||
18 | #include <linux/ip.h> | ||
19 | #include <linux/string.h> | ||
20 | #include <linux/timer.h> | ||
21 | #include <net/request_sock.h> | ||
22 | |||
23 | #define INET_CSK_DEBUG 1 | ||
24 | |||
25 | /* Cancel timers, when they are not required. */ | ||
26 | #undef INET_CSK_CLEAR_TIMERS | ||
27 | |||
28 | struct inet_bind_bucket; | ||
29 | struct inet_hashinfo; | ||
30 | struct tcp_congestion_ops; | ||
31 | |||
32 | /** inet_connection_sock - INET connection oriented sock | ||
33 | * | ||
34 | * @icsk_accept_queue: FIFO of established children | ||
35 | * @icsk_bind_hash: Bind node | ||
36 | * @icsk_timeout: Timeout | ||
37 | * @icsk_retransmit_timer: Resend (no ack) | ||
38 | * @icsk_rto: Retransmit timeout | ||
39 | * @icsk_ca_ops Pluggable congestion control hook | ||
40 | * @icsk_ca_state: Congestion control state | ||
41 | * @icsk_retransmits: Number of unrecovered [RTO] timeouts | ||
42 | * @icsk_pending: Scheduled timer event | ||
43 | * @icsk_backoff: Backoff | ||
44 | * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries | ||
45 | * @icsk_probes_out: unanswered 0 window probes | ||
46 | * @icsk_ack: Delayed ACK control data | ||
47 | */ | ||
48 | struct inet_connection_sock { | ||
49 | /* inet_sock has to be the first member! */ | ||
50 | struct inet_sock icsk_inet; | ||
51 | struct request_sock_queue icsk_accept_queue; | ||
52 | struct inet_bind_bucket *icsk_bind_hash; | ||
53 | unsigned long icsk_timeout; | ||
54 | struct timer_list icsk_retransmit_timer; | ||
55 | struct timer_list icsk_delack_timer; | ||
56 | __u32 icsk_rto; | ||
57 | struct tcp_congestion_ops *icsk_ca_ops; | ||
58 | __u8 icsk_ca_state; | ||
59 | __u8 icsk_retransmits; | ||
60 | __u8 icsk_pending; | ||
61 | __u8 icsk_backoff; | ||
62 | __u8 icsk_syn_retries; | ||
63 | __u8 icsk_probes_out; | ||
64 | /* 2 BYTES HOLE, TRY TO PACK! */ | ||
65 | struct { | ||
66 | __u8 pending; /* ACK is pending */ | ||
67 | __u8 quick; /* Scheduled number of quick acks */ | ||
68 | __u8 pingpong; /* The session is interactive */ | ||
69 | __u8 blocked; /* Delayed ACK was blocked by socket lock */ | ||
70 | __u32 ato; /* Predicted tick of soft clock */ | ||
71 | unsigned long timeout; /* Currently scheduled timeout */ | ||
72 | __u32 lrcvtime; /* timestamp of last received data packet */ | ||
73 | __u16 last_seg_size; /* Size of last incoming segment */ | ||
74 | __u16 rcv_mss; /* MSS used for delayed ACK decisions */ | ||
75 | } icsk_ack; | ||
76 | u32 icsk_ca_priv[16]; | ||
77 | #define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) | ||
78 | }; | ||
79 | |||
80 | #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ | ||
81 | #define ICSK_TIME_DACK 2 /* Delayed ack timer */ | ||
82 | #define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ | ||
83 | #define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */ | ||
84 | |||
85 | static inline struct inet_connection_sock *inet_csk(const struct sock *sk) | ||
86 | { | ||
87 | return (struct inet_connection_sock *)sk; | ||
88 | } | ||
89 | |||
90 | static inline void *inet_csk_ca(const struct sock *sk) | ||
91 | { | ||
92 | return (void *)inet_csk(sk)->icsk_ca_priv; | ||
93 | } | ||
94 | |||
95 | extern struct sock *inet_csk_clone(struct sock *sk, | ||
96 | const struct request_sock *req, | ||
97 | const unsigned int __nocast priority); | ||
98 | |||
99 | enum inet_csk_ack_state_t { | ||
100 | ICSK_ACK_SCHED = 1, | ||
101 | ICSK_ACK_TIMER = 2, | ||
102 | ICSK_ACK_PUSHED = 4 | ||
103 | }; | ||
104 | |||
105 | extern void inet_csk_init_xmit_timers(struct sock *sk, | ||
106 | void (*retransmit_handler)(unsigned long), | ||
107 | void (*delack_handler)(unsigned long), | ||
108 | void (*keepalive_handler)(unsigned long)); | ||
109 | extern void inet_csk_clear_xmit_timers(struct sock *sk); | ||
110 | |||
111 | static inline void inet_csk_schedule_ack(struct sock *sk) | ||
112 | { | ||
113 | inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; | ||
114 | } | ||
115 | |||
116 | static inline int inet_csk_ack_scheduled(const struct sock *sk) | ||
117 | { | ||
118 | return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; | ||
119 | } | ||
120 | |||
121 | static inline void inet_csk_delack_init(struct sock *sk) | ||
122 | { | ||
123 | memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); | ||
124 | } | ||
125 | |||
126 | extern void inet_csk_delete_keepalive_timer(struct sock *sk); | ||
127 | extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); | ||
128 | |||
129 | #ifdef INET_CSK_DEBUG | ||
130 | extern const char inet_csk_timer_bug_msg[]; | ||
131 | #endif | ||
132 | |||
133 | static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) | ||
134 | { | ||
135 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
136 | |||
137 | if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { | ||
138 | icsk->icsk_pending = 0; | ||
139 | #ifdef INET_CSK_CLEAR_TIMERS | ||
140 | sk_stop_timer(sk, &icsk->icsk_retransmit_timer); | ||
141 | #endif | ||
142 | } else if (what == ICSK_TIME_DACK) { | ||
143 | icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; | ||
144 | #ifdef INET_CSK_CLEAR_TIMERS | ||
145 | sk_stop_timer(sk, &icsk->icsk_delack_timer); | ||
146 | #endif | ||
147 | } | ||
148 | #ifdef INET_CSK_DEBUG | ||
149 | else { | ||
150 | pr_debug("%s", inet_csk_timer_bug_msg); | ||
151 | } | ||
152 | #endif | ||
153 | } | ||
154 | |||
155 | /* | ||
156 | * Reset the retransmission timer | ||
157 | */ | ||
158 | static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, | ||
159 | unsigned long when, | ||
160 | const unsigned long max_when) | ||
161 | { | ||
162 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
163 | |||
164 | if (when > max_when) { | ||
165 | #ifdef INET_CSK_DEBUG | ||
166 | pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", | ||
167 | sk, what, when, current_text_addr()); | ||
168 | #endif | ||
169 | when = max_when; | ||
170 | } | ||
171 | |||
172 | if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { | ||
173 | icsk->icsk_pending = what; | ||
174 | icsk->icsk_timeout = jiffies + when; | ||
175 | sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); | ||
176 | } else if (what == ICSK_TIME_DACK) { | ||
177 | icsk->icsk_ack.pending |= ICSK_ACK_TIMER; | ||
178 | icsk->icsk_ack.timeout = jiffies + when; | ||
179 | sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); | ||
180 | } | ||
181 | #ifdef INET_CSK_DEBUG | ||
182 | else { | ||
183 | pr_debug("%s", inet_csk_timer_bug_msg); | ||
184 | } | ||
185 | #endif | ||
186 | } | ||
187 | |||
188 | extern struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); | ||
189 | |||
190 | extern struct request_sock *inet_csk_search_req(const struct sock *sk, | ||
191 | struct request_sock ***prevp, | ||
192 | const __u16 rport, | ||
193 | const __u32 raddr, | ||
194 | const __u32 laddr); | ||
195 | extern int inet_csk_get_port(struct inet_hashinfo *hashinfo, | ||
196 | struct sock *sk, unsigned short snum); | ||
197 | |||
198 | extern struct dst_entry* inet_csk_route_req(struct sock *sk, | ||
199 | const struct request_sock *req); | ||
200 | |||
201 | static inline void inet_csk_reqsk_queue_add(struct sock *sk, | ||
202 | struct request_sock *req, | ||
203 | struct sock *child) | ||
204 | { | ||
205 | reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); | ||
206 | } | ||
207 | |||
208 | extern void inet_csk_reqsk_queue_hash_add(struct sock *sk, | ||
209 | struct request_sock *req, | ||
210 | const unsigned timeout); | ||
211 | |||
212 | static inline void inet_csk_reqsk_queue_removed(struct sock *sk, | ||
213 | struct request_sock *req) | ||
214 | { | ||
215 | if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) | ||
216 | inet_csk_delete_keepalive_timer(sk); | ||
217 | } | ||
218 | |||
219 | static inline void inet_csk_reqsk_queue_added(struct sock *sk, | ||
220 | const unsigned long timeout) | ||
221 | { | ||
222 | if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) | ||
223 | inet_csk_reset_keepalive_timer(sk, timeout); | ||
224 | } | ||
225 | |||
226 | static inline int inet_csk_reqsk_queue_len(const struct sock *sk) | ||
227 | { | ||
228 | return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); | ||
229 | } | ||
230 | |||
231 | static inline int inet_csk_reqsk_queue_young(const struct sock *sk) | ||
232 | { | ||
233 | return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); | ||
234 | } | ||
235 | |||
236 | static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) | ||
237 | { | ||
238 | return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); | ||
239 | } | ||
240 | |||
241 | static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, | ||
242 | struct request_sock *req, | ||
243 | struct request_sock **prev) | ||
244 | { | ||
245 | reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); | ||
246 | } | ||
247 | |||
248 | static inline void inet_csk_reqsk_queue_drop(struct sock *sk, | ||
249 | struct request_sock *req, | ||
250 | struct request_sock **prev) | ||
251 | { | ||
252 | inet_csk_reqsk_queue_unlink(sk, req, prev); | ||
253 | inet_csk_reqsk_queue_removed(sk, req); | ||
254 | reqsk_free(req); | ||
255 | } | ||
256 | |||
257 | extern void inet_csk_reqsk_queue_prune(struct sock *parent, | ||
258 | const unsigned long interval, | ||
259 | const unsigned long timeout, | ||
260 | const unsigned long max_rto); | ||
261 | |||
262 | extern void inet_csk_destroy_sock(struct sock *sk); | ||
263 | |||
264 | /* | ||
265 | * LISTEN is a special case for poll.. | ||
266 | */ | ||
267 | static inline unsigned int inet_csk_listen_poll(const struct sock *sk) | ||
268 | { | ||
269 | return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ? | ||
270 | (POLLIN | POLLRDNORM) : 0; | ||
271 | } | ||
272 | |||
273 | extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries); | ||
274 | extern void inet_csk_listen_stop(struct sock *sk); | ||
275 | |||
276 | #endif /* _INET_CONNECTION_SOCK_H */ | ||
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h new file mode 100644 index 000000000000..646b6ea7fe26 --- /dev/null +++ b/include/net/inet_hashtables.h | |||
@@ -0,0 +1,427 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Authors: Lotsa people, from code originally in tcp | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #ifndef _INET_HASHTABLES_H | ||
15 | #define _INET_HASHTABLES_H | ||
16 | |||
17 | #include <linux/config.h> | ||
18 | |||
19 | #include <linux/interrupt.h> | ||
20 | #include <linux/ipv6.h> | ||
21 | #include <linux/list.h> | ||
22 | #include <linux/slab.h> | ||
23 | #include <linux/socket.h> | ||
24 | #include <linux/spinlock.h> | ||
25 | #include <linux/types.h> | ||
26 | #include <linux/wait.h> | ||
27 | |||
28 | #include <net/inet_connection_sock.h> | ||
29 | #include <net/route.h> | ||
30 | #include <net/sock.h> | ||
31 | #include <net/tcp_states.h> | ||
32 | |||
33 | #include <asm/atomic.h> | ||
34 | #include <asm/byteorder.h> | ||
35 | |||
36 | /* This is for all connections with a full identity, no wildcards. | ||
37 | * New scheme, half the table is for TIME_WAIT, the other half is | ||
38 | * for the rest. I'll experiment with dynamic table growth later. | ||
39 | */ | ||
40 | struct inet_ehash_bucket { | ||
41 | rwlock_t lock; | ||
42 | struct hlist_head chain; | ||
43 | } __attribute__((__aligned__(8))); | ||
44 | |||
45 | /* There are a few simple rules, which allow for local port reuse by | ||
46 | * an application. In essence: | ||
47 | * | ||
48 | * 1) Sockets bound to different interfaces may share a local port. | ||
49 | * Failing that, goto test 2. | ||
50 | * 2) If all sockets have sk->sk_reuse set, and none of them are in | ||
51 | * TCP_LISTEN state, the port may be shared. | ||
52 | * Failing that, goto test 3. | ||
53 | * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local | ||
54 | * address, and none of them are the same, the port may be | ||
55 | * shared. | ||
56 | * Failing this, the port cannot be shared. | ||
57 | * | ||
58 | * The interesting point, is test #2. This is what an FTP server does | ||
59 | * all day. To optimize this case we use a specific flag bit defined | ||
60 | * below. As we add sockets to a bind bucket list, we perform a | ||
61 | * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN)) | ||
62 | * As long as all sockets added to a bind bucket pass this test, | ||
63 | * the flag bit will be set. | ||
64 | * The resulting situation is that tcp_v[46]_verify_bind() can just check | ||
65 | * for this flag bit, if it is set and the socket trying to bind has | ||
66 | * sk->sk_reuse set, we don't even have to walk the owners list at all, | ||
67 | * we return that it is ok to bind this socket to the requested local port. | ||
68 | * | ||
69 | * Sounds like a lot of work, but it is worth it. In a more naive | ||
70 | * implementation (ie. current FreeBSD etc.) the entire list of ports | ||
71 | * must be walked for each data port opened by an ftp server. Needless | ||
72 | * to say, this does not scale at all. With a couple thousand FTP | ||
73 | * users logged onto your box, isn't it nice to know that new data | ||
74 | * ports are created in O(1) time? I thought so. ;-) -DaveM | ||
75 | */ | ||
76 | struct inet_bind_bucket { | ||
77 | unsigned short port; | ||
78 | signed short fastreuse; | ||
79 | struct hlist_node node; | ||
80 | struct hlist_head owners; | ||
81 | }; | ||
82 | |||
83 | #define inet_bind_bucket_for_each(tb, node, head) \ | ||
84 | hlist_for_each_entry(tb, node, head, node) | ||
85 | |||
86 | struct inet_bind_hashbucket { | ||
87 | spinlock_t lock; | ||
88 | struct hlist_head chain; | ||
89 | }; | ||
90 | |||
91 | /* This is for listening sockets, thus all sockets which possess wildcards. */ | ||
92 | #define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ | ||
93 | |||
94 | struct inet_hashinfo { | ||
95 | /* This is for sockets with full identity only. Sockets here will | ||
96 | * always be without wildcards and will have the following invariant: | ||
97 | * | ||
98 | * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE | ||
99 | * | ||
100 | * First half of the table is for sockets not in TIME_WAIT, second half | ||
101 | * is for TIME_WAIT sockets only. | ||
102 | */ | ||
103 | struct inet_ehash_bucket *ehash; | ||
104 | |||
105 | /* Ok, let's try this, I give up, we do need a local binding | ||
106 | * TCP hash as well as the others for fast bind/connect. | ||
107 | */ | ||
108 | struct inet_bind_hashbucket *bhash; | ||
109 | |||
110 | int bhash_size; | ||
111 | int ehash_size; | ||
112 | |||
113 | /* All sockets in TCP_LISTEN state will be in here. This is the only | ||
114 | * table where wildcard'd TCP sockets can exist. Hash function here | ||
115 | * is just local port number. | ||
116 | */ | ||
117 | struct hlist_head listening_hash[INET_LHTABLE_SIZE]; | ||
118 | |||
119 | /* All the above members are written once at bootup and | ||
120 | * never written again _or_ are predominantly read-access. | ||
121 | * | ||
122 | * Now align to a new cache line as all the following members | ||
123 | * are often dirty. | ||
124 | */ | ||
125 | rwlock_t lhash_lock ____cacheline_aligned; | ||
126 | atomic_t lhash_users; | ||
127 | wait_queue_head_t lhash_wait; | ||
128 | spinlock_t portalloc_lock; | ||
129 | kmem_cache_t *bind_bucket_cachep; | ||
130 | int port_rover; | ||
131 | }; | ||
132 | |||
133 | static inline int inet_ehashfn(const __u32 laddr, const __u16 lport, | ||
134 | const __u32 faddr, const __u16 fport, | ||
135 | const int ehash_size) | ||
136 | { | ||
137 | int h = (laddr ^ lport) ^ (faddr ^ fport); | ||
138 | h ^= h >> 16; | ||
139 | h ^= h >> 8; | ||
140 | return h & (ehash_size - 1); | ||
141 | } | ||
142 | |||
143 | static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size) | ||
144 | { | ||
145 | const struct inet_sock *inet = inet_sk(sk); | ||
146 | const __u32 laddr = inet->rcv_saddr; | ||
147 | const __u16 lport = inet->num; | ||
148 | const __u32 faddr = inet->daddr; | ||
149 | const __u16 fport = inet->dport; | ||
150 | |||
151 | return inet_ehashfn(laddr, lport, faddr, fport, ehash_size); | ||
152 | } | ||
153 | |||
154 | extern struct inet_bind_bucket * | ||
155 | inet_bind_bucket_create(kmem_cache_t *cachep, | ||
156 | struct inet_bind_hashbucket *head, | ||
157 | const unsigned short snum); | ||
158 | extern void inet_bind_bucket_destroy(kmem_cache_t *cachep, | ||
159 | struct inet_bind_bucket *tb); | ||
160 | |||
161 | static inline int inet_bhashfn(const __u16 lport, const int bhash_size) | ||
162 | { | ||
163 | return lport & (bhash_size - 1); | ||
164 | } | ||
165 | |||
166 | extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, | ||
167 | const unsigned short snum); | ||
168 | |||
169 | /* These can have wildcards, don't try too hard. */ | ||
170 | static inline int inet_lhashfn(const unsigned short num) | ||
171 | { | ||
172 | return num & (INET_LHTABLE_SIZE - 1); | ||
173 | } | ||
174 | |||
175 | static inline int inet_sk_listen_hashfn(const struct sock *sk) | ||
176 | { | ||
177 | return inet_lhashfn(inet_sk(sk)->num); | ||
178 | } | ||
179 | |||
180 | /* Caller must disable local BH processing. */ | ||
181 | static inline void __inet_inherit_port(struct inet_hashinfo *table, | ||
182 | struct sock *sk, struct sock *child) | ||
183 | { | ||
184 | const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size); | ||
185 | struct inet_bind_hashbucket *head = &table->bhash[bhash]; | ||
186 | struct inet_bind_bucket *tb; | ||
187 | |||
188 | spin_lock(&head->lock); | ||
189 | tb = inet_csk(sk)->icsk_bind_hash; | ||
190 | sk_add_bind_node(child, &tb->owners); | ||
191 | inet_csk(child)->icsk_bind_hash = tb; | ||
192 | spin_unlock(&head->lock); | ||
193 | } | ||
194 | |||
195 | static inline void inet_inherit_port(struct inet_hashinfo *table, | ||
196 | struct sock *sk, struct sock *child) | ||
197 | { | ||
198 | local_bh_disable(); | ||
199 | __inet_inherit_port(table, sk, child); | ||
200 | local_bh_enable(); | ||
201 | } | ||
202 | |||
203 | extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk); | ||
204 | |||
205 | extern void inet_listen_wlock(struct inet_hashinfo *hashinfo); | ||
206 | |||
207 | /* | ||
208 | * - We may sleep inside this lock. | ||
209 | * - If sleeping is not required (or called from BH), | ||
210 | * use plain read_(un)lock(&inet_hashinfo.lhash_lock). | ||
211 | */ | ||
212 | static inline void inet_listen_lock(struct inet_hashinfo *hashinfo) | ||
213 | { | ||
214 | /* read_lock synchronizes to candidates to writers */ | ||
215 | read_lock(&hashinfo->lhash_lock); | ||
216 | atomic_inc(&hashinfo->lhash_users); | ||
217 | read_unlock(&hashinfo->lhash_lock); | ||
218 | } | ||
219 | |||
220 | static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo) | ||
221 | { | ||
222 | if (atomic_dec_and_test(&hashinfo->lhash_users)) | ||
223 | wake_up(&hashinfo->lhash_wait); | ||
224 | } | ||
225 | |||
226 | static inline void __inet_hash(struct inet_hashinfo *hashinfo, | ||
227 | struct sock *sk, const int listen_possible) | ||
228 | { | ||
229 | struct hlist_head *list; | ||
230 | rwlock_t *lock; | ||
231 | |||
232 | BUG_TRAP(sk_unhashed(sk)); | ||
233 | if (listen_possible && sk->sk_state == TCP_LISTEN) { | ||
234 | list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; | ||
235 | lock = &hashinfo->lhash_lock; | ||
236 | inet_listen_wlock(hashinfo); | ||
237 | } else { | ||
238 | sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size); | ||
239 | list = &hashinfo->ehash[sk->sk_hashent].chain; | ||
240 | lock = &hashinfo->ehash[sk->sk_hashent].lock; | ||
241 | write_lock(lock); | ||
242 | } | ||
243 | __sk_add_node(sk, list); | ||
244 | sock_prot_inc_use(sk->sk_prot); | ||
245 | write_unlock(lock); | ||
246 | if (listen_possible && sk->sk_state == TCP_LISTEN) | ||
247 | wake_up(&hashinfo->lhash_wait); | ||
248 | } | ||
249 | |||
250 | static inline void inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk) | ||
251 | { | ||
252 | if (sk->sk_state != TCP_CLOSE) { | ||
253 | local_bh_disable(); | ||
254 | __inet_hash(hashinfo, sk, 1); | ||
255 | local_bh_enable(); | ||
256 | } | ||
257 | } | ||
258 | |||
259 | static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk) | ||
260 | { | ||
261 | rwlock_t *lock; | ||
262 | |||
263 | if (sk_unhashed(sk)) | ||
264 | goto out; | ||
265 | |||
266 | if (sk->sk_state == TCP_LISTEN) { | ||
267 | local_bh_disable(); | ||
268 | inet_listen_wlock(hashinfo); | ||
269 | lock = &hashinfo->lhash_lock; | ||
270 | } else { | ||
271 | struct inet_ehash_bucket *head = &hashinfo->ehash[sk->sk_hashent]; | ||
272 | lock = &head->lock; | ||
273 | write_lock_bh(&head->lock); | ||
274 | } | ||
275 | |||
276 | if (__sk_del_node_init(sk)) | ||
277 | sock_prot_dec_use(sk->sk_prot); | ||
278 | write_unlock_bh(lock); | ||
279 | out: | ||
280 | if (sk->sk_state == TCP_LISTEN) | ||
281 | wake_up(&hashinfo->lhash_wait); | ||
282 | } | ||
283 | |||
284 | static inline int inet_iif(const struct sk_buff *skb) | ||
285 | { | ||
286 | return ((struct rtable *)skb->dst)->rt_iif; | ||
287 | } | ||
288 | |||
289 | extern struct sock *__inet_lookup_listener(const struct hlist_head *head, | ||
290 | const u32 daddr, | ||
291 | const unsigned short hnum, | ||
292 | const int dif); | ||
293 | |||
294 | /* Optimize the common listener case. */ | ||
295 | static inline struct sock * | ||
296 | inet_lookup_listener(struct inet_hashinfo *hashinfo, | ||
297 | const u32 daddr, | ||
298 | const unsigned short hnum, const int dif) | ||
299 | { | ||
300 | struct sock *sk = NULL; | ||
301 | const struct hlist_head *head; | ||
302 | |||
303 | read_lock(&hashinfo->lhash_lock); | ||
304 | head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; | ||
305 | if (!hlist_empty(head)) { | ||
306 | const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); | ||
307 | |||
308 | if (inet->num == hnum && !sk->sk_node.next && | ||
309 | (!inet->rcv_saddr || inet->rcv_saddr == daddr) && | ||
310 | (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && | ||
311 | !sk->sk_bound_dev_if) | ||
312 | goto sherry_cache; | ||
313 | sk = __inet_lookup_listener(head, daddr, hnum, dif); | ||
314 | } | ||
315 | if (sk) { | ||
316 | sherry_cache: | ||
317 | sock_hold(sk); | ||
318 | } | ||
319 | read_unlock(&hashinfo->lhash_lock); | ||
320 | return sk; | ||
321 | } | ||
322 | |||
323 | /* Socket demux engine toys. */ | ||
324 | #ifdef __BIG_ENDIAN | ||
325 | #define INET_COMBINED_PORTS(__sport, __dport) \ | ||
326 | (((__u32)(__sport) << 16) | (__u32)(__dport)) | ||
327 | #else /* __LITTLE_ENDIAN */ | ||
328 | #define INET_COMBINED_PORTS(__sport, __dport) \ | ||
329 | (((__u32)(__dport) << 16) | (__u32)(__sport)) | ||
330 | #endif | ||
331 | |||
332 | #if (BITS_PER_LONG == 64) | ||
333 | #ifdef __BIG_ENDIAN | ||
334 | #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ | ||
335 | const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr)); | ||
336 | #else /* __LITTLE_ENDIAN */ | ||
337 | #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ | ||
338 | const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr)); | ||
339 | #endif /* __BIG_ENDIAN */ | ||
340 | #define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
341 | (((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ | ||
342 | ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ | ||
343 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
344 | #define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
345 | (((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ | ||
346 | ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ | ||
347 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
348 | #else /* 32-bit arch */ | ||
349 | #define INET_ADDR_COOKIE(__name, __saddr, __daddr) | ||
350 | #define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ | ||
351 | ((inet_sk(__sk)->daddr == (__saddr)) && \ | ||
352 | (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ | ||
353 | ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ | ||
354 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
355 | #define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ | ||
356 | ((inet_twsk(__sk)->tw_daddr == (__saddr)) && \ | ||
357 | (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ | ||
358 | ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ | ||
359 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
360 | #endif /* 64-bit arch */ | ||
361 | |||
362 | /* | ||
363 | * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need | ||
364 | * not check it for lookups anymore, thanks Alexey. -DaveM | ||
365 | * | ||
366 | * Local BH must be disabled here. | ||
367 | */ | ||
368 | static inline struct sock * | ||
369 | __inet_lookup_established(struct inet_hashinfo *hashinfo, | ||
370 | const u32 saddr, const u16 sport, | ||
371 | const u32 daddr, const u16 hnum, | ||
372 | const int dif) | ||
373 | { | ||
374 | INET_ADDR_COOKIE(acookie, saddr, daddr) | ||
375 | const __u32 ports = INET_COMBINED_PORTS(sport, hnum); | ||
376 | struct sock *sk; | ||
377 | const struct hlist_node *node; | ||
378 | /* Optimize here for direct hit, only listening connections can | ||
379 | * have wildcards anyways. | ||
380 | */ | ||
381 | const int hash = inet_ehashfn(daddr, hnum, saddr, sport, hashinfo->ehash_size); | ||
382 | struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; | ||
383 | |||
384 | read_lock(&head->lock); | ||
385 | sk_for_each(sk, node, &head->chain) { | ||
386 | if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) | ||
387 | goto hit; /* You sunk my battleship! */ | ||
388 | } | ||
389 | |||
390 | /* Must check for a TIME_WAIT'er before going to listener hash. */ | ||
391 | sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { | ||
392 | if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) | ||
393 | goto hit; | ||
394 | } | ||
395 | sk = NULL; | ||
396 | out: | ||
397 | read_unlock(&head->lock); | ||
398 | return sk; | ||
399 | hit: | ||
400 | sock_hold(sk); | ||
401 | goto out; | ||
402 | } | ||
403 | |||
404 | static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo, | ||
405 | const u32 saddr, const u16 sport, | ||
406 | const u32 daddr, const u16 hnum, | ||
407 | const int dif) | ||
408 | { | ||
409 | struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr, | ||
410 | hnum, dif); | ||
411 | return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif); | ||
412 | } | ||
413 | |||
414 | static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, | ||
415 | const u32 saddr, const u16 sport, | ||
416 | const u32 daddr, const u16 dport, | ||
417 | const int dif) | ||
418 | { | ||
419 | struct sock *sk; | ||
420 | |||
421 | local_bh_disable(); | ||
422 | sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); | ||
423 | local_bh_enable(); | ||
424 | |||
425 | return sk; | ||
426 | } | ||
427 | #endif /* _INET_HASHTABLES_H */ | ||
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h new file mode 100644 index 000000000000..3b070352e869 --- /dev/null +++ b/include/net/inet_timewait_sock.h | |||
@@ -0,0 +1,219 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Definitions for a generic INET TIMEWAIT sock | ||
7 | * | ||
8 | * From code originally in net/tcp.h | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | */ | ||
15 | #ifndef _INET_TIMEWAIT_SOCK_ | ||
16 | #define _INET_TIMEWAIT_SOCK_ | ||
17 | |||
18 | #include <linux/config.h> | ||
19 | |||
20 | #include <linux/ip.h> | ||
21 | #include <linux/list.h> | ||
22 | #include <linux/timer.h> | ||
23 | #include <linux/types.h> | ||
24 | #include <linux/workqueue.h> | ||
25 | |||
26 | #include <net/sock.h> | ||
27 | #include <net/tcp_states.h> | ||
28 | |||
29 | #include <asm/atomic.h> | ||
30 | |||
31 | struct inet_hashinfo; | ||
32 | |||
33 | #define INET_TWDR_RECYCLE_SLOTS_LOG 5 | ||
34 | #define INET_TWDR_RECYCLE_SLOTS (1 << INET_TWDR_RECYCLE_SLOTS_LOG) | ||
35 | |||
36 | /* | ||
37 | * If time > 4sec, it is "slow" path, no recycling is required, | ||
38 | * so that we select tick to get range about 4 seconds. | ||
39 | */ | ||
40 | #if HZ <= 16 || HZ > 4096 | ||
41 | # error Unsupported: HZ <= 16 or HZ > 4096 | ||
42 | #elif HZ <= 32 | ||
43 | # define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | ||
44 | #elif HZ <= 64 | ||
45 | # define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | ||
46 | #elif HZ <= 128 | ||
47 | # define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | ||
48 | #elif HZ <= 256 | ||
49 | # define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | ||
50 | #elif HZ <= 512 | ||
51 | # define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | ||
52 | #elif HZ <= 1024 | ||
53 | # define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | ||
54 | #elif HZ <= 2048 | ||
55 | # define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | ||
56 | #else | ||
57 | # define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | ||
58 | #endif | ||
59 | |||
60 | /* TIME_WAIT reaping mechanism. */ | ||
61 | #define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ | ||
62 | |||
63 | #define INET_TWDR_TWKILL_QUOTA 100 | ||
64 | |||
65 | struct inet_timewait_death_row { | ||
66 | /* Short-time timewait calendar */ | ||
67 | int twcal_hand; | ||
68 | int twcal_jiffie; | ||
69 | struct timer_list twcal_timer; | ||
70 | struct hlist_head twcal_row[INET_TWDR_RECYCLE_SLOTS]; | ||
71 | |||
72 | spinlock_t death_lock; | ||
73 | int tw_count; | ||
74 | int period; | ||
75 | u32 thread_slots; | ||
76 | struct work_struct twkill_work; | ||
77 | struct timer_list tw_timer; | ||
78 | int slot; | ||
79 | struct hlist_head cells[INET_TWDR_TWKILL_SLOTS]; | ||
80 | struct inet_hashinfo *hashinfo; | ||
81 | int sysctl_tw_recycle; | ||
82 | int sysctl_max_tw_buckets; | ||
83 | }; | ||
84 | |||
85 | extern void inet_twdr_hangman(unsigned long data); | ||
86 | extern void inet_twdr_twkill_work(void *data); | ||
87 | extern void inet_twdr_twcal_tick(unsigned long data); | ||
88 | |||
89 | #if (BITS_PER_LONG == 64) | ||
90 | #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 | ||
91 | #else | ||
92 | #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4 | ||
93 | #endif | ||
94 | |||
95 | struct inet_bind_bucket; | ||
96 | |||
97 | /* | ||
98 | * This is a TIME_WAIT sock. It works around the memory consumption | ||
99 | * problems of sockets in such a state on heavily loaded servers, but | ||
100 | * without violating the protocol specification. | ||
101 | */ | ||
102 | struct inet_timewait_sock { | ||
103 | /* | ||
104 | * Now struct sock also uses sock_common, so please just | ||
105 | * don't add nothing before this first member (__tw_common) --acme | ||
106 | */ | ||
107 | struct sock_common __tw_common; | ||
108 | #define tw_family __tw_common.skc_family | ||
109 | #define tw_state __tw_common.skc_state | ||
110 | #define tw_reuse __tw_common.skc_reuse | ||
111 | #define tw_bound_dev_if __tw_common.skc_bound_dev_if | ||
112 | #define tw_node __tw_common.skc_node | ||
113 | #define tw_bind_node __tw_common.skc_bind_node | ||
114 | #define tw_refcnt __tw_common.skc_refcnt | ||
115 | #define tw_prot __tw_common.skc_prot | ||
116 | volatile unsigned char tw_substate; | ||
117 | /* 3 bits hole, try to pack */ | ||
118 | unsigned char tw_rcv_wscale; | ||
119 | /* Socket demultiplex comparisons on incoming packets. */ | ||
120 | /* these five are in inet_sock */ | ||
121 | __u16 tw_sport; | ||
122 | __u32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES))); | ||
123 | __u32 tw_rcv_saddr; | ||
124 | __u16 tw_dport; | ||
125 | __u16 tw_num; | ||
126 | /* And these are ours. */ | ||
127 | __u8 tw_ipv6only:1; | ||
128 | /* 31 bits hole, try to pack */ | ||
129 | int tw_hashent; | ||
130 | int tw_timeout; | ||
131 | unsigned long tw_ttd; | ||
132 | struct inet_bind_bucket *tw_tb; | ||
133 | struct hlist_node tw_death_node; | ||
134 | }; | ||
135 | |||
136 | static inline void inet_twsk_add_node(struct inet_timewait_sock *tw, | ||
137 | struct hlist_head *list) | ||
138 | { | ||
139 | hlist_add_head(&tw->tw_node, list); | ||
140 | } | ||
141 | |||
142 | static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, | ||
143 | struct hlist_head *list) | ||
144 | { | ||
145 | hlist_add_head(&tw->tw_bind_node, list); | ||
146 | } | ||
147 | |||
148 | static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw) | ||
149 | { | ||
150 | return tw->tw_death_node.pprev != NULL; | ||
151 | } | ||
152 | |||
153 | static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw) | ||
154 | { | ||
155 | tw->tw_death_node.pprev = NULL; | ||
156 | } | ||
157 | |||
158 | static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw) | ||
159 | { | ||
160 | __hlist_del(&tw->tw_death_node); | ||
161 | inet_twsk_dead_node_init(tw); | ||
162 | } | ||
163 | |||
164 | static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) | ||
165 | { | ||
166 | if (inet_twsk_dead_hashed(tw)) { | ||
167 | __inet_twsk_del_dead_node(tw); | ||
168 | return 1; | ||
169 | } | ||
170 | return 0; | ||
171 | } | ||
172 | |||
173 | #define inet_twsk_for_each(tw, node, head) \ | ||
174 | hlist_for_each_entry(tw, node, head, tw_node) | ||
175 | |||
176 | #define inet_twsk_for_each_inmate(tw, node, jail) \ | ||
177 | hlist_for_each_entry(tw, node, jail, tw_death_node) | ||
178 | |||
179 | #define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \ | ||
180 | hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) | ||
181 | |||
182 | static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) | ||
183 | { | ||
184 | return (struct inet_timewait_sock *)sk; | ||
185 | } | ||
186 | |||
187 | static inline u32 inet_rcv_saddr(const struct sock *sk) | ||
188 | { | ||
189 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
190 | inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr; | ||
191 | } | ||
192 | |||
193 | static inline void inet_twsk_put(struct inet_timewait_sock *tw) | ||
194 | { | ||
195 | if (atomic_dec_and_test(&tw->tw_refcnt)) { | ||
196 | #ifdef SOCK_REFCNT_DEBUG | ||
197 | printk(KERN_DEBUG "%s timewait_sock %p released\n", | ||
198 | tw->tw_prot->name, tw); | ||
199 | #endif | ||
200 | kmem_cache_free(tw->tw_prot->twsk_slab, tw); | ||
201 | } | ||
202 | } | ||
203 | |||
204 | extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, | ||
205 | const int state); | ||
206 | |||
207 | extern void __inet_twsk_kill(struct inet_timewait_sock *tw, | ||
208 | struct inet_hashinfo *hashinfo); | ||
209 | |||
210 | extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw, | ||
211 | struct sock *sk, | ||
212 | struct inet_hashinfo *hashinfo); | ||
213 | |||
214 | extern void inet_twsk_schedule(struct inet_timewait_sock *tw, | ||
215 | struct inet_timewait_death_row *twdr, | ||
216 | const int timeo, const int timewait_len); | ||
217 | extern void inet_twsk_deschedule(struct inet_timewait_sock *tw, | ||
218 | struct inet_timewait_death_row *twdr); | ||
219 | #endif /* _INET_TIMEWAIT_SOCK_ */ | ||
diff --git a/include/net/ip.h b/include/net/ip.h index 32360bbe143f..e4563bbee6ea 100644 --- a/include/net/ip.h +++ b/include/net/ip.h | |||
@@ -86,7 +86,7 @@ extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
86 | u32 saddr, u32 daddr, | 86 | u32 saddr, u32 daddr, |
87 | struct ip_options *opt); | 87 | struct ip_options *opt); |
88 | extern int ip_rcv(struct sk_buff *skb, struct net_device *dev, | 88 | extern int ip_rcv(struct sk_buff *skb, struct net_device *dev, |
89 | struct packet_type *pt); | 89 | struct packet_type *pt, struct net_device *orig_dev); |
90 | extern int ip_local_deliver(struct sk_buff *skb); | 90 | extern int ip_local_deliver(struct sk_buff *skb); |
91 | extern int ip_mr_input(struct sk_buff *skb); | 91 | extern int ip_mr_input(struct sk_buff *skb); |
92 | extern int ip_output(struct sk_buff *skb); | 92 | extern int ip_output(struct sk_buff *skb); |
@@ -140,8 +140,6 @@ struct ip_reply_arg { | |||
140 | void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, | 140 | void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, |
141 | unsigned int len); | 141 | unsigned int len); |
142 | 142 | ||
143 | extern int ip_finish_output(struct sk_buff *skb); | ||
144 | |||
145 | struct ipv4_config | 143 | struct ipv4_config |
146 | { | 144 | { |
147 | int log_martians; | 145 | int log_martians; |
@@ -165,6 +163,24 @@ extern int sysctl_local_port_range[2]; | |||
165 | extern int sysctl_ip_default_ttl; | 163 | extern int sysctl_ip_default_ttl; |
166 | extern int sysctl_ip_nonlocal_bind; | 164 | extern int sysctl_ip_nonlocal_bind; |
167 | 165 | ||
166 | /* From ip_fragment.c */ | ||
167 | extern int sysctl_ipfrag_high_thresh; | ||
168 | extern int sysctl_ipfrag_low_thresh; | ||
169 | extern int sysctl_ipfrag_time; | ||
170 | extern int sysctl_ipfrag_secret_interval; | ||
171 | |||
172 | /* From inetpeer.c */ | ||
173 | extern int inet_peer_threshold; | ||
174 | extern int inet_peer_minttl; | ||
175 | extern int inet_peer_maxttl; | ||
176 | extern int inet_peer_gc_mintime; | ||
177 | extern int inet_peer_gc_maxtime; | ||
178 | |||
179 | /* From ip_output.c */ | ||
180 | extern int sysctl_ip_dynaddr; | ||
181 | |||
182 | extern void ipfrag_init(void); | ||
183 | |||
168 | #ifdef CONFIG_INET | 184 | #ifdef CONFIG_INET |
169 | /* The function in 2.2 was invalid, producing wrong result for | 185 | /* The function in 2.2 was invalid, producing wrong result for |
170 | * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */ | 186 | * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */ |
@@ -319,7 +335,10 @@ extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, u32 da | |||
319 | extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb); | 335 | extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb); |
320 | extern void ip_options_fragment(struct sk_buff *skb); | 336 | extern void ip_options_fragment(struct sk_buff *skb); |
321 | extern int ip_options_compile(struct ip_options *opt, struct sk_buff *skb); | 337 | extern int ip_options_compile(struct ip_options *opt, struct sk_buff *skb); |
322 | extern int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, int user); | 338 | extern int ip_options_get(struct ip_options **optp, |
339 | unsigned char *data, int optlen); | ||
340 | extern int ip_options_get_from_user(struct ip_options **optp, | ||
341 | unsigned char __user *data, int optlen); | ||
323 | extern void ip_options_undo(struct ip_options * opt); | 342 | extern void ip_options_undo(struct ip_options * opt); |
324 | extern void ip_forward_options(struct sk_buff *skb); | 343 | extern void ip_forward_options(struct sk_buff *skb); |
325 | extern int ip_options_rcv_srr(struct sk_buff *skb); | 344 | extern int ip_options_rcv_srr(struct sk_buff *skb); |
@@ -350,5 +369,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, | |||
350 | void __user *oldval, size_t __user *oldlenp, | 369 | void __user *oldval, size_t __user *oldlenp, |
351 | void __user *newval, size_t newlen, | 370 | void __user *newval, size_t newlen, |
352 | void **context); | 371 | void **context); |
372 | #ifdef CONFIG_PROC_FS | ||
373 | extern int ip_misc_proc_init(void); | ||
374 | #endif | ||
375 | |||
376 | extern struct ctl_table ipv4_table[]; | ||
353 | 377 | ||
354 | #endif /* _IP_H */ | 378 | #endif /* _IP_H */ |
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f920706d526b..1f2e428ca364 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <net/flow.h> | 12 | #include <net/flow.h> |
13 | #include <net/ip6_fib.h> | 13 | #include <net/ip6_fib.h> |
14 | #include <net/sock.h> | 14 | #include <net/sock.h> |
15 | #include <linux/tcp.h> | ||
16 | #include <linux/ip.h> | 15 | #include <linux/ip.h> |
17 | #include <linux/ipv6.h> | 16 | #include <linux/ipv6.h> |
18 | 17 | ||
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a4208a336ac0..14de4ebd1211 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h | |||
@@ -295,4 +295,9 @@ static inline void fib_res_put(struct fib_result *res) | |||
295 | #endif | 295 | #endif |
296 | } | 296 | } |
297 | 297 | ||
298 | #ifdef CONFIG_PROC_FS | ||
299 | extern int fib_proc_init(void); | ||
300 | extern void fib_proc_exit(void); | ||
301 | #endif | ||
302 | |||
298 | #endif /* _NET_FIB_H */ | 303 | #endif /* _NET_FIB_H */ |
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 52da5d26617a..7a3c43711a17 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h | |||
@@ -255,7 +255,6 @@ struct ip_vs_daemon_user { | |||
255 | #include <asm/atomic.h> /* for struct atomic_t */ | 255 | #include <asm/atomic.h> /* for struct atomic_t */ |
256 | #include <linux/netdevice.h> /* for struct neighbour */ | 256 | #include <linux/netdevice.h> /* for struct neighbour */ |
257 | #include <net/dst.h> /* for struct dst_entry */ | 257 | #include <net/dst.h> /* for struct dst_entry */ |
258 | #include <net/tcp.h> | ||
259 | #include <net/udp.h> | 258 | #include <net/udp.h> |
260 | #include <linux/compiler.h> | 259 | #include <linux/compiler.h> |
261 | 260 | ||
diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 69324465e8b3..3203eaff4bd4 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h | |||
@@ -104,6 +104,7 @@ struct frag_hdr { | |||
104 | 104 | ||
105 | #ifdef __KERNEL__ | 105 | #ifdef __KERNEL__ |
106 | 106 | ||
107 | #include <linux/config.h> | ||
107 | #include <net/sock.h> | 108 | #include <net/sock.h> |
108 | 109 | ||
109 | /* sysctls */ | 110 | /* sysctls */ |
@@ -145,7 +146,6 @@ DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6); | |||
145 | #define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field) | 146 | #define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field) |
146 | #define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field) | 147 | #define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field) |
147 | #define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field) | 148 | #define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field) |
148 | extern atomic_t inet6_sock_nr; | ||
149 | 149 | ||
150 | int snmp6_register_dev(struct inet6_dev *idev); | 150 | int snmp6_register_dev(struct inet6_dev *idev); |
151 | int snmp6_unregister_dev(struct inet6_dev *idev); | 151 | int snmp6_unregister_dev(struct inet6_dev *idev); |
@@ -346,7 +346,8 @@ static inline int ipv6_addr_any(const struct in6_addr *a) | |||
346 | 346 | ||
347 | extern int ipv6_rcv(struct sk_buff *skb, | 347 | extern int ipv6_rcv(struct sk_buff *skb, |
348 | struct net_device *dev, | 348 | struct net_device *dev, |
349 | struct packet_type *pt); | 349 | struct packet_type *pt, |
350 | struct net_device *orig_dev); | ||
350 | 351 | ||
351 | /* | 352 | /* |
352 | * upper-layer output functions | 353 | * upper-layer output functions |
@@ -464,8 +465,38 @@ extern int sysctl_ip6frag_low_thresh; | |||
464 | extern int sysctl_ip6frag_time; | 465 | extern int sysctl_ip6frag_time; |
465 | extern int sysctl_ip6frag_secret_interval; | 466 | extern int sysctl_ip6frag_secret_interval; |
466 | 467 | ||
467 | #endif /* __KERNEL__ */ | 468 | extern struct proto_ops inet6_stream_ops; |
468 | #endif /* _NET_IPV6_H */ | 469 | extern struct proto_ops inet6_dgram_ops; |
470 | |||
471 | extern int ip6_mc_source(int add, int omode, struct sock *sk, | ||
472 | struct group_source_req *pgsr); | ||
473 | extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf); | ||
474 | extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, | ||
475 | struct group_filter __user *optval, | ||
476 | int __user *optlen); | ||
477 | |||
478 | #ifdef CONFIG_PROC_FS | ||
479 | extern int ac6_proc_init(void); | ||
480 | extern void ac6_proc_exit(void); | ||
481 | extern int raw6_proc_init(void); | ||
482 | extern void raw6_proc_exit(void); | ||
483 | extern int tcp6_proc_init(void); | ||
484 | extern void tcp6_proc_exit(void); | ||
485 | extern int udp6_proc_init(void); | ||
486 | extern void udp6_proc_exit(void); | ||
487 | extern int ipv6_misc_proc_init(void); | ||
488 | extern void ipv6_misc_proc_exit(void); | ||
489 | |||
490 | extern struct rt6_statistics rt6_stats; | ||
491 | #endif | ||
469 | 492 | ||
493 | #ifdef CONFIG_SYSCTL | ||
494 | extern ctl_table ipv6_route_table[]; | ||
495 | extern ctl_table ipv6_icmp_table[]; | ||
470 | 496 | ||
497 | extern void ipv6_sysctl_register(void); | ||
498 | extern void ipv6_sysctl_unregister(void); | ||
499 | #endif | ||
471 | 500 | ||
501 | #endif /* __KERNEL__ */ | ||
502 | #endif /* _NET_IPV6_H */ | ||
diff --git a/include/net/llc.h b/include/net/llc.h index c9aed2a8b4e2..71769a5aeef3 100644 --- a/include/net/llc.h +++ b/include/net/llc.h | |||
@@ -46,7 +46,8 @@ struct llc_sap { | |||
46 | unsigned char f_bit; | 46 | unsigned char f_bit; |
47 | int (*rcv_func)(struct sk_buff *skb, | 47 | int (*rcv_func)(struct sk_buff *skb, |
48 | struct net_device *dev, | 48 | struct net_device *dev, |
49 | struct packet_type *pt); | 49 | struct packet_type *pt, |
50 | struct net_device *orig_dev); | ||
50 | struct llc_addr laddr; | 51 | struct llc_addr laddr; |
51 | struct list_head node; | 52 | struct list_head node; |
52 | struct { | 53 | struct { |
@@ -64,7 +65,7 @@ extern rwlock_t llc_sap_list_lock; | |||
64 | extern unsigned char llc_station_mac_sa[ETH_ALEN]; | 65 | extern unsigned char llc_station_mac_sa[ETH_ALEN]; |
65 | 66 | ||
66 | extern int llc_rcv(struct sk_buff *skb, struct net_device *dev, | 67 | extern int llc_rcv(struct sk_buff *skb, struct net_device *dev, |
67 | struct packet_type *pt); | 68 | struct packet_type *pt, struct net_device *orig_dev); |
68 | 69 | ||
69 | extern int llc_mac_hdr_init(struct sk_buff *skb, | 70 | extern int llc_mac_hdr_init(struct sk_buff *skb, |
70 | unsigned char *sa, unsigned char *da); | 71 | unsigned char *sa, unsigned char *da); |
@@ -78,7 +79,8 @@ extern void llc_set_station_handler(void (*handler)(struct sk_buff *skb)); | |||
78 | extern struct llc_sap *llc_sap_open(unsigned char lsap, | 79 | extern struct llc_sap *llc_sap_open(unsigned char lsap, |
79 | int (*rcv)(struct sk_buff *skb, | 80 | int (*rcv)(struct sk_buff *skb, |
80 | struct net_device *dev, | 81 | struct net_device *dev, |
81 | struct packet_type *pt)); | 82 | struct packet_type *pt, |
83 | struct net_device *orig_dev)); | ||
82 | extern void llc_sap_close(struct llc_sap *sap); | 84 | extern void llc_sap_close(struct llc_sap *sap); |
83 | 85 | ||
84 | extern struct llc_sap *llc_sap_find(unsigned char sap_value); | 86 | extern struct llc_sap *llc_sap_find(unsigned char sap_value); |
diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 89809891e5ab..34c07731933d 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h | |||
@@ -363,7 +363,14 @@ __neigh_lookup_errno(struct neigh_table *tbl, const void *pkey, | |||
363 | return neigh_create(tbl, pkey, dev); | 363 | return neigh_create(tbl, pkey, dev); |
364 | } | 364 | } |
365 | 365 | ||
366 | #define LOCALLY_ENQUEUED -2 | 366 | struct neighbour_cb { |
367 | unsigned long sched_next; | ||
368 | unsigned int flags; | ||
369 | }; | ||
370 | |||
371 | #define LOCALLY_ENQUEUED 0x1 | ||
372 | |||
373 | #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) | ||
367 | 374 | ||
368 | #endif | 375 | #endif |
369 | #endif | 376 | #endif |
diff --git a/include/net/p8022.h b/include/net/p8022.h index 3c99a86c3581..42e9fac51b31 100644 --- a/include/net/p8022.h +++ b/include/net/p8022.h | |||
@@ -4,7 +4,10 @@ extern struct datalink_proto * | |||
4 | register_8022_client(unsigned char type, | 4 | register_8022_client(unsigned char type, |
5 | int (*func)(struct sk_buff *skb, | 5 | int (*func)(struct sk_buff *skb, |
6 | struct net_device *dev, | 6 | struct net_device *dev, |
7 | struct packet_type *pt)); | 7 | struct packet_type *pt, |
8 | struct net_device *orig_dev)); | ||
8 | extern void unregister_8022_client(struct datalink_proto *proto); | 9 | extern void unregister_8022_client(struct datalink_proto *proto); |
9 | 10 | ||
11 | extern struct datalink_proto *make_8023_client(void); | ||
12 | extern void destroy_8023_client(struct datalink_proto *dl); | ||
10 | #endif | 13 | #endif |
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 4abda6aec05a..b902d24a3256 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h | |||
@@ -352,10 +352,10 @@ tcf_change_indev(struct tcf_proto *tp, char *indev, struct rtattr *indev_tlv) | |||
352 | static inline int | 352 | static inline int |
353 | tcf_match_indev(struct sk_buff *skb, char *indev) | 353 | tcf_match_indev(struct sk_buff *skb, char *indev) |
354 | { | 354 | { |
355 | if (0 != indev[0]) { | 355 | if (indev[0]) { |
356 | if (NULL == skb->input_dev) | 356 | if (!skb->input_dev) |
357 | return 0; | 357 | return 0; |
358 | else if (0 != strcmp(indev, skb->input_dev->name)) | 358 | if (strcmp(indev, skb->input_dev->name)) |
359 | return 0; | 359 | return 0; |
360 | } | 360 | } |
361 | 361 | ||
diff --git a/include/net/psnap.h b/include/net/psnap.h index 9c94e8f98b36..b2e01cc3fc8a 100644 --- a/include/net/psnap.h +++ b/include/net/psnap.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef _NET_PSNAP_H | 1 | #ifndef _NET_PSNAP_H |
2 | #define _NET_PSNAP_H | 2 | #define _NET_PSNAP_H |
3 | 3 | ||
4 | extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *)); | 4 | extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *orig_dev)); |
5 | extern void unregister_snap_client(struct datalink_proto *proto); | 5 | extern void unregister_snap_client(struct datalink_proto *proto); |
6 | 6 | ||
7 | #endif | 7 | #endif |
diff --git a/include/net/raw.h b/include/net/raw.h index 1c411c45587a..f47917469b12 100644 --- a/include/net/raw.h +++ b/include/net/raw.h | |||
@@ -17,10 +17,10 @@ | |||
17 | #ifndef _RAW_H | 17 | #ifndef _RAW_H |
18 | #define _RAW_H | 18 | #define _RAW_H |
19 | 19 | ||
20 | #include <linux/config.h> | ||
20 | 21 | ||
21 | extern struct proto raw_prot; | 22 | extern struct proto raw_prot; |
22 | 23 | ||
23 | |||
24 | extern void raw_err(struct sock *, struct sk_buff *, u32 info); | 24 | extern void raw_err(struct sock *, struct sk_buff *, u32 info); |
25 | extern int raw_rcv(struct sock *, struct sk_buff *); | 25 | extern int raw_rcv(struct sock *, struct sk_buff *); |
26 | 26 | ||
@@ -37,6 +37,11 @@ extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, | |||
37 | unsigned long raddr, unsigned long laddr, | 37 | unsigned long raddr, unsigned long laddr, |
38 | int dif); | 38 | int dif); |
39 | 39 | ||
40 | extern void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); | 40 | extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); |
41 | |||
42 | #ifdef CONFIG_PROC_FS | ||
43 | extern int raw_proc_init(void); | ||
44 | extern void raw_proc_exit(void); | ||
45 | #endif | ||
41 | 46 | ||
42 | #endif /* _RAW_H */ | 47 | #endif /* _RAW_H */ |
diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 23fd9a6a221a..14476a71725e 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h | |||
@@ -7,10 +7,11 @@ | |||
7 | extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; | 7 | extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; |
8 | extern rwlock_t raw_v6_lock; | 8 | extern rwlock_t raw_v6_lock; |
9 | 9 | ||
10 | extern void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); | 10 | extern int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); |
11 | 11 | ||
12 | extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, | 12 | extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, |
13 | struct in6_addr *loc_addr, struct in6_addr *rmt_addr); | 13 | struct in6_addr *loc_addr, struct in6_addr *rmt_addr, |
14 | int dif); | ||
14 | 15 | ||
15 | extern int rawv6_rcv(struct sock *sk, | 16 | extern int rawv6_rcv(struct sock *sk, |
16 | struct sk_buff *skb); | 17 | struct sk_buff *skb); |
diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 72fd6f5e86b1..b52cc52ffe39 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h | |||
@@ -89,6 +89,7 @@ struct listen_sock { | |||
89 | int qlen_young; | 89 | int qlen_young; |
90 | int clock_hand; | 90 | int clock_hand; |
91 | u32 hash_rnd; | 91 | u32 hash_rnd; |
92 | u32 nr_table_entries; | ||
92 | struct request_sock *syn_table[0]; | 93 | struct request_sock *syn_table[0]; |
93 | }; | 94 | }; |
94 | 95 | ||
@@ -96,6 +97,7 @@ struct listen_sock { | |||
96 | * | 97 | * |
97 | * @rskq_accept_head - FIFO head of established children | 98 | * @rskq_accept_head - FIFO head of established children |
98 | * @rskq_accept_tail - FIFO tail of established children | 99 | * @rskq_accept_tail - FIFO tail of established children |
100 | * @rskq_defer_accept - User waits for some data after accept() | ||
99 | * @syn_wait_lock - serializer | 101 | * @syn_wait_lock - serializer |
100 | * | 102 | * |
101 | * %syn_wait_lock is necessary only to avoid proc interface having to grab the main | 103 | * %syn_wait_lock is necessary only to avoid proc interface having to grab the main |
@@ -111,6 +113,8 @@ struct request_sock_queue { | |||
111 | struct request_sock *rskq_accept_head; | 113 | struct request_sock *rskq_accept_head; |
112 | struct request_sock *rskq_accept_tail; | 114 | struct request_sock *rskq_accept_tail; |
113 | rwlock_t syn_wait_lock; | 115 | rwlock_t syn_wait_lock; |
116 | u8 rskq_defer_accept; | ||
117 | /* 3 bytes hole, try to pack */ | ||
114 | struct listen_sock *listen_opt; | 118 | struct listen_sock *listen_opt; |
115 | }; | 119 | }; |
116 | 120 | ||
@@ -129,11 +133,13 @@ static inline struct listen_sock *reqsk_queue_yank_listen_sk(struct request_sock | |||
129 | return lopt; | 133 | return lopt; |
130 | } | 134 | } |
131 | 135 | ||
132 | static inline void reqsk_queue_destroy(struct request_sock_queue *queue) | 136 | static inline void __reqsk_queue_destroy(struct request_sock_queue *queue) |
133 | { | 137 | { |
134 | kfree(reqsk_queue_yank_listen_sk(queue)); | 138 | kfree(reqsk_queue_yank_listen_sk(queue)); |
135 | } | 139 | } |
136 | 140 | ||
141 | extern void reqsk_queue_destroy(struct request_sock_queue *queue); | ||
142 | |||
137 | static inline struct request_sock * | 143 | static inline struct request_sock * |
138 | reqsk_queue_yank_acceptq(struct request_sock_queue *queue) | 144 | reqsk_queue_yank_acceptq(struct request_sock_queue *queue) |
139 | { | 145 | { |
@@ -221,17 +227,17 @@ static inline int reqsk_queue_added(struct request_sock_queue *queue) | |||
221 | return prev_qlen; | 227 | return prev_qlen; |
222 | } | 228 | } |
223 | 229 | ||
224 | static inline int reqsk_queue_len(struct request_sock_queue *queue) | 230 | static inline int reqsk_queue_len(const struct request_sock_queue *queue) |
225 | { | 231 | { |
226 | return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; | 232 | return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; |
227 | } | 233 | } |
228 | 234 | ||
229 | static inline int reqsk_queue_len_young(struct request_sock_queue *queue) | 235 | static inline int reqsk_queue_len_young(const struct request_sock_queue *queue) |
230 | { | 236 | { |
231 | return queue->listen_opt->qlen_young; | 237 | return queue->listen_opt->qlen_young; |
232 | } | 238 | } |
233 | 239 | ||
234 | static inline int reqsk_queue_is_full(struct request_sock_queue *queue) | 240 | static inline int reqsk_queue_is_full(const struct request_sock_queue *queue) |
235 | { | 241 | { |
236 | return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; | 242 | return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; |
237 | } | 243 | } |
diff --git a/include/net/route.h b/include/net/route.h index c3cd069a9aca..dbe79ca67d31 100644 --- a/include/net/route.h +++ b/include/net/route.h | |||
@@ -105,10 +105,6 @@ struct rt_cache_stat | |||
105 | unsigned int out_hlist_search; | 105 | unsigned int out_hlist_search; |
106 | }; | 106 | }; |
107 | 107 | ||
108 | extern struct rt_cache_stat *rt_cache_stat; | ||
109 | #define RT_CACHE_STAT_INC(field) \ | ||
110 | (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++) | ||
111 | |||
112 | extern struct ip_rt_acct *ip_rt_acct; | 108 | extern struct ip_rt_acct *ip_rt_acct; |
113 | 109 | ||
114 | struct in_device; | 110 | struct in_device; |
@@ -199,4 +195,6 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt) | |||
199 | return rt->peer; | 195 | return rt->peer; |
200 | } | 196 | } |
201 | 197 | ||
198 | extern ctl_table ipv4_route_table[]; | ||
199 | |||
202 | #endif /* _ROUTE_H */ | 200 | #endif /* _ROUTE_H */ |
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 5999e5684bbf..c51541ee0247 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h | |||
@@ -47,10 +47,10 @@ | |||
47 | #ifndef __sctp_constants_h__ | 47 | #ifndef __sctp_constants_h__ |
48 | #define __sctp_constants_h__ | 48 | #define __sctp_constants_h__ |
49 | 49 | ||
50 | #include <linux/tcp.h> /* For TCP states used in sctp_sock_state_t */ | ||
51 | #include <linux/sctp.h> | 50 | #include <linux/sctp.h> |
52 | #include <linux/ipv6.h> /* For ipv6hdr. */ | 51 | #include <linux/ipv6.h> /* For ipv6hdr. */ |
53 | #include <net/sctp/user.h> | 52 | #include <net/sctp/user.h> |
53 | #include <net/tcp_states.h> /* For TCP states used in sctp_sock_state_t */ | ||
54 | 54 | ||
55 | /* Value used for stream negotiation. */ | 55 | /* Value used for stream negotiation. */ |
56 | enum { SCTP_MAX_STREAM = 0xffff }; | 56 | enum { SCTP_MAX_STREAM = 0xffff }; |
diff --git a/include/net/sock.h b/include/net/sock.h index a1042d08becd..312cb25cbd18 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -88,6 +88,7 @@ do { spin_lock_init(&((__sk)->sk_lock.slock)); \ | |||
88 | } while(0) | 88 | } while(0) |
89 | 89 | ||
90 | struct sock; | 90 | struct sock; |
91 | struct proto; | ||
91 | 92 | ||
92 | /** | 93 | /** |
93 | * struct sock_common - minimal network layer representation of sockets | 94 | * struct sock_common - minimal network layer representation of sockets |
@@ -98,10 +99,11 @@ struct sock; | |||
98 | * @skc_node: main hash linkage for various protocol lookup tables | 99 | * @skc_node: main hash linkage for various protocol lookup tables |
99 | * @skc_bind_node: bind hash linkage for various protocol lookup tables | 100 | * @skc_bind_node: bind hash linkage for various protocol lookup tables |
100 | * @skc_refcnt: reference count | 101 | * @skc_refcnt: reference count |
102 | * @skc_prot: protocol handlers inside a network family | ||
101 | * | 103 | * |
102 | * This is the minimal network layer representation of sockets, the header | 104 | * This is the minimal network layer representation of sockets, the header |
103 | * for struct sock and struct tcp_tw_bucket. | 105 | * for struct sock and struct inet_timewait_sock. |
104 | */ | 106 | */ |
105 | struct sock_common { | 107 | struct sock_common { |
106 | unsigned short skc_family; | 108 | unsigned short skc_family; |
107 | volatile unsigned char skc_state; | 109 | volatile unsigned char skc_state; |
@@ -110,11 +112,12 @@ struct sock_common { | |||
110 | struct hlist_node skc_node; | 112 | struct hlist_node skc_node; |
111 | struct hlist_node skc_bind_node; | 113 | struct hlist_node skc_bind_node; |
112 | atomic_t skc_refcnt; | 114 | atomic_t skc_refcnt; |
115 | struct proto *skc_prot; | ||
113 | }; | 116 | }; |
114 | 117 | ||
115 | /** | 118 | /** |
116 | * struct sock - network layer representation of sockets | 119 | * struct sock - network layer representation of sockets |
117 | * @__sk_common: shared layout with tcp_tw_bucket | 120 | * @__sk_common: shared layout with inet_timewait_sock |
118 | * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN | 121 | * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN |
119 | * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings | 122 | * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings |
120 | * @sk_lock: synchronizer | 123 | * @sk_lock: synchronizer |
@@ -136,11 +139,10 @@ struct sock_common { | |||
136 | * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets | 139 | * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets |
137 | * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) | 140 | * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) |
138 | * @sk_lingertime: %SO_LINGER l_linger setting | 141 | * @sk_lingertime: %SO_LINGER l_linger setting |
139 | * @sk_hashent: hash entry in several tables (e.g. tcp_ehash) | 142 | * @sk_hashent: hash entry in several tables (e.g. inet_hashinfo.ehash) |
140 | * @sk_backlog: always used with the per-socket spinlock held | 143 | * @sk_backlog: always used with the per-socket spinlock held |
141 | * @sk_callback_lock: used with the callbacks in the end of this struct | 144 | * @sk_callback_lock: used with the callbacks in the end of this struct |
142 | * @sk_error_queue: rarely used | 145 | * @sk_error_queue: rarely used |
143 | * @sk_prot: protocol handlers inside a network family | ||
144 | * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance) | 146 | * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance) |
145 | * @sk_err: last error | 147 | * @sk_err: last error |
146 | * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out' | 148 | * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out' |
@@ -173,7 +175,7 @@ struct sock_common { | |||
173 | */ | 175 | */ |
174 | struct sock { | 176 | struct sock { |
175 | /* | 177 | /* |
176 | * Now struct tcp_tw_bucket also uses sock_common, so please just | 178 | * Now struct inet_timewait_sock also uses sock_common, so please just |
177 | * don't add nothing before this first member (__sk_common) --acme | 179 | * don't add nothing before this first member (__sk_common) --acme |
178 | */ | 180 | */ |
179 | struct sock_common __sk_common; | 181 | struct sock_common __sk_common; |
@@ -184,6 +186,7 @@ struct sock { | |||
184 | #define sk_node __sk_common.skc_node | 186 | #define sk_node __sk_common.skc_node |
185 | #define sk_bind_node __sk_common.skc_bind_node | 187 | #define sk_bind_node __sk_common.skc_bind_node |
186 | #define sk_refcnt __sk_common.skc_refcnt | 188 | #define sk_refcnt __sk_common.skc_refcnt |
189 | #define sk_prot __sk_common.skc_prot | ||
187 | unsigned char sk_shutdown : 2, | 190 | unsigned char sk_shutdown : 2, |
188 | sk_no_check : 2, | 191 | sk_no_check : 2, |
189 | sk_userlocks : 4; | 192 | sk_userlocks : 4; |
@@ -218,7 +221,6 @@ struct sock { | |||
218 | struct sk_buff *tail; | 221 | struct sk_buff *tail; |
219 | } sk_backlog; | 222 | } sk_backlog; |
220 | struct sk_buff_head sk_error_queue; | 223 | struct sk_buff_head sk_error_queue; |
221 | struct proto *sk_prot; | ||
222 | struct proto *sk_prot_creator; | 224 | struct proto *sk_prot_creator; |
223 | rwlock_t sk_callback_lock; | 225 | rwlock_t sk_callback_lock; |
224 | int sk_err, | 226 | int sk_err, |
@@ -253,28 +255,28 @@ struct sock { | |||
253 | /* | 255 | /* |
254 | * Hashed lists helper routines | 256 | * Hashed lists helper routines |
255 | */ | 257 | */ |
256 | static inline struct sock *__sk_head(struct hlist_head *head) | 258 | static inline struct sock *__sk_head(const struct hlist_head *head) |
257 | { | 259 | { |
258 | return hlist_entry(head->first, struct sock, sk_node); | 260 | return hlist_entry(head->first, struct sock, sk_node); |
259 | } | 261 | } |
260 | 262 | ||
261 | static inline struct sock *sk_head(struct hlist_head *head) | 263 | static inline struct sock *sk_head(const struct hlist_head *head) |
262 | { | 264 | { |
263 | return hlist_empty(head) ? NULL : __sk_head(head); | 265 | return hlist_empty(head) ? NULL : __sk_head(head); |
264 | } | 266 | } |
265 | 267 | ||
266 | static inline struct sock *sk_next(struct sock *sk) | 268 | static inline struct sock *sk_next(const struct sock *sk) |
267 | { | 269 | { |
268 | return sk->sk_node.next ? | 270 | return sk->sk_node.next ? |
269 | hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; | 271 | hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; |
270 | } | 272 | } |
271 | 273 | ||
272 | static inline int sk_unhashed(struct sock *sk) | 274 | static inline int sk_unhashed(const struct sock *sk) |
273 | { | 275 | { |
274 | return hlist_unhashed(&sk->sk_node); | 276 | return hlist_unhashed(&sk->sk_node); |
275 | } | 277 | } |
276 | 278 | ||
277 | static inline int sk_hashed(struct sock *sk) | 279 | static inline int sk_hashed(const struct sock *sk) |
278 | { | 280 | { |
279 | return sk->sk_node.pprev != NULL; | 281 | return sk->sk_node.pprev != NULL; |
280 | } | 282 | } |
@@ -384,6 +386,11 @@ enum sock_flags { | |||
384 | SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ | 386 | SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ |
385 | }; | 387 | }; |
386 | 388 | ||
389 | static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) | ||
390 | { | ||
391 | nsk->sk_flags = osk->sk_flags; | ||
392 | } | ||
393 | |||
387 | static inline void sock_set_flag(struct sock *sk, enum sock_flags flag) | 394 | static inline void sock_set_flag(struct sock *sk, enum sock_flags flag) |
388 | { | 395 | { |
389 | __set_bit(flag, &sk->sk_flags); | 396 | __set_bit(flag, &sk->sk_flags); |
@@ -549,6 +556,10 @@ struct proto { | |||
549 | kmem_cache_t *slab; | 556 | kmem_cache_t *slab; |
550 | unsigned int obj_size; | 557 | unsigned int obj_size; |
551 | 558 | ||
559 | kmem_cache_t *twsk_slab; | ||
560 | unsigned int twsk_obj_size; | ||
561 | atomic_t *orphan_count; | ||
562 | |||
552 | struct request_sock_ops *rsk_prot; | 563 | struct request_sock_ops *rsk_prot; |
553 | 564 | ||
554 | struct module *owner; | 565 | struct module *owner; |
@@ -556,7 +567,9 @@ struct proto { | |||
556 | char name[32]; | 567 | char name[32]; |
557 | 568 | ||
558 | struct list_head node; | 569 | struct list_head node; |
559 | 570 | #ifdef SOCK_REFCNT_DEBUG | |
571 | atomic_t socks; | ||
572 | #endif | ||
560 | struct { | 573 | struct { |
561 | int inuse; | 574 | int inuse; |
562 | u8 __pad[SMP_CACHE_BYTES - sizeof(int)]; | 575 | u8 __pad[SMP_CACHE_BYTES - sizeof(int)]; |
@@ -566,6 +579,31 @@ struct proto { | |||
566 | extern int proto_register(struct proto *prot, int alloc_slab); | 579 | extern int proto_register(struct proto *prot, int alloc_slab); |
567 | extern void proto_unregister(struct proto *prot); | 580 | extern void proto_unregister(struct proto *prot); |
568 | 581 | ||
582 | #ifdef SOCK_REFCNT_DEBUG | ||
583 | static inline void sk_refcnt_debug_inc(struct sock *sk) | ||
584 | { | ||
585 | atomic_inc(&sk->sk_prot->socks); | ||
586 | } | ||
587 | |||
588 | static inline void sk_refcnt_debug_dec(struct sock *sk) | ||
589 | { | ||
590 | atomic_dec(&sk->sk_prot->socks); | ||
591 | printk(KERN_DEBUG "%s socket %p released, %d are still alive\n", | ||
592 | sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks)); | ||
593 | } | ||
594 | |||
595 | static inline void sk_refcnt_debug_release(const struct sock *sk) | ||
596 | { | ||
597 | if (atomic_read(&sk->sk_refcnt) != 1) | ||
598 | printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", | ||
599 | sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt)); | ||
600 | } | ||
601 | #else /* SOCK_REFCNT_DEBUG */ | ||
602 | #define sk_refcnt_debug_inc(sk) do { } while (0) | ||
603 | #define sk_refcnt_debug_dec(sk) do { } while (0) | ||
604 | #define sk_refcnt_debug_release(sk) do { } while (0) | ||
605 | #endif /* SOCK_REFCNT_DEBUG */ | ||
606 | |||
569 | /* Called with local bh disabled */ | 607 | /* Called with local bh disabled */ |
570 | static __inline__ void sock_prot_inc_use(struct proto *prot) | 608 | static __inline__ void sock_prot_inc_use(struct proto *prot) |
571 | { | 609 | { |
@@ -577,6 +615,15 @@ static __inline__ void sock_prot_dec_use(struct proto *prot) | |||
577 | prot->stats[smp_processor_id()].inuse--; | 615 | prot->stats[smp_processor_id()].inuse--; |
578 | } | 616 | } |
579 | 617 | ||
618 | /* With per-bucket locks this operation is not-atomic, so that | ||
619 | * this version is not worse. | ||
620 | */ | ||
621 | static inline void __sk_prot_rehash(struct sock *sk) | ||
622 | { | ||
623 | sk->sk_prot->unhash(sk); | ||
624 | sk->sk_prot->hash(sk); | ||
625 | } | ||
626 | |||
580 | /* About 10 seconds */ | 627 | /* About 10 seconds */ |
581 | #define SOCK_DESTROY_TIME (10*HZ) | 628 | #define SOCK_DESTROY_TIME (10*HZ) |
582 | 629 | ||
@@ -688,6 +735,8 @@ extern struct sock *sk_alloc(int family, | |||
688 | unsigned int __nocast priority, | 735 | unsigned int __nocast priority, |
689 | struct proto *prot, int zero_it); | 736 | struct proto *prot, int zero_it); |
690 | extern void sk_free(struct sock *sk); | 737 | extern void sk_free(struct sock *sk); |
738 | extern struct sock *sk_clone(const struct sock *sk, | ||
739 | const unsigned int __nocast priority); | ||
691 | 740 | ||
692 | extern struct sk_buff *sock_wmalloc(struct sock *sk, | 741 | extern struct sk_buff *sock_wmalloc(struct sock *sk, |
693 | unsigned long size, int force, | 742 | unsigned long size, int force, |
@@ -981,6 +1030,16 @@ sk_dst_check(struct sock *sk, u32 cookie) | |||
981 | return dst; | 1030 | return dst; |
982 | } | 1031 | } |
983 | 1032 | ||
1033 | static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst) | ||
1034 | { | ||
1035 | __sk_dst_set(sk, dst); | ||
1036 | sk->sk_route_caps = dst->dev->features; | ||
1037 | if (sk->sk_route_caps & NETIF_F_TSO) { | ||
1038 | if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) | ||
1039 | sk->sk_route_caps &= ~NETIF_F_TSO; | ||
1040 | } | ||
1041 | } | ||
1042 | |||
984 | static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb) | 1043 | static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb) |
985 | { | 1044 | { |
986 | sk->sk_wmem_queued += skb->truesize; | 1045 | sk->sk_wmem_queued += skb->truesize; |
@@ -1141,7 +1200,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, | |||
1141 | int hdr_len; | 1200 | int hdr_len; |
1142 | 1201 | ||
1143 | hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header); | 1202 | hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header); |
1144 | skb = alloc_skb(size + hdr_len, gfp); | 1203 | skb = alloc_skb_fclone(size + hdr_len, gfp); |
1145 | if (skb) { | 1204 | if (skb) { |
1146 | skb->truesize += mem; | 1205 | skb->truesize += mem; |
1147 | if (sk->sk_forward_alloc >= (int)skb->truesize || | 1206 | if (sk->sk_forward_alloc >= (int)skb->truesize || |
@@ -1223,16 +1282,19 @@ static inline int sock_intr_errno(long timeo) | |||
1223 | static __inline__ void | 1282 | static __inline__ void |
1224 | sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) | 1283 | sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) |
1225 | { | 1284 | { |
1226 | struct timeval *stamp = &skb->stamp; | 1285 | struct timeval stamp; |
1286 | |||
1287 | skb_get_timestamp(skb, &stamp); | ||
1227 | if (sock_flag(sk, SOCK_RCVTSTAMP)) { | 1288 | if (sock_flag(sk, SOCK_RCVTSTAMP)) { |
1228 | /* Race occurred between timestamp enabling and packet | 1289 | /* Race occurred between timestamp enabling and packet |
1229 | receiving. Fill in the current time for now. */ | 1290 | receiving. Fill in the current time for now. */ |
1230 | if (stamp->tv_sec == 0) | 1291 | if (stamp.tv_sec == 0) |
1231 | do_gettimeofday(stamp); | 1292 | do_gettimeofday(&stamp); |
1293 | skb_set_timestamp(skb, &stamp); | ||
1232 | put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), | 1294 | put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), |
1233 | stamp); | 1295 | &stamp); |
1234 | } else | 1296 | } else |
1235 | sk->sk_stamp = *stamp; | 1297 | sk->sk_stamp = stamp; |
1236 | } | 1298 | } |
1237 | 1299 | ||
1238 | /** | 1300 | /** |
@@ -1257,11 +1319,11 @@ extern int sock_get_timestamp(struct sock *, struct timeval __user *); | |||
1257 | */ | 1319 | */ |
1258 | 1320 | ||
1259 | #if 0 | 1321 | #if 0 |
1260 | #define NETDEBUG(x) do { } while (0) | 1322 | #define NETDEBUG(fmt, args...) do { } while (0) |
1261 | #define LIMIT_NETDEBUG(x) do {} while(0) | 1323 | #define LIMIT_NETDEBUG(fmt, args...) do { } while(0) |
1262 | #else | 1324 | #else |
1263 | #define NETDEBUG(x) do { x; } while (0) | 1325 | #define NETDEBUG(fmt, args...) printk(fmt,##args) |
1264 | #define LIMIT_NETDEBUG(x) do { if (net_ratelimit()) { x; } } while(0) | 1326 | #define LIMIT_NETDEBUG(fmt, args...) do { if (net_ratelimit()) printk(fmt,##args); } while(0) |
1265 | #endif | 1327 | #endif |
1266 | 1328 | ||
1267 | /* | 1329 | /* |
@@ -1308,4 +1370,14 @@ static inline int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsign | |||
1308 | } | 1370 | } |
1309 | #endif | 1371 | #endif |
1310 | 1372 | ||
1373 | extern void sk_init(void); | ||
1374 | |||
1375 | #ifdef CONFIG_SYSCTL | ||
1376 | extern struct ctl_table core_table[]; | ||
1377 | extern int sysctl_optmem_max; | ||
1378 | #endif | ||
1379 | |||
1380 | extern __u32 sysctl_wmem_default; | ||
1381 | extern __u32 sysctl_rmem_default; | ||
1382 | |||
1311 | #endif /* _SOCK_H */ | 1383 | #endif /* _SOCK_H */ |
diff --git a/include/net/tcp.h b/include/net/tcp.h index 5010f0c5a56e..d6bcf1317a6a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -21,360 +21,29 @@ | |||
21 | #define TCP_DEBUG 1 | 21 | #define TCP_DEBUG 1 |
22 | #define FASTRETRANS_DEBUG 1 | 22 | #define FASTRETRANS_DEBUG 1 |
23 | 23 | ||
24 | /* Cancel timers, when they are not required. */ | ||
25 | #undef TCP_CLEAR_TIMERS | ||
26 | |||
27 | #include <linux/config.h> | 24 | #include <linux/config.h> |
28 | #include <linux/list.h> | 25 | #include <linux/list.h> |
29 | #include <linux/tcp.h> | 26 | #include <linux/tcp.h> |
30 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
31 | #include <linux/cache.h> | 28 | #include <linux/cache.h> |
32 | #include <linux/percpu.h> | 29 | #include <linux/percpu.h> |
30 | |||
31 | #include <net/inet_connection_sock.h> | ||
32 | #include <net/inet_timewait_sock.h> | ||
33 | #include <net/inet_hashtables.h> | ||
33 | #include <net/checksum.h> | 34 | #include <net/checksum.h> |
34 | #include <net/request_sock.h> | 35 | #include <net/request_sock.h> |
35 | #include <net/sock.h> | 36 | #include <net/sock.h> |
36 | #include <net/snmp.h> | 37 | #include <net/snmp.h> |
37 | #include <net/ip.h> | 38 | #include <net/ip.h> |
38 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 39 | #include <net/tcp_states.h> |
39 | #include <linux/ipv6.h> | ||
40 | #endif | ||
41 | #include <linux/seq_file.h> | ||
42 | |||
43 | /* This is for all connections with a full identity, no wildcards. | ||
44 | * New scheme, half the table is for TIME_WAIT, the other half is | ||
45 | * for the rest. I'll experiment with dynamic table growth later. | ||
46 | */ | ||
47 | struct tcp_ehash_bucket { | ||
48 | rwlock_t lock; | ||
49 | struct hlist_head chain; | ||
50 | } __attribute__((__aligned__(8))); | ||
51 | |||
52 | /* This is for listening sockets, thus all sockets which possess wildcards. */ | ||
53 | #define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ | ||
54 | |||
55 | /* There are a few simple rules, which allow for local port reuse by | ||
56 | * an application. In essence: | ||
57 | * | ||
58 | * 1) Sockets bound to different interfaces may share a local port. | ||
59 | * Failing that, goto test 2. | ||
60 | * 2) If all sockets have sk->sk_reuse set, and none of them are in | ||
61 | * TCP_LISTEN state, the port may be shared. | ||
62 | * Failing that, goto test 3. | ||
63 | * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local | ||
64 | * address, and none of them are the same, the port may be | ||
65 | * shared. | ||
66 | * Failing this, the port cannot be shared. | ||
67 | * | ||
68 | * The interesting point, is test #2. This is what an FTP server does | ||
69 | * all day. To optimize this case we use a specific flag bit defined | ||
70 | * below. As we add sockets to a bind bucket list, we perform a | ||
71 | * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN)) | ||
72 | * As long as all sockets added to a bind bucket pass this test, | ||
73 | * the flag bit will be set. | ||
74 | * The resulting situation is that tcp_v[46]_verify_bind() can just check | ||
75 | * for this flag bit, if it is set and the socket trying to bind has | ||
76 | * sk->sk_reuse set, we don't even have to walk the owners list at all, | ||
77 | * we return that it is ok to bind this socket to the requested local port. | ||
78 | * | ||
79 | * Sounds like a lot of work, but it is worth it. In a more naive | ||
80 | * implementation (ie. current FreeBSD etc.) the entire list of ports | ||
81 | * must be walked for each data port opened by an ftp server. Needless | ||
82 | * to say, this does not scale at all. With a couple thousand FTP | ||
83 | * users logged onto your box, isn't it nice to know that new data | ||
84 | * ports are created in O(1) time? I thought so. ;-) -DaveM | ||
85 | */ | ||
86 | struct tcp_bind_bucket { | ||
87 | unsigned short port; | ||
88 | signed short fastreuse; | ||
89 | struct hlist_node node; | ||
90 | struct hlist_head owners; | ||
91 | }; | ||
92 | |||
93 | #define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node) | ||
94 | |||
95 | struct tcp_bind_hashbucket { | ||
96 | spinlock_t lock; | ||
97 | struct hlist_head chain; | ||
98 | }; | ||
99 | |||
100 | static inline struct tcp_bind_bucket *__tb_head(struct tcp_bind_hashbucket *head) | ||
101 | { | ||
102 | return hlist_entry(head->chain.first, struct tcp_bind_bucket, node); | ||
103 | } | ||
104 | |||
105 | static inline struct tcp_bind_bucket *tb_head(struct tcp_bind_hashbucket *head) | ||
106 | { | ||
107 | return hlist_empty(&head->chain) ? NULL : __tb_head(head); | ||
108 | } | ||
109 | |||
110 | extern struct tcp_hashinfo { | ||
111 | /* This is for sockets with full identity only. Sockets here will | ||
112 | * always be without wildcards and will have the following invariant: | ||
113 | * | ||
114 | * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE | ||
115 | * | ||
116 | * First half of the table is for sockets not in TIME_WAIT, second half | ||
117 | * is for TIME_WAIT sockets only. | ||
118 | */ | ||
119 | struct tcp_ehash_bucket *__tcp_ehash; | ||
120 | |||
121 | /* Ok, let's try this, I give up, we do need a local binding | ||
122 | * TCP hash as well as the others for fast bind/connect. | ||
123 | */ | ||
124 | struct tcp_bind_hashbucket *__tcp_bhash; | ||
125 | 40 | ||
126 | int __tcp_bhash_size; | 41 | #include <linux/seq_file.h> |
127 | int __tcp_ehash_size; | ||
128 | |||
129 | /* All sockets in TCP_LISTEN state will be in here. This is the only | ||
130 | * table where wildcard'd TCP sockets can exist. Hash function here | ||
131 | * is just local port number. | ||
132 | */ | ||
133 | struct hlist_head __tcp_listening_hash[TCP_LHTABLE_SIZE]; | ||
134 | |||
135 | /* All the above members are written once at bootup and | ||
136 | * never written again _or_ are predominantly read-access. | ||
137 | * | ||
138 | * Now align to a new cache line as all the following members | ||
139 | * are often dirty. | ||
140 | */ | ||
141 | rwlock_t __tcp_lhash_lock ____cacheline_aligned; | ||
142 | atomic_t __tcp_lhash_users; | ||
143 | wait_queue_head_t __tcp_lhash_wait; | ||
144 | spinlock_t __tcp_portalloc_lock; | ||
145 | } tcp_hashinfo; | ||
146 | |||
147 | #define tcp_ehash (tcp_hashinfo.__tcp_ehash) | ||
148 | #define tcp_bhash (tcp_hashinfo.__tcp_bhash) | ||
149 | #define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size) | ||
150 | #define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size) | ||
151 | #define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash) | ||
152 | #define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock) | ||
153 | #define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users) | ||
154 | #define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait) | ||
155 | #define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock) | ||
156 | |||
157 | extern kmem_cache_t *tcp_bucket_cachep; | ||
158 | extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, | ||
159 | unsigned short snum); | ||
160 | extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb); | ||
161 | extern void tcp_bucket_unlock(struct sock *sk); | ||
162 | extern int tcp_port_rover; | ||
163 | |||
164 | /* These are AF independent. */ | ||
165 | static __inline__ int tcp_bhashfn(__u16 lport) | ||
166 | { | ||
167 | return (lport & (tcp_bhash_size - 1)); | ||
168 | } | ||
169 | |||
170 | extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, | ||
171 | unsigned short snum); | ||
172 | |||
173 | #if (BITS_PER_LONG == 64) | ||
174 | #define TCP_ADDRCMP_ALIGN_BYTES 8 | ||
175 | #else | ||
176 | #define TCP_ADDRCMP_ALIGN_BYTES 4 | ||
177 | #endif | ||
178 | |||
179 | /* This is a TIME_WAIT bucket. It works around the memory consumption | ||
180 | * problems of sockets in such a state on heavily loaded servers, but | ||
181 | * without violating the protocol specification. | ||
182 | */ | ||
183 | struct tcp_tw_bucket { | ||
184 | /* | ||
185 | * Now struct sock also uses sock_common, so please just | ||
186 | * don't add nothing before this first member (__tw_common) --acme | ||
187 | */ | ||
188 | struct sock_common __tw_common; | ||
189 | #define tw_family __tw_common.skc_family | ||
190 | #define tw_state __tw_common.skc_state | ||
191 | #define tw_reuse __tw_common.skc_reuse | ||
192 | #define tw_bound_dev_if __tw_common.skc_bound_dev_if | ||
193 | #define tw_node __tw_common.skc_node | ||
194 | #define tw_bind_node __tw_common.skc_bind_node | ||
195 | #define tw_refcnt __tw_common.skc_refcnt | ||
196 | volatile unsigned char tw_substate; | ||
197 | unsigned char tw_rcv_wscale; | ||
198 | __u16 tw_sport; | ||
199 | /* Socket demultiplex comparisons on incoming packets. */ | ||
200 | /* these five are in inet_sock */ | ||
201 | __u32 tw_daddr | ||
202 | __attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES))); | ||
203 | __u32 tw_rcv_saddr; | ||
204 | __u16 tw_dport; | ||
205 | __u16 tw_num; | ||
206 | /* And these are ours. */ | ||
207 | int tw_hashent; | ||
208 | int tw_timeout; | ||
209 | __u32 tw_rcv_nxt; | ||
210 | __u32 tw_snd_nxt; | ||
211 | __u32 tw_rcv_wnd; | ||
212 | __u32 tw_ts_recent; | ||
213 | long tw_ts_recent_stamp; | ||
214 | unsigned long tw_ttd; | ||
215 | struct tcp_bind_bucket *tw_tb; | ||
216 | struct hlist_node tw_death_node; | ||
217 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
218 | struct in6_addr tw_v6_daddr; | ||
219 | struct in6_addr tw_v6_rcv_saddr; | ||
220 | int tw_v6_ipv6only; | ||
221 | #endif | ||
222 | }; | ||
223 | |||
224 | static __inline__ void tw_add_node(struct tcp_tw_bucket *tw, | ||
225 | struct hlist_head *list) | ||
226 | { | ||
227 | hlist_add_head(&tw->tw_node, list); | ||
228 | } | ||
229 | |||
230 | static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw, | ||
231 | struct hlist_head *list) | ||
232 | { | ||
233 | hlist_add_head(&tw->tw_bind_node, list); | ||
234 | } | ||
235 | |||
236 | static inline int tw_dead_hashed(struct tcp_tw_bucket *tw) | ||
237 | { | ||
238 | return tw->tw_death_node.pprev != NULL; | ||
239 | } | ||
240 | |||
241 | static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw) | ||
242 | { | ||
243 | tw->tw_death_node.pprev = NULL; | ||
244 | } | ||
245 | |||
246 | static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw) | ||
247 | { | ||
248 | __hlist_del(&tw->tw_death_node); | ||
249 | tw_dead_node_init(tw); | ||
250 | } | ||
251 | |||
252 | static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw) | ||
253 | { | ||
254 | if (tw_dead_hashed(tw)) { | ||
255 | __tw_del_dead_node(tw); | ||
256 | return 1; | ||
257 | } | ||
258 | return 0; | ||
259 | } | ||
260 | |||
261 | #define tw_for_each(tw, node, head) \ | ||
262 | hlist_for_each_entry(tw, node, head, tw_node) | ||
263 | |||
264 | #define tw_for_each_inmate(tw, node, jail) \ | ||
265 | hlist_for_each_entry(tw, node, jail, tw_death_node) | ||
266 | |||
267 | #define tw_for_each_inmate_safe(tw, node, safe, jail) \ | ||
268 | hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) | ||
269 | |||
270 | #define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk)) | ||
271 | |||
272 | static inline u32 tcp_v4_rcv_saddr(const struct sock *sk) | ||
273 | { | ||
274 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
275 | inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr; | ||
276 | } | ||
277 | |||
278 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
279 | static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk) | ||
280 | { | ||
281 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
282 | &inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr; | ||
283 | } | ||
284 | |||
285 | static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) | ||
286 | { | ||
287 | return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; | ||
288 | } | ||
289 | |||
290 | #define tcptw_sk_ipv6only(__sk) (tcptw_sk(__sk)->tw_v6_ipv6only) | ||
291 | |||
292 | static inline int tcp_v6_ipv6only(const struct sock *sk) | ||
293 | { | ||
294 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | ||
295 | ipv6_only_sock(sk) : tcptw_sk_ipv6only(sk); | ||
296 | } | ||
297 | #else | ||
298 | # define __tcp_v6_rcv_saddr(__sk) NULL | ||
299 | # define tcp_v6_rcv_saddr(__sk) NULL | ||
300 | # define tcptw_sk_ipv6only(__sk) 0 | ||
301 | # define tcp_v6_ipv6only(__sk) 0 | ||
302 | #endif | ||
303 | 42 | ||
304 | extern kmem_cache_t *tcp_timewait_cachep; | 43 | extern struct inet_hashinfo tcp_hashinfo; |
305 | |||
306 | static inline void tcp_tw_put(struct tcp_tw_bucket *tw) | ||
307 | { | ||
308 | if (atomic_dec_and_test(&tw->tw_refcnt)) { | ||
309 | #ifdef INET_REFCNT_DEBUG | ||
310 | printk(KERN_DEBUG "tw_bucket %p released\n", tw); | ||
311 | #endif | ||
312 | kmem_cache_free(tcp_timewait_cachep, tw); | ||
313 | } | ||
314 | } | ||
315 | 44 | ||
316 | extern atomic_t tcp_orphan_count; | 45 | extern atomic_t tcp_orphan_count; |
317 | extern int tcp_tw_count; | ||
318 | extern void tcp_time_wait(struct sock *sk, int state, int timeo); | 46 | extern void tcp_time_wait(struct sock *sk, int state, int timeo); |
319 | extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); | ||
320 | |||
321 | |||
322 | /* Socket demux engine toys. */ | ||
323 | #ifdef __BIG_ENDIAN | ||
324 | #define TCP_COMBINED_PORTS(__sport, __dport) \ | ||
325 | (((__u32)(__sport)<<16) | (__u32)(__dport)) | ||
326 | #else /* __LITTLE_ENDIAN */ | ||
327 | #define TCP_COMBINED_PORTS(__sport, __dport) \ | ||
328 | (((__u32)(__dport)<<16) | (__u32)(__sport)) | ||
329 | #endif | ||
330 | |||
331 | #if (BITS_PER_LONG == 64) | ||
332 | #ifdef __BIG_ENDIAN | ||
333 | #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ | ||
334 | __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr)); | ||
335 | #else /* __LITTLE_ENDIAN */ | ||
336 | #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ | ||
337 | __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr)); | ||
338 | #endif /* __BIG_ENDIAN */ | ||
339 | #define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
340 | (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ | ||
341 | ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ | ||
342 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
343 | #define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
344 | (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \ | ||
345 | ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ | ||
346 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
347 | #else /* 32-bit arch */ | ||
348 | #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) | ||
349 | #define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
350 | ((inet_sk(__sk)->daddr == (__saddr)) && \ | ||
351 | (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ | ||
352 | ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ | ||
353 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
354 | #define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ | ||
355 | ((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \ | ||
356 | (tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \ | ||
357 | ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ | ||
358 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
359 | #endif /* 64-bit arch */ | ||
360 | |||
361 | #define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ | ||
362 | (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ | ||
363 | ((__sk)->sk_family == AF_INET6) && \ | ||
364 | ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ | ||
365 | ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ | ||
366 | (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) | ||
367 | |||
368 | /* These can have wildcards, don't try too hard. */ | ||
369 | static __inline__ int tcp_lhashfn(unsigned short num) | ||
370 | { | ||
371 | return num & (TCP_LHTABLE_SIZE - 1); | ||
372 | } | ||
373 | |||
374 | static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) | ||
375 | { | ||
376 | return tcp_lhashfn(inet_sk(sk)->num); | ||
377 | } | ||
378 | 47 | ||
379 | #define MAX_TCP_HEADER (128 + MAX_HEADER) | 48 | #define MAX_TCP_HEADER (128 + MAX_HEADER) |
380 | 49 | ||
@@ -478,33 +147,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) | |||
478 | * timestamps. It must be less than | 147 | * timestamps. It must be less than |
479 | * minimal timewait lifetime. | 148 | * minimal timewait lifetime. |
480 | */ | 149 | */ |
481 | |||
482 | #define TCP_TW_RECYCLE_SLOTS_LOG 5 | ||
483 | #define TCP_TW_RECYCLE_SLOTS (1<<TCP_TW_RECYCLE_SLOTS_LOG) | ||
484 | |||
485 | /* If time > 4sec, it is "slow" path, no recycling is required, | ||
486 | so that we select tick to get range about 4 seconds. | ||
487 | */ | ||
488 | |||
489 | #if HZ <= 16 || HZ > 4096 | ||
490 | # error Unsupported: HZ <= 16 or HZ > 4096 | ||
491 | #elif HZ <= 32 | ||
492 | # define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG) | ||
493 | #elif HZ <= 64 | ||
494 | # define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG) | ||
495 | #elif HZ <= 128 | ||
496 | # define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG) | ||
497 | #elif HZ <= 256 | ||
498 | # define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG) | ||
499 | #elif HZ <= 512 | ||
500 | # define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG) | ||
501 | #elif HZ <= 1024 | ||
502 | # define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG) | ||
503 | #elif HZ <= 2048 | ||
504 | # define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG) | ||
505 | #else | ||
506 | # define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG) | ||
507 | #endif | ||
508 | /* | 150 | /* |
509 | * TCP option | 151 | * TCP option |
510 | */ | 152 | */ |
@@ -534,22 +176,18 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) | |||
534 | #define TCPOLEN_SACK_BASE_ALIGNED 4 | 176 | #define TCPOLEN_SACK_BASE_ALIGNED 4 |
535 | #define TCPOLEN_SACK_PERBLOCK 8 | 177 | #define TCPOLEN_SACK_PERBLOCK 8 |
536 | 178 | ||
537 | #define TCP_TIME_RETRANS 1 /* Retransmit timer */ | ||
538 | #define TCP_TIME_DACK 2 /* Delayed ack timer */ | ||
539 | #define TCP_TIME_PROBE0 3 /* Zero window probe timer */ | ||
540 | #define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */ | ||
541 | |||
542 | /* Flags in tp->nonagle */ | 179 | /* Flags in tp->nonagle */ |
543 | #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ | 180 | #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ |
544 | #define TCP_NAGLE_CORK 2 /* Socket is corked */ | 181 | #define TCP_NAGLE_CORK 2 /* Socket is corked */ |
545 | #define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ | 182 | #define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ |
546 | 183 | ||
184 | extern struct inet_timewait_death_row tcp_death_row; | ||
185 | |||
547 | /* sysctl variables for tcp */ | 186 | /* sysctl variables for tcp */ |
548 | extern int sysctl_tcp_timestamps; | 187 | extern int sysctl_tcp_timestamps; |
549 | extern int sysctl_tcp_window_scaling; | 188 | extern int sysctl_tcp_window_scaling; |
550 | extern int sysctl_tcp_sack; | 189 | extern int sysctl_tcp_sack; |
551 | extern int sysctl_tcp_fin_timeout; | 190 | extern int sysctl_tcp_fin_timeout; |
552 | extern int sysctl_tcp_tw_recycle; | ||
553 | extern int sysctl_tcp_keepalive_time; | 191 | extern int sysctl_tcp_keepalive_time; |
554 | extern int sysctl_tcp_keepalive_probes; | 192 | extern int sysctl_tcp_keepalive_probes; |
555 | extern int sysctl_tcp_keepalive_intvl; | 193 | extern int sysctl_tcp_keepalive_intvl; |
@@ -564,7 +202,6 @@ extern int sysctl_tcp_stdurg; | |||
564 | extern int sysctl_tcp_rfc1337; | 202 | extern int sysctl_tcp_rfc1337; |
565 | extern int sysctl_tcp_abort_on_overflow; | 203 | extern int sysctl_tcp_abort_on_overflow; |
566 | extern int sysctl_tcp_max_orphans; | 204 | extern int sysctl_tcp_max_orphans; |
567 | extern int sysctl_tcp_max_tw_buckets; | ||
568 | extern int sysctl_tcp_fack; | 205 | extern int sysctl_tcp_fack; |
569 | extern int sysctl_tcp_reordering; | 206 | extern int sysctl_tcp_reordering; |
570 | extern int sysctl_tcp_ecn; | 207 | extern int sysctl_tcp_ecn; |
@@ -585,12 +222,6 @@ extern atomic_t tcp_memory_allocated; | |||
585 | extern atomic_t tcp_sockets_allocated; | 222 | extern atomic_t tcp_sockets_allocated; |
586 | extern int tcp_memory_pressure; | 223 | extern int tcp_memory_pressure; |
587 | 224 | ||
588 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
589 | #define TCP_INET_FAMILY(fam) ((fam) == AF_INET) | ||
590 | #else | ||
591 | #define TCP_INET_FAMILY(fam) 1 | ||
592 | #endif | ||
593 | |||
594 | /* | 225 | /* |
595 | * Pointers to address related TCP functions | 226 | * Pointers to address related TCP functions |
596 | * (i.e. things that depend on the address family) | 227 | * (i.e. things that depend on the address family) |
@@ -671,9 +302,6 @@ DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics); | |||
671 | #define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val) | 302 | #define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val) |
672 | #define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val) | 303 | #define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val) |
673 | 304 | ||
674 | extern void tcp_put_port(struct sock *sk); | ||
675 | extern void tcp_inherit_port(struct sock *sk, struct sock *child); | ||
676 | |||
677 | extern void tcp_v4_err(struct sk_buff *skb, u32); | 305 | extern void tcp_v4_err(struct sk_buff *skb, u32); |
678 | 306 | ||
679 | extern void tcp_shutdown (struct sock *sk, int how); | 307 | extern void tcp_shutdown (struct sock *sk, int how); |
@@ -682,7 +310,7 @@ extern int tcp_v4_rcv(struct sk_buff *skb); | |||
682 | 310 | ||
683 | extern int tcp_v4_remember_stamp(struct sock *sk); | 311 | extern int tcp_v4_remember_stamp(struct sock *sk); |
684 | 312 | ||
685 | extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw); | 313 | extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); |
686 | 314 | ||
687 | extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, | 315 | extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, |
688 | struct msghdr *msg, size_t size); | 316 | struct msghdr *msg, size_t size); |
@@ -704,42 +332,22 @@ extern int tcp_rcv_established(struct sock *sk, | |||
704 | 332 | ||
705 | extern void tcp_rcv_space_adjust(struct sock *sk); | 333 | extern void tcp_rcv_space_adjust(struct sock *sk); |
706 | 334 | ||
707 | enum tcp_ack_state_t | 335 | static inline void tcp_dec_quickack_mode(struct sock *sk, |
708 | { | 336 | const unsigned int pkts) |
709 | TCP_ACK_SCHED = 1, | ||
710 | TCP_ACK_TIMER = 2, | ||
711 | TCP_ACK_PUSHED= 4 | ||
712 | }; | ||
713 | |||
714 | static inline void tcp_schedule_ack(struct tcp_sock *tp) | ||
715 | { | 337 | { |
716 | tp->ack.pending |= TCP_ACK_SCHED; | 338 | struct inet_connection_sock *icsk = inet_csk(sk); |
717 | } | ||
718 | |||
719 | static inline int tcp_ack_scheduled(struct tcp_sock *tp) | ||
720 | { | ||
721 | return tp->ack.pending&TCP_ACK_SCHED; | ||
722 | } | ||
723 | |||
724 | static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp, unsigned int pkts) | ||
725 | { | ||
726 | if (tp->ack.quick) { | ||
727 | if (pkts >= tp->ack.quick) { | ||
728 | tp->ack.quick = 0; | ||
729 | 339 | ||
340 | if (icsk->icsk_ack.quick) { | ||
341 | if (pkts >= icsk->icsk_ack.quick) { | ||
342 | icsk->icsk_ack.quick = 0; | ||
730 | /* Leaving quickack mode we deflate ATO. */ | 343 | /* Leaving quickack mode we deflate ATO. */ |
731 | tp->ack.ato = TCP_ATO_MIN; | 344 | icsk->icsk_ack.ato = TCP_ATO_MIN; |
732 | } else | 345 | } else |
733 | tp->ack.quick -= pkts; | 346 | icsk->icsk_ack.quick -= pkts; |
734 | } | 347 | } |
735 | } | 348 | } |
736 | 349 | ||
737 | extern void tcp_enter_quickack_mode(struct tcp_sock *tp); | 350 | extern void tcp_enter_quickack_mode(struct sock *sk); |
738 | |||
739 | static __inline__ void tcp_delack_init(struct tcp_sock *tp) | ||
740 | { | ||
741 | memset(&tp->ack, 0, sizeof(tp->ack)); | ||
742 | } | ||
743 | 351 | ||
744 | static inline void tcp_clear_options(struct tcp_options_received *rx_opt) | 352 | static inline void tcp_clear_options(struct tcp_options_received *rx_opt) |
745 | { | 353 | { |
@@ -755,10 +363,9 @@ enum tcp_tw_status | |||
755 | }; | 363 | }; |
756 | 364 | ||
757 | 365 | ||
758 | extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw, | 366 | extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, |
759 | struct sk_buff *skb, | 367 | struct sk_buff *skb, |
760 | struct tcphdr *th, | 368 | const struct tcphdr *th); |
761 | unsigned len); | ||
762 | 369 | ||
763 | extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, | 370 | extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, |
764 | struct request_sock *req, | 371 | struct request_sock *req, |
@@ -773,7 +380,6 @@ extern void tcp_update_metrics(struct sock *sk); | |||
773 | 380 | ||
774 | extern void tcp_close(struct sock *sk, | 381 | extern void tcp_close(struct sock *sk, |
775 | long timeout); | 382 | long timeout); |
776 | extern struct sock * tcp_accept(struct sock *sk, int flags, int *err); | ||
777 | extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); | 383 | extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); |
778 | 384 | ||
779 | extern int tcp_getsockopt(struct sock *sk, int level, | 385 | extern int tcp_getsockopt(struct sock *sk, int level, |
@@ -789,8 +395,6 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, | |||
789 | size_t len, int nonblock, | 395 | size_t len, int nonblock, |
790 | int flags, int *addr_len); | 396 | int flags, int *addr_len); |
791 | 397 | ||
792 | extern int tcp_listen_start(struct sock *sk); | ||
793 | |||
794 | extern void tcp_parse_options(struct sk_buff *skb, | 398 | extern void tcp_parse_options(struct sk_buff *skb, |
795 | struct tcp_options_received *opt_rx, | 399 | struct tcp_options_received *opt_rx, |
796 | int estab); | 400 | int estab); |
@@ -799,11 +403,6 @@ extern void tcp_parse_options(struct sk_buff *skb, | |||
799 | * TCP v4 functions exported for the inet6 API | 403 | * TCP v4 functions exported for the inet6 API |
800 | */ | 404 | */ |
801 | 405 | ||
802 | extern int tcp_v4_rebuild_header(struct sock *sk); | ||
803 | |||
804 | extern int tcp_v4_build_header(struct sock *sk, | ||
805 | struct sk_buff *skb); | ||
806 | |||
807 | extern void tcp_v4_send_check(struct sock *sk, | 406 | extern void tcp_v4_send_check(struct sock *sk, |
808 | struct tcphdr *th, int len, | 407 | struct tcphdr *th, int len, |
809 | struct sk_buff *skb); | 408 | struct sk_buff *skb); |
@@ -872,18 +471,15 @@ extern void tcp_cwnd_application_limited(struct sock *sk); | |||
872 | 471 | ||
873 | /* tcp_timer.c */ | 472 | /* tcp_timer.c */ |
874 | extern void tcp_init_xmit_timers(struct sock *); | 473 | extern void tcp_init_xmit_timers(struct sock *); |
875 | extern void tcp_clear_xmit_timers(struct sock *); | 474 | static inline void tcp_clear_xmit_timers(struct sock *sk) |
475 | { | ||
476 | inet_csk_clear_xmit_timers(sk); | ||
477 | } | ||
876 | 478 | ||
877 | extern void tcp_delete_keepalive_timer(struct sock *); | ||
878 | extern void tcp_reset_keepalive_timer(struct sock *, unsigned long); | ||
879 | extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); | 479 | extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); |
880 | extern unsigned int tcp_current_mss(struct sock *sk, int large); | 480 | extern unsigned int tcp_current_mss(struct sock *sk, int large); |
881 | 481 | ||
882 | #ifdef TCP_DEBUG | 482 | /* tcp.c */ |
883 | extern const char tcp_timer_bug_msg[]; | ||
884 | #endif | ||
885 | |||
886 | /* tcp_diag.c */ | ||
887 | extern void tcp_get_info(struct sock *, struct tcp_info *); | 483 | extern void tcp_get_info(struct sock *, struct tcp_info *); |
888 | 484 | ||
889 | /* Read 'sendfile()'-style from a TCP socket */ | 485 | /* Read 'sendfile()'-style from a TCP socket */ |
@@ -892,72 +488,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, | |||
892 | extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | 488 | extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, |
893 | sk_read_actor_t recv_actor); | 489 | sk_read_actor_t recv_actor); |
894 | 490 | ||
895 | static inline void tcp_clear_xmit_timer(struct sock *sk, int what) | ||
896 | { | ||
897 | struct tcp_sock *tp = tcp_sk(sk); | ||
898 | |||
899 | switch (what) { | ||
900 | case TCP_TIME_RETRANS: | ||
901 | case TCP_TIME_PROBE0: | ||
902 | tp->pending = 0; | ||
903 | |||
904 | #ifdef TCP_CLEAR_TIMERS | ||
905 | sk_stop_timer(sk, &tp->retransmit_timer); | ||
906 | #endif | ||
907 | break; | ||
908 | case TCP_TIME_DACK: | ||
909 | tp->ack.blocked = 0; | ||
910 | tp->ack.pending = 0; | ||
911 | |||
912 | #ifdef TCP_CLEAR_TIMERS | ||
913 | sk_stop_timer(sk, &tp->delack_timer); | ||
914 | #endif | ||
915 | break; | ||
916 | default: | ||
917 | #ifdef TCP_DEBUG | ||
918 | printk(tcp_timer_bug_msg); | ||
919 | #endif | ||
920 | return; | ||
921 | }; | ||
922 | |||
923 | } | ||
924 | |||
925 | /* | ||
926 | * Reset the retransmission timer | ||
927 | */ | ||
928 | static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when) | ||
929 | { | ||
930 | struct tcp_sock *tp = tcp_sk(sk); | ||
931 | |||
932 | if (when > TCP_RTO_MAX) { | ||
933 | #ifdef TCP_DEBUG | ||
934 | printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr()); | ||
935 | #endif | ||
936 | when = TCP_RTO_MAX; | ||
937 | } | ||
938 | |||
939 | switch (what) { | ||
940 | case TCP_TIME_RETRANS: | ||
941 | case TCP_TIME_PROBE0: | ||
942 | tp->pending = what; | ||
943 | tp->timeout = jiffies+when; | ||
944 | sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout); | ||
945 | break; | ||
946 | |||
947 | case TCP_TIME_DACK: | ||
948 | tp->ack.pending |= TCP_ACK_TIMER; | ||
949 | tp->ack.timeout = jiffies+when; | ||
950 | sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout); | ||
951 | break; | ||
952 | |||
953 | default: | ||
954 | #ifdef TCP_DEBUG | ||
955 | printk(tcp_timer_bug_msg); | ||
956 | #endif | ||
957 | return; | ||
958 | }; | ||
959 | } | ||
960 | |||
961 | /* Initialize RCV_MSS value. | 491 | /* Initialize RCV_MSS value. |
962 | * RCV_MSS is an our guess about MSS used by the peer. | 492 | * RCV_MSS is an our guess about MSS used by the peer. |
963 | * We haven't any direct information about the MSS. | 493 | * We haven't any direct information about the MSS. |
@@ -975,7 +505,7 @@ static inline void tcp_initialize_rcv_mss(struct sock *sk) | |||
975 | hint = min(hint, TCP_MIN_RCVMSS); | 505 | hint = min(hint, TCP_MIN_RCVMSS); |
976 | hint = max(hint, TCP_MIN_MSS); | 506 | hint = max(hint, TCP_MIN_MSS); |
977 | 507 | ||
978 | tp->ack.rcv_mss = hint; | 508 | inet_csk(sk)->icsk_ack.rcv_mss = hint; |
979 | } | 509 | } |
980 | 510 | ||
981 | static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) | 511 | static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) |
@@ -1110,7 +640,8 @@ static inline void tcp_packets_out_inc(struct sock *sk, | |||
1110 | 640 | ||
1111 | tp->packets_out += tcp_skb_pcount(skb); | 641 | tp->packets_out += tcp_skb_pcount(skb); |
1112 | if (!orig) | 642 | if (!orig) |
1113 | tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); | 643 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
644 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | ||
1114 | } | 645 | } |
1115 | 646 | ||
1116 | static inline void tcp_packets_out_dec(struct tcp_sock *tp, | 647 | static inline void tcp_packets_out_dec(struct tcp_sock *tp, |
@@ -1138,29 +669,29 @@ struct tcp_congestion_ops { | |||
1138 | struct list_head list; | 669 | struct list_head list; |
1139 | 670 | ||
1140 | /* initialize private data (optional) */ | 671 | /* initialize private data (optional) */ |
1141 | void (*init)(struct tcp_sock *tp); | 672 | void (*init)(struct sock *sk); |
1142 | /* cleanup private data (optional) */ | 673 | /* cleanup private data (optional) */ |
1143 | void (*release)(struct tcp_sock *tp); | 674 | void (*release)(struct sock *sk); |
1144 | 675 | ||
1145 | /* return slow start threshold (required) */ | 676 | /* return slow start threshold (required) */ |
1146 | u32 (*ssthresh)(struct tcp_sock *tp); | 677 | u32 (*ssthresh)(struct sock *sk); |
1147 | /* lower bound for congestion window (optional) */ | 678 | /* lower bound for congestion window (optional) */ |
1148 | u32 (*min_cwnd)(struct tcp_sock *tp); | 679 | u32 (*min_cwnd)(struct sock *sk); |
1149 | /* do new cwnd calculation (required) */ | 680 | /* do new cwnd calculation (required) */ |
1150 | void (*cong_avoid)(struct tcp_sock *tp, u32 ack, | 681 | void (*cong_avoid)(struct sock *sk, u32 ack, |
1151 | u32 rtt, u32 in_flight, int good_ack); | 682 | u32 rtt, u32 in_flight, int good_ack); |
1152 | /* round trip time sample per acked packet (optional) */ | 683 | /* round trip time sample per acked packet (optional) */ |
1153 | void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt); | 684 | void (*rtt_sample)(struct sock *sk, u32 usrtt); |
1154 | /* call before changing ca_state (optional) */ | 685 | /* call before changing ca_state (optional) */ |
1155 | void (*set_state)(struct tcp_sock *tp, u8 new_state); | 686 | void (*set_state)(struct sock *sk, u8 new_state); |
1156 | /* call when cwnd event occurs (optional) */ | 687 | /* call when cwnd event occurs (optional) */ |
1157 | void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev); | 688 | void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); |
1158 | /* new value of cwnd after loss (optional) */ | 689 | /* new value of cwnd after loss (optional) */ |
1159 | u32 (*undo_cwnd)(struct tcp_sock *tp); | 690 | u32 (*undo_cwnd)(struct sock *sk); |
1160 | /* hook for packet ack accounting (optional) */ | 691 | /* hook for packet ack accounting (optional) */ |
1161 | void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked); | 692 | void (*pkts_acked)(struct sock *sk, u32 num_acked); |
1162 | /* get info for tcp_diag (optional) */ | 693 | /* get info for inet_diag (optional) */ |
1163 | void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb); | 694 | void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); |
1164 | 695 | ||
1165 | char name[TCP_CA_NAME_MAX]; | 696 | char name[TCP_CA_NAME_MAX]; |
1166 | struct module *owner; | 697 | struct module *owner; |
@@ -1169,30 +700,34 @@ struct tcp_congestion_ops { | |||
1169 | extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); | 700 | extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); |
1170 | extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); | 701 | extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); |
1171 | 702 | ||
1172 | extern void tcp_init_congestion_control(struct tcp_sock *tp); | 703 | extern void tcp_init_congestion_control(struct sock *sk); |
1173 | extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); | 704 | extern void tcp_cleanup_congestion_control(struct sock *sk); |
1174 | extern int tcp_set_default_congestion_control(const char *name); | 705 | extern int tcp_set_default_congestion_control(const char *name); |
1175 | extern void tcp_get_default_congestion_control(char *name); | 706 | extern void tcp_get_default_congestion_control(char *name); |
1176 | extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name); | 707 | extern int tcp_set_congestion_control(struct sock *sk, const char *name); |
1177 | 708 | ||
1178 | extern struct tcp_congestion_ops tcp_init_congestion_ops; | 709 | extern struct tcp_congestion_ops tcp_init_congestion_ops; |
1179 | extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); | 710 | extern u32 tcp_reno_ssthresh(struct sock *sk); |
1180 | extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, | 711 | extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, |
1181 | u32 rtt, u32 in_flight, int flag); | 712 | u32 rtt, u32 in_flight, int flag); |
1182 | extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp); | 713 | extern u32 tcp_reno_min_cwnd(struct sock *sk); |
1183 | extern struct tcp_congestion_ops tcp_reno; | 714 | extern struct tcp_congestion_ops tcp_reno; |
1184 | 715 | ||
1185 | static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state) | 716 | static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state) |
1186 | { | 717 | { |
1187 | if (tp->ca_ops->set_state) | 718 | struct inet_connection_sock *icsk = inet_csk(sk); |
1188 | tp->ca_ops->set_state(tp, ca_state); | 719 | |
1189 | tp->ca_state = ca_state; | 720 | if (icsk->icsk_ca_ops->set_state) |
721 | icsk->icsk_ca_ops->set_state(sk, ca_state); | ||
722 | icsk->icsk_ca_state = ca_state; | ||
1190 | } | 723 | } |
1191 | 724 | ||
1192 | static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event) | 725 | static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) |
1193 | { | 726 | { |
1194 | if (tp->ca_ops->cwnd_event) | 727 | const struct inet_connection_sock *icsk = inet_csk(sk); |
1195 | tp->ca_ops->cwnd_event(tp, event); | 728 | |
729 | if (icsk->icsk_ca_ops->cwnd_event) | ||
730 | icsk->icsk_ca_ops->cwnd_event(sk, event); | ||
1196 | } | 731 | } |
1197 | 732 | ||
1198 | /* This determines how many packets are "in the network" to the best | 733 | /* This determines how many packets are "in the network" to the best |
@@ -1218,9 +753,10 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) | |||
1218 | * The exception is rate halving phase, when cwnd is decreasing towards | 753 | * The exception is rate halving phase, when cwnd is decreasing towards |
1219 | * ssthresh. | 754 | * ssthresh. |
1220 | */ | 755 | */ |
1221 | static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp) | 756 | static inline __u32 tcp_current_ssthresh(const struct sock *sk) |
1222 | { | 757 | { |
1223 | if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery)) | 758 | const struct tcp_sock *tp = tcp_sk(sk); |
759 | if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery)) | ||
1224 | return tp->snd_ssthresh; | 760 | return tp->snd_ssthresh; |
1225 | else | 761 | else |
1226 | return max(tp->snd_ssthresh, | 762 | return max(tp->snd_ssthresh, |
@@ -1237,10 +773,13 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp) | |||
1237 | } | 773 | } |
1238 | 774 | ||
1239 | /* Set slow start threshold and cwnd not falling to slow start */ | 775 | /* Set slow start threshold and cwnd not falling to slow start */ |
1240 | static inline void __tcp_enter_cwr(struct tcp_sock *tp) | 776 | static inline void __tcp_enter_cwr(struct sock *sk) |
1241 | { | 777 | { |
778 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
779 | struct tcp_sock *tp = tcp_sk(sk); | ||
780 | |||
1242 | tp->undo_marker = 0; | 781 | tp->undo_marker = 0; |
1243 | tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); | 782 | tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); |
1244 | tp->snd_cwnd = min(tp->snd_cwnd, | 783 | tp->snd_cwnd = min(tp->snd_cwnd, |
1245 | tcp_packets_in_flight(tp) + 1U); | 784 | tcp_packets_in_flight(tp) + 1U); |
1246 | tp->snd_cwnd_cnt = 0; | 785 | tp->snd_cwnd_cnt = 0; |
@@ -1249,12 +788,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp) | |||
1249 | TCP_ECN_queue_cwr(tp); | 788 | TCP_ECN_queue_cwr(tp); |
1250 | } | 789 | } |
1251 | 790 | ||
1252 | static inline void tcp_enter_cwr(struct tcp_sock *tp) | 791 | static inline void tcp_enter_cwr(struct sock *sk) |
1253 | { | 792 | { |
793 | struct tcp_sock *tp = tcp_sk(sk); | ||
794 | |||
1254 | tp->prior_ssthresh = 0; | 795 | tp->prior_ssthresh = 0; |
1255 | if (tp->ca_state < TCP_CA_CWR) { | 796 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { |
1256 | __tcp_enter_cwr(tp); | 797 | __tcp_enter_cwr(sk); |
1257 | tcp_set_ca_state(tp, TCP_CA_CWR); | 798 | tcp_set_ca_state(sk, TCP_CA_CWR); |
1258 | } | 799 | } |
1259 | } | 800 | } |
1260 | 801 | ||
@@ -1277,8 +818,10 @@ static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, | |||
1277 | 818 | ||
1278 | static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) | 819 | static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) |
1279 | { | 820 | { |
1280 | if (!tp->packets_out && !tp->pending) | 821 | const struct inet_connection_sock *icsk = inet_csk(sk); |
1281 | tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); | 822 | if (!tp->packets_out && !icsk->icsk_pending) |
823 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, | ||
824 | icsk->icsk_rto, TCP_RTO_MAX); | ||
1282 | } | 825 | } |
1283 | 826 | ||
1284 | static __inline__ void tcp_push_pending_frames(struct sock *sk, | 827 | static __inline__ void tcp_push_pending_frames(struct sock *sk, |
@@ -1297,9 +840,6 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) | |||
1297 | tp->snd_wl1 = seq; | 840 | tp->snd_wl1 = seq; |
1298 | } | 841 | } |
1299 | 842 | ||
1300 | extern void tcp_destroy_sock(struct sock *sk); | ||
1301 | |||
1302 | |||
1303 | /* | 843 | /* |
1304 | * Calculate(/check) TCP checksum | 844 | * Calculate(/check) TCP checksum |
1305 | */ | 845 | */ |
@@ -1359,8 +899,10 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb) | |||
1359 | tp->ucopy.memory = 0; | 899 | tp->ucopy.memory = 0; |
1360 | } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { | 900 | } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { |
1361 | wake_up_interruptible(sk->sk_sleep); | 901 | wake_up_interruptible(sk->sk_sleep); |
1362 | if (!tcp_ack_scheduled(tp)) | 902 | if (!inet_csk_ack_scheduled(sk)) |
1363 | tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4); | 903 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, |
904 | (3 * TCP_RTO_MIN) / 4, | ||
905 | TCP_RTO_MAX); | ||
1364 | } | 906 | } |
1365 | return 1; | 907 | return 1; |
1366 | } | 908 | } |
@@ -1393,9 +935,9 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) | |||
1393 | TCP_INC_STATS(TCP_MIB_ESTABRESETS); | 935 | TCP_INC_STATS(TCP_MIB_ESTABRESETS); |
1394 | 936 | ||
1395 | sk->sk_prot->unhash(sk); | 937 | sk->sk_prot->unhash(sk); |
1396 | if (tcp_sk(sk)->bind_hash && | 938 | if (inet_csk(sk)->icsk_bind_hash && |
1397 | !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) | 939 | !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) |
1398 | tcp_put_port(sk); | 940 | inet_put_port(&tcp_hashinfo, sk); |
1399 | /* fall through */ | 941 | /* fall through */ |
1400 | default: | 942 | default: |
1401 | if (oldstate==TCP_ESTABLISHED) | 943 | if (oldstate==TCP_ESTABLISHED) |
@@ -1422,7 +964,7 @@ static __inline__ void tcp_done(struct sock *sk) | |||
1422 | if (!sock_flag(sk, SOCK_DEAD)) | 964 | if (!sock_flag(sk, SOCK_DEAD)) |
1423 | sk->sk_state_change(sk); | 965 | sk->sk_state_change(sk); |
1424 | else | 966 | else |
1425 | tcp_destroy_sock(sk); | 967 | inet_csk_destroy_sock(sk); |
1426 | } | 968 | } |
1427 | 969 | ||
1428 | static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt) | 970 | static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt) |
@@ -1524,54 +1066,6 @@ static inline int tcp_full_space(const struct sock *sk) | |||
1524 | return tcp_win_from_space(sk->sk_rcvbuf); | 1066 | return tcp_win_from_space(sk->sk_rcvbuf); |
1525 | } | 1067 | } |
1526 | 1068 | ||
1527 | static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req, | ||
1528 | struct sock *child) | ||
1529 | { | ||
1530 | reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child); | ||
1531 | } | ||
1532 | |||
1533 | static inline void | ||
1534 | tcp_synq_removed(struct sock *sk, struct request_sock *req) | ||
1535 | { | ||
1536 | if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0) | ||
1537 | tcp_delete_keepalive_timer(sk); | ||
1538 | } | ||
1539 | |||
1540 | static inline void tcp_synq_added(struct sock *sk) | ||
1541 | { | ||
1542 | if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0) | ||
1543 | tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); | ||
1544 | } | ||
1545 | |||
1546 | static inline int tcp_synq_len(struct sock *sk) | ||
1547 | { | ||
1548 | return reqsk_queue_len(&tcp_sk(sk)->accept_queue); | ||
1549 | } | ||
1550 | |||
1551 | static inline int tcp_synq_young(struct sock *sk) | ||
1552 | { | ||
1553 | return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue); | ||
1554 | } | ||
1555 | |||
1556 | static inline int tcp_synq_is_full(struct sock *sk) | ||
1557 | { | ||
1558 | return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue); | ||
1559 | } | ||
1560 | |||
1561 | static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req, | ||
1562 | struct request_sock **prev) | ||
1563 | { | ||
1564 | reqsk_queue_unlink(&tp->accept_queue, req, prev); | ||
1565 | } | ||
1566 | |||
1567 | static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req, | ||
1568 | struct request_sock **prev) | ||
1569 | { | ||
1570 | tcp_synq_unlink(tcp_sk(sk), req, prev); | ||
1571 | tcp_synq_removed(sk, req); | ||
1572 | reqsk_free(req); | ||
1573 | } | ||
1574 | |||
1575 | static __inline__ void tcp_openreq_init(struct request_sock *req, | 1069 | static __inline__ void tcp_openreq_init(struct request_sock *req, |
1576 | struct tcp_options_received *rx_opt, | 1070 | struct tcp_options_received *rx_opt, |
1577 | struct sk_buff *skb) | 1071 | struct sk_buff *skb) |
@@ -1593,27 +1087,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req, | |||
1593 | 1087 | ||
1594 | extern void tcp_enter_memory_pressure(void); | 1088 | extern void tcp_enter_memory_pressure(void); |
1595 | 1089 | ||
1596 | extern void tcp_listen_wlock(void); | ||
1597 | |||
1598 | /* - We may sleep inside this lock. | ||
1599 | * - If sleeping is not required (or called from BH), | ||
1600 | * use plain read_(un)lock(&tcp_lhash_lock). | ||
1601 | */ | ||
1602 | |||
1603 | static inline void tcp_listen_lock(void) | ||
1604 | { | ||
1605 | /* read_lock synchronizes to candidates to writers */ | ||
1606 | read_lock(&tcp_lhash_lock); | ||
1607 | atomic_inc(&tcp_lhash_users); | ||
1608 | read_unlock(&tcp_lhash_lock); | ||
1609 | } | ||
1610 | |||
1611 | static inline void tcp_listen_unlock(void) | ||
1612 | { | ||
1613 | if (atomic_dec_and_test(&tcp_lhash_users)) | ||
1614 | wake_up(&tcp_lhash_wait); | ||
1615 | } | ||
1616 | |||
1617 | static inline int keepalive_intvl_when(const struct tcp_sock *tp) | 1090 | static inline int keepalive_intvl_when(const struct tcp_sock *tp) |
1618 | { | 1091 | { |
1619 | return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; | 1092 | return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; |
@@ -1624,12 +1097,13 @@ static inline int keepalive_time_when(const struct tcp_sock *tp) | |||
1624 | return tp->keepalive_time ? : sysctl_tcp_keepalive_time; | 1097 | return tp->keepalive_time ? : sysctl_tcp_keepalive_time; |
1625 | } | 1098 | } |
1626 | 1099 | ||
1627 | static inline int tcp_fin_time(const struct tcp_sock *tp) | 1100 | static inline int tcp_fin_time(const struct sock *sk) |
1628 | { | 1101 | { |
1629 | int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout; | 1102 | int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout; |
1103 | const int rto = inet_csk(sk)->icsk_rto; | ||
1630 | 1104 | ||
1631 | if (fin_timeout < (tp->rto<<2) - (tp->rto>>1)) | 1105 | if (fin_timeout < (rto << 2) - (rto >> 1)) |
1632 | fin_timeout = (tp->rto<<2) - (tp->rto>>1); | 1106 | fin_timeout = (rto << 2) - (rto >> 1); |
1633 | 1107 | ||
1634 | return fin_timeout; | 1108 | return fin_timeout; |
1635 | } | 1109 | } |
@@ -1658,15 +1132,6 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int | |||
1658 | return 1; | 1132 | return 1; |
1659 | } | 1133 | } |
1660 | 1134 | ||
1661 | static inline void tcp_v4_setup_caps(struct sock *sk, struct dst_entry *dst) | ||
1662 | { | ||
1663 | sk->sk_route_caps = dst->dev->features; | ||
1664 | if (sk->sk_route_caps & NETIF_F_TSO) { | ||
1665 | if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) | ||
1666 | sk->sk_route_caps &= ~NETIF_F_TSO; | ||
1667 | } | ||
1668 | } | ||
1669 | |||
1670 | #define TCP_CHECK_TIMER(sk) do { } while (0) | 1135 | #define TCP_CHECK_TIMER(sk) do { } while (0) |
1671 | 1136 | ||
1672 | static inline int tcp_use_frto(const struct sock *sk) | 1137 | static inline int tcp_use_frto(const struct sock *sk) |
@@ -1718,4 +1183,16 @@ struct tcp_iter_state { | |||
1718 | extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo); | 1183 | extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo); |
1719 | extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); | 1184 | extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); |
1720 | 1185 | ||
1186 | extern struct request_sock_ops tcp_request_sock_ops; | ||
1187 | |||
1188 | extern int tcp_v4_destroy_sock(struct sock *sk); | ||
1189 | |||
1190 | #ifdef CONFIG_PROC_FS | ||
1191 | extern int tcp4_proc_init(void); | ||
1192 | extern void tcp4_proc_exit(void); | ||
1193 | #endif | ||
1194 | |||
1195 | extern void tcp_v4_init(struct net_proto_family *ops); | ||
1196 | extern void tcp_init(void); | ||
1197 | |||
1721 | #endif /* _TCP_H */ | 1198 | #endif /* _TCP_H */ |
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 64980ee8c92a..c6b84397448d 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h | |||
@@ -88,7 +88,7 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) | |||
88 | * it is surely retransmit. It is not in ECN RFC, | 88 | * it is surely retransmit. It is not in ECN RFC, |
89 | * but Linux follows this rule. */ | 89 | * but Linux follows this rule. */ |
90 | else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) | 90 | else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) |
91 | tcp_enter_quickack_mode(tp); | 91 | tcp_enter_quickack_mode((struct sock *)tp); |
92 | } | 92 | } |
93 | } | 93 | } |
94 | 94 | ||
diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h new file mode 100644 index 000000000000..b9d4176b2d15 --- /dev/null +++ b/include/net/tcp_states.h | |||
@@ -0,0 +1,34 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Definitions for the TCP protocol sk_state field. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | */ | ||
13 | #ifndef _LINUX_TCP_STATES_H | ||
14 | #define _LINUX_TCP_STATES_H | ||
15 | |||
16 | enum { | ||
17 | TCP_ESTABLISHED = 1, | ||
18 | TCP_SYN_SENT, | ||
19 | TCP_SYN_RECV, | ||
20 | TCP_FIN_WAIT1, | ||
21 | TCP_FIN_WAIT2, | ||
22 | TCP_TIME_WAIT, | ||
23 | TCP_CLOSE, | ||
24 | TCP_CLOSE_WAIT, | ||
25 | TCP_LAST_ACK, | ||
26 | TCP_LISTEN, | ||
27 | TCP_CLOSING, /* Now a valid state */ | ||
28 | |||
29 | TCP_MAX_STATES /* Leave at the end! */ | ||
30 | }; | ||
31 | |||
32 | #define TCP_STATE_MASK 0xF | ||
33 | |||
34 | #endif /* _LINUX_TCP_STATES_H */ | ||
diff --git a/include/net/udp.h b/include/net/udp.h index ac229b761dbc..107b9d791a1f 100644 --- a/include/net/udp.h +++ b/include/net/udp.h | |||
@@ -94,6 +94,11 @@ struct udp_iter_state { | |||
94 | struct seq_operations seq_ops; | 94 | struct seq_operations seq_ops; |
95 | }; | 95 | }; |
96 | 96 | ||
97 | #ifdef CONFIG_PROC_FS | ||
97 | extern int udp_proc_register(struct udp_seq_afinfo *afinfo); | 98 | extern int udp_proc_register(struct udp_seq_afinfo *afinfo); |
98 | extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo); | 99 | extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo); |
100 | |||
101 | extern int udp4_proc_init(void); | ||
102 | extern void udp4_proc_exit(void); | ||
103 | #endif | ||
99 | #endif /* _UDP_H */ | 104 | #endif /* _UDP_H */ |
diff --git a/include/net/x25.h b/include/net/x25.h index 8b39b98876e8..fee62ff8c194 100644 --- a/include/net/x25.h +++ b/include/net/x25.h | |||
@@ -175,7 +175,7 @@ extern void x25_kill_by_neigh(struct x25_neigh *); | |||
175 | 175 | ||
176 | /* x25_dev.c */ | 176 | /* x25_dev.c */ |
177 | extern void x25_send_frame(struct sk_buff *, struct x25_neigh *); | 177 | extern void x25_send_frame(struct sk_buff *, struct x25_neigh *); |
178 | extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *); | 178 | extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); |
179 | extern void x25_establish_link(struct x25_neigh *); | 179 | extern void x25_establish_link(struct x25_neigh *); |
180 | extern void x25_terminate_link(struct x25_neigh *); | 180 | extern void x25_terminate_link(struct x25_neigh *); |
181 | 181 | ||
diff --git a/include/net/x25device.h b/include/net/x25device.h index d45ae883bd1d..1a318374faef 100644 --- a/include/net/x25device.h +++ b/include/net/x25device.h | |||
@@ -8,7 +8,6 @@ | |||
8 | static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev) | 8 | static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev) |
9 | { | 9 | { |
10 | skb->mac.raw = skb->data; | 10 | skb->mac.raw = skb->data; |
11 | skb->input_dev = skb->dev = dev; | ||
12 | skb->pkt_type = PACKET_HOST; | 11 | skb->pkt_type = PACKET_HOST; |
13 | 12 | ||
14 | return htons(ETH_P_X25); | 13 | return htons(ETH_P_X25); |
diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 868ef88ef971..a9d0d8c5dfbf 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h | |||
@@ -818,7 +818,6 @@ extern void xfrm6_init(void); | |||
818 | extern void xfrm6_fini(void); | 818 | extern void xfrm6_fini(void); |
819 | extern void xfrm_state_init(void); | 819 | extern void xfrm_state_init(void); |
820 | extern void xfrm4_state_init(void); | 820 | extern void xfrm4_state_init(void); |
821 | extern void xfrm4_state_fini(void); | ||
822 | extern void xfrm6_state_init(void); | 821 | extern void xfrm6_state_init(void); |
823 | extern void xfrm6_state_fini(void); | 822 | extern void xfrm6_state_fini(void); |
824 | 823 | ||