aboutsummaryrefslogtreecommitdiffstats
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r--include/net/act_api.h2
-rw-r--r--include/net/addrconf.h6
-rw-r--r--include/net/af_unix.h15
-rw-r--r--include/net/arp.h2
-rw-r--r--include/net/ax25.h20
-rw-r--r--include/net/bluetooth/bluetooth.h13
-rw-r--r--include/net/bluetooth/hci.h15
-rw-r--r--include/net/bluetooth/hci_core.h2
-rw-r--r--include/net/bluetooth/rfcomm.h14
-rw-r--r--include/net/datalink.h2
-rw-r--r--include/net/dn.h1
-rw-r--r--include/net/icmp.h7
-rw-r--r--include/net/inet6_hashtables.h130
-rw-r--r--include/net/inet_common.h6
-rw-r--r--include/net/inet_connection_sock.h276
-rw-r--r--include/net/inet_hashtables.h427
-rw-r--r--include/net/inet_timewait_sock.h219
-rw-r--r--include/net/ip.h32
-rw-r--r--include/net/ip6_route.h1
-rw-r--r--include/net/ip_fib.h5
-rw-r--r--include/net/ip_vs.h1
-rw-r--r--include/net/ipv6.h39
-rw-r--r--include/net/llc.h8
-rw-r--r--include/net/neighbour.h9
-rw-r--r--include/net/p8022.h5
-rw-r--r--include/net/pkt_cls.h6
-rw-r--r--include/net/psnap.h2
-rw-r--r--include/net/raw.h9
-rw-r--r--include/net/rawv6.h5
-rw-r--r--include/net/request_sock.h14
-rw-r--r--include/net/route.h6
-rw-r--r--include/net/sctp/constants.h2
-rw-r--r--include/net/sock.h118
-rw-r--r--include/net/tcp.h723
-rw-r--r--include/net/tcp_ecn.h2
-rw-r--r--include/net/tcp_states.h34
-rw-r--r--include/net/udp.h5
-rw-r--r--include/net/x25.h2
-rw-r--r--include/net/x25device.h1
-rw-r--r--include/net/xfrm.h1
40 files changed, 1483 insertions, 704 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h
index ed00a995f576..b55eb7c7f033 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -63,7 +63,7 @@ struct tc_action_ops
63 __u32 type; /* TBD to match kind */ 63 __u32 type; /* TBD to match kind */
64 __u32 capab; /* capabilities includes 4 bit version */ 64 __u32 capab; /* capabilities includes 4 bit version */
65 struct module *owner; 65 struct module *owner;
66 int (*act)(struct sk_buff **, struct tc_action *); 66 int (*act)(struct sk_buff **, struct tc_action *, struct tcf_result *);
67 int (*get_stats)(struct sk_buff *, struct tc_action *); 67 int (*get_stats)(struct sk_buff *, struct tc_action *);
68 int (*dump)(struct sk_buff *, struct tc_action *,int , int); 68 int (*dump)(struct sk_buff *, struct tc_action *,int , int);
69 int (*cleanup)(struct tc_action *, int bind); 69 int (*cleanup)(struct tc_action *, int bind);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index a0ed93672176..750e2508dd90 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -45,6 +45,7 @@ struct prefix_info {
45 45
46#ifdef __KERNEL__ 46#ifdef __KERNEL__
47 47
48#include <linux/config.h>
48#include <linux/netdevice.h> 49#include <linux/netdevice.h>
49#include <net/if_inet6.h> 50#include <net/if_inet6.h>
50#include <net/ipv6.h> 51#include <net/ipv6.h>
@@ -238,5 +239,10 @@ static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr)
238 addr->s6_addr32[3] == htonl(0x00000002)); 239 addr->s6_addr32[3] == htonl(0x00000002));
239} 240}
240 241
242#ifdef CONFIG_PROC_FS
243extern int if6_proc_init(void);
244extern void if6_proc_exit(void);
245#endif
246
241#endif 247#endif
242#endif 248#endif
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index b60b3846b9d1..b5d785ab4a0e 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -1,5 +1,11 @@
1#ifndef __LINUX_NET_AFUNIX_H 1#ifndef __LINUX_NET_AFUNIX_H
2#define __LINUX_NET_AFUNIX_H 2#define __LINUX_NET_AFUNIX_H
3
4#include <linux/config.h>
5#include <linux/socket.h>
6#include <linux/un.h>
7#include <net/sock.h>
8
3extern void unix_inflight(struct file *fp); 9extern void unix_inflight(struct file *fp);
4extern void unix_notinflight(struct file *fp); 10extern void unix_notinflight(struct file *fp);
5extern void unix_gc(void); 11extern void unix_gc(void);
@@ -74,5 +80,14 @@ struct unix_sock {
74 wait_queue_head_t peer_wait; 80 wait_queue_head_t peer_wait;
75}; 81};
76#define unix_sk(__sk) ((struct unix_sock *)__sk) 82#define unix_sk(__sk) ((struct unix_sock *)__sk)
83
/*
 * Sysctl hooks for AF_UNIX.  With CONFIG_SYSCTL set, the register and
 * unregister functions and sysctl_unix_max_dgram_qlen are only declared
 * here.  Without it, the two functions collapse to empty inline stubs so
 * that call sites need no #ifdef of their own.
 */
84#ifdef CONFIG_SYSCTL
85extern int sysctl_unix_max_dgram_qlen;
86extern void unix_sysctl_register(void);
87extern void unix_sysctl_unregister(void);
88#else
89static inline void unix_sysctl_register(void) {}
90static inline void unix_sysctl_unregister(void) {}
91#endif
77#endif 92#endif
78#endif 93#endif
diff --git a/include/net/arp.h b/include/net/arp.h
index a1f09fad6a52..a13e30c35f42 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -11,7 +11,7 @@ extern struct neigh_table arp_tbl;
11 11
12extern void arp_init(void); 12extern void arp_init(void);
13extern int arp_rcv(struct sk_buff *skb, struct net_device *dev, 13extern int arp_rcv(struct sk_buff *skb, struct net_device *dev,
14 struct packet_type *pt); 14 struct packet_type *pt, struct net_device *orig_dev);
15extern int arp_find(unsigned char *haddr, struct sk_buff *skb); 15extern int arp_find(unsigned char *haddr, struct sk_buff *skb);
16extern int arp_ioctl(unsigned int cmd, void __user *arg); 16extern int arp_ioctl(unsigned int cmd, void __user *arg);
17extern void arp_send(int type, int ptype, u32 dest_ip, 17extern void arp_send(int type, int ptype, u32 dest_ip,
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 828a3a93dda1..926eed543023 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -139,11 +139,25 @@ enum {
139#define AX25_DEF_DS_TIMEOUT (3 * 60 * HZ) /* DAMA timeout 3 minutes */ 139#define AX25_DEF_DS_TIMEOUT (3 * 60 * HZ) /* DAMA timeout 3 minutes */
140 140
141typedef struct ax25_uid_assoc { 141typedef struct ax25_uid_assoc {
142 struct ax25_uid_assoc *next; 142 struct hlist_node uid_node;
143 atomic_t refcount;
143 uid_t uid; 144 uid_t uid;
144 ax25_address call; 145 ax25_address call;
145} ax25_uid_assoc; 146} ax25_uid_assoc;
146 147
/*
 * Walk every ax25_uid_assoc on an hlist.  Entries are linked through
 * their uid_node member; __ax25 is the entry cursor and node the
 * hlist_node cursor handed to hlist_for_each_entry().
 */
148#define ax25_uid_for_each(__ax25, node, list) \
149 hlist_for_each_entry(__ax25, node, list, uid_node)
150
/* Take one reference on a UID association by bumping its refcount. */
151#define ax25_uid_hold(ax25) \
152 atomic_inc(&((ax25)->refcount))
153
154static inline void ax25_uid_put(ax25_uid_assoc *assoc)
155{
156 if (atomic_dec_and_test(&assoc->refcount)) {
157 kfree(assoc);
158 }
159}
160
147typedef struct { 161typedef struct {
148 ax25_address calls[AX25_MAX_DIGIS]; 162 ax25_address calls[AX25_MAX_DIGIS];
149 unsigned char repeated[AX25_MAX_DIGIS]; 163 unsigned char repeated[AX25_MAX_DIGIS];
@@ -302,7 +316,7 @@ extern int ax25_protocol_is_registered(unsigned int);
302 316
303/* ax25_in.c */ 317/* ax25_in.c */
304extern int ax25_rx_iframe(ax25_cb *, struct sk_buff *); 318extern int ax25_rx_iframe(ax25_cb *, struct sk_buff *);
305extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *); 319extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
306 320
307/* ax25_ip.c */ 321/* ax25_ip.c */
308extern int ax25_encapsulate(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int); 322extern int ax25_encapsulate(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int);
@@ -376,7 +390,7 @@ extern unsigned long ax25_display_timer(struct timer_list *);
376 390
377/* ax25_uid.c */ 391/* ax25_uid.c */
378extern int ax25_uid_policy; 392extern int ax25_uid_policy;
379extern ax25_address *ax25_findbyuid(uid_t); 393extern ax25_uid_assoc *ax25_findbyuid(uid_t);
380extern int ax25_uid_ioctl(int, struct sockaddr_ax25 *); 394extern int ax25_uid_ioctl(int, struct sockaddr_ax25 *);
381extern struct file_operations ax25_uid_fops; 395extern struct file_operations ax25_uid_fops;
382extern void ax25_uid_free(void); 396extern void ax25_uid_free(void);
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 42a84c53678b..6dfa4a61ffd0 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -57,12 +57,6 @@
57#define BT_DBG(fmt, arg...) printk(KERN_INFO "%s: " fmt "\n" , __FUNCTION__ , ## arg) 57#define BT_DBG(fmt, arg...) printk(KERN_INFO "%s: " fmt "\n" , __FUNCTION__ , ## arg)
58#define BT_ERR(fmt, arg...) printk(KERN_ERR "%s: " fmt "\n" , __FUNCTION__ , ## arg) 58#define BT_ERR(fmt, arg...) printk(KERN_ERR "%s: " fmt "\n" , __FUNCTION__ , ## arg)
59 59
60#ifdef HCI_DATA_DUMP
61#define BT_DMP(buf, len) bt_dump(__FUNCTION__, buf, len)
62#else
63#define BT_DMP(D...)
64#endif
65
66extern struct proc_dir_entry *proc_bt; 60extern struct proc_dir_entry *proc_bt;
67 61
68/* Connection and socket states */ 62/* Connection and socket states */
@@ -137,11 +131,12 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock);
137 131
138/* Skb helpers */ 132/* Skb helpers */
139struct bt_skb_cb { 133struct bt_skb_cb {
140 int incoming; 134 __u8 pkt_type;
135 __u8 incoming;
141}; 136};
142#define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb)) 137#define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb))
143 138
144static inline struct sk_buff *bt_skb_alloc(unsigned int len, int how) 139static inline struct sk_buff *bt_skb_alloc(unsigned int len, unsigned int __nocast how)
145{ 140{
146 struct sk_buff *skb; 141 struct sk_buff *skb;
147 142
@@ -174,8 +169,6 @@ static inline int skb_frags_no(struct sk_buff *skb)
174 return n; 169 return n;
175} 170}
176 171
177void bt_dump(char *pref, __u8 *buf, int count);
178
179int bt_err(__u16 code); 172int bt_err(__u16 code);
180 173
181#endif /* __BLUETOOTH_H */ 174#endif /* __BLUETOOTH_H */
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 6f0706f4af68..371e7d3f2e6f 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -453,6 +453,15 @@ struct inquiry_info_with_rssi {
453 __u16 clock_offset; 453 __u16 clock_offset;
454 __s8 rssi; 454 __s8 rssi;
455} __attribute__ ((packed)); 455} __attribute__ ((packed));
/*
 * Packed inquiry record that carries an RSSI byte together with all three
 * pscan_* bytes (rep_mode, period_mode and mode).
 * NOTE(review): presumably the RSSI inquiry-result layout sent by
 * controllers that still include pscan_mode - confirm against the HCI
 * event parser that consumes it.
 */
456struct inquiry_info_with_rssi_and_pscan_mode {
457 bdaddr_t bdaddr;
458 __u8 pscan_rep_mode;
459 __u8 pscan_period_mode;
460 __u8 pscan_mode;
461 __u8 dev_class[3];
462 __u16 clock_offset;
463 __s8 rssi;
464} __attribute__ ((packed));
456 465
457#define HCI_EV_CONN_COMPLETE 0x03 466#define HCI_EV_CONN_COMPLETE 0x03
458struct hci_ev_conn_complete { 467struct hci_ev_conn_complete {
@@ -584,6 +593,12 @@ struct hci_ev_clock_offset {
584 __u16 clock_offset; 593 __u16 clock_offset;
585} __attribute__ ((packed)); 594} __attribute__ ((packed));
586 595
/*
 * Code 0x20 and the packed record that goes with it: a device address
 * followed by a single pscan_rep_mode byte.
 * NOTE(review): presumably the HCI event code, as with the other
 * HCI_EV_* define/struct pairs in this header - confirm.
 */
596#define HCI_EV_PSCAN_REP_MODE 0x20
597struct hci_ev_pscan_rep_mode {
598 bdaddr_t bdaddr;
599 __u8 pscan_rep_mode;
600} __attribute__ ((packed));
601
587/* Internal events generated by Bluetooth stack */ 602/* Internal events generated by Bluetooth stack */
588#define HCI_EV_STACK_INTERNAL 0xFD 603#define HCI_EV_STACK_INTERNAL 0xFD
589struct hci_ev_stack_internal { 604struct hci_ev_stack_internal {
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 6d63a47c731b..7f933f302078 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -404,7 +404,7 @@ static inline int hci_recv_frame(struct sk_buff *skb)
404 bt_cb(skb)->incoming = 1; 404 bt_cb(skb)->incoming = 1;
405 405
406 /* Time stamp */ 406 /* Time stamp */
407 do_gettimeofday(&skb->stamp); 407 __net_timestamp(skb);
408 408
409 /* Queue frame for rx task */ 409 /* Queue frame for rx task */
410 skb_queue_tail(&hdev->rx_q, skb); 410 skb_queue_tail(&hdev->rx_q, skb);
diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index 13669bad00b3..ffea9d54071f 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -80,9 +80,9 @@
80#define RFCOMM_RPN_STOP_15 1 80#define RFCOMM_RPN_STOP_15 1
81 81
82#define RFCOMM_RPN_PARITY_NONE 0x0 82#define RFCOMM_RPN_PARITY_NONE 0x0
83#define RFCOMM_RPN_PARITY_ODD 0x4 83#define RFCOMM_RPN_PARITY_ODD 0x1
84#define RFCOMM_RPN_PARITY_EVEN 0x5 84#define RFCOMM_RPN_PARITY_EVEN 0x3
85#define RFCOMM_RPN_PARITY_MARK 0x6 85#define RFCOMM_RPN_PARITY_MARK 0x5
86#define RFCOMM_RPN_PARITY_SPACE 0x7 86#define RFCOMM_RPN_PARITY_SPACE 0x7
87 87
88#define RFCOMM_RPN_FLOW_NONE 0x00 88#define RFCOMM_RPN_FLOW_NONE 0x00
@@ -223,8 +223,14 @@ struct rfcomm_dlc {
223#define RFCOMM_CFC_DISABLED 0 223#define RFCOMM_CFC_DISABLED 0
224#define RFCOMM_CFC_ENABLED RFCOMM_MAX_CREDITS 224#define RFCOMM_CFC_ENABLED RFCOMM_MAX_CREDITS
225 225
226/* ---- RFCOMM SEND RPN ---- */
227int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci,
228 u8 bit_rate, u8 data_bits, u8 stop_bits,
229 u8 parity, u8 flow_ctrl_settings,
230 u8 xon_char, u8 xoff_char, u16 param_mask);
231
226/* ---- RFCOMM DLCs (channels) ---- */ 232/* ---- RFCOMM DLCs (channels) ---- */
227struct rfcomm_dlc *rfcomm_dlc_alloc(int prio); 233struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio);
228void rfcomm_dlc_free(struct rfcomm_dlc *d); 234void rfcomm_dlc_free(struct rfcomm_dlc *d);
229int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel); 235int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel);
230int rfcomm_dlc_close(struct rfcomm_dlc *d, int reason); 236int rfcomm_dlc_close(struct rfcomm_dlc *d, int reason);
diff --git a/include/net/datalink.h b/include/net/datalink.h
index 5797ba3d2eb5..deb7ca75db48 100644
--- a/include/net/datalink.h
+++ b/include/net/datalink.h
@@ -9,7 +9,7 @@ struct datalink_proto {
9 unsigned short header_length; 9 unsigned short header_length;
10 10
11 int (*rcvfunc)(struct sk_buff *, struct net_device *, 11 int (*rcvfunc)(struct sk_buff *, struct net_device *,
12 struct packet_type *); 12 struct packet_type *, struct net_device *);
13 int (*request)(struct datalink_proto *, struct sk_buff *, 13 int (*request)(struct datalink_proto *, struct sk_buff *,
14 unsigned char *); 14 unsigned char *);
15 struct list_head node; 15 struct list_head node;
diff --git a/include/net/dn.h b/include/net/dn.h
index 5551c46db397..c1dbbd222793 100644
--- a/include/net/dn.h
+++ b/include/net/dn.h
@@ -3,6 +3,7 @@
3 3
4#include <linux/dn.h> 4#include <linux/dn.h>
5#include <net/sock.h> 5#include <net/sock.h>
6#include <net/tcp.h>
6#include <asm/byteorder.h> 7#include <asm/byteorder.h>
7 8
8typedef unsigned short dn_address; 9typedef unsigned short dn_address;
diff --git a/include/net/icmp.h b/include/net/icmp.h
index e5ef0d15fb45..6cdebeee5f96 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -57,4 +57,11 @@ static inline struct raw_sock *raw_sk(const struct sock *sk)
57 return (struct raw_sock *)sk; 57 return (struct raw_sock *)sk;
58} 58}
59 59
60extern int sysctl_icmp_echo_ignore_all;
61extern int sysctl_icmp_echo_ignore_broadcasts;
62extern int sysctl_icmp_ignore_bogus_error_responses;
63extern int sysctl_icmp_errors_use_inbound_ifaddr;
64extern int sysctl_icmp_ratelimit;
65extern int sysctl_icmp_ratemask;
66
60#endif /* _ICMP_H */ 67#endif /* _ICMP_H */
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
new file mode 100644
index 000000000000..03df3b157960
--- /dev/null
+++ b/include/net/inet6_hashtables.h
@@ -0,0 +1,130 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Authors: Lotsa people, from code originally in tcp
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#ifndef _INET6_HASHTABLES_H
15#define _INET6_HASHTABLES_H
16
17#include <linux/config.h>
18
19#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
20#include <linux/in6.h>
21#include <linux/ipv6.h>
22#include <linux/types.h>
23
24#include <net/ipv6.h>
25
26struct inet_hashinfo;
27
28/* I have no idea if this is a good hash for v6 or not. -DaveM */
29static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport,
30 const struct in6_addr *faddr, const u16 fport,
31 const int ehash_size)
32{
33 int hashent = (lport ^ fport);
34
35 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
36 hashent ^= hashent >> 16;
37 hashent ^= hashent >> 8;
38 return (hashent & (ehash_size - 1));
39}
40
41static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size)
42{
43 const struct inet_sock *inet = inet_sk(sk);
44 const struct ipv6_pinfo *np = inet6_sk(sk);
45 const struct in6_addr *laddr = &np->rcv_saddr;
46 const struct in6_addr *faddr = &np->daddr;
47 const __u16 lport = inet->num;
48 const __u16 fport = inet->dport;
49 return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size);
50}
51
52/*
53 * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
54 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
55 *
56 * The sockhash lock must be held as a reader here.
57 */
/*
 * Look up a fully specified IPv6 connection in hashinfo->ehash.
 *
 * @hashinfo: table to search
 * @saddr:    remote address, @sport: remote port
 * @daddr:    local address,  @hnum:  local port
 * @dif:      interface index, checked against sk_bound_dev_if in the
 *            TIME_WAIT scan and passed to INET6_MATCH() in the first scan
 *
 * The bucket is picked with inet6_ehashfn(daddr, hnum, saddr, sport, ...).
 * Its chain is scanned first, then the chain of the bucket ehash_size
 * entries further on, which is where TIME_WAIT sockets are looked for.
 * Both scans run under the first bucket's read lock.
 *
 * Returns the matching socket after sock_hold(), or NULL when neither
 * chain has a match.
 */
58static inline struct sock *
59 __inet6_lookup_established(struct inet_hashinfo *hashinfo,
60 const struct in6_addr *saddr,
61 const u16 sport,
62 const struct in6_addr *daddr,
63 const u16 hnum,
64 const int dif)
65{
66 struct sock *sk;
67 const struct hlist_node *node;
68 const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
69 /* Optimize here for direct hit, only listening connections can
70 * have wildcards anyways.
71 */
72 const int hash = inet6_ehashfn(daddr, hnum, saddr, sport,
73 hashinfo->ehash_size);
74 struct inet_ehash_bucket *head = &hashinfo->ehash[hash];
75
76 read_lock(&head->lock);
77 sk_for_each(sk, node, &head->chain) {
78 /* For IPV6 do the cheaper port and family tests first. */
79 if (INET6_MATCH(sk, saddr, daddr, ports, dif))
80 goto hit; /* You sunk my battleship! */
81 }
82 /* Must check for a TIME_WAIT'er before going to listener hash. */
83 sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
84 const struct inet_timewait_sock *tw = inet_twsk(sk);
85
 /* Reads 32 bits starting at tw_dport and compares them with the
  * combined port word in a single test.
  * NOTE(review): assumes the 16-bit field right after tw_dport is the
  * local port - confirm against struct inet_timewait_sock.
  */
86 if(*((__u32 *)&(tw->tw_dport)) == ports &&
87 sk->sk_family == PF_INET6) {
88 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
89
90 if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
91 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
92 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
93 goto hit;
94 }
95 }
96 read_unlock(&head->lock);
97 return NULL;
98
99hit:
 /* Take the reference before the bucket lock is dropped. */
100 sock_hold(sk);
101 read_unlock(&head->lock);
102 return sk;
103}
104
105extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
106 const struct in6_addr *daddr,
107 const unsigned short hnum,
108 const int dif);
109
110static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo,
111 const struct in6_addr *saddr,
112 const u16 sport,
113 const struct in6_addr *daddr,
114 const u16 hnum,
115 const int dif)
116{
117 struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport,
118 daddr, hnum, dif);
119 if (sk)
120 return sk;
121
122 return inet6_lookup_listener(hashinfo, daddr, hnum, dif);
123}
124
125extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
126 const struct in6_addr *saddr, const u16 sport,
127 const struct in6_addr *daddr, const u16 dport,
128 const int dif);
129#endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */
130#endif /* _INET6_HASHTABLES_H */
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index fbc1f4d140d8..f943306ce5ff 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -8,6 +8,11 @@ extern struct proto_ops inet_dgram_ops;
8 * INET4 prototypes used by INET6 8 * INET4 prototypes used by INET6
9 */ 9 */
10 10
11struct msghdr;
12struct sock;
13struct sockaddr;
14struct socket;
15
11extern void inet_remove_sock(struct sock *sk1); 16extern void inet_remove_sock(struct sock *sk1);
12extern void inet_put_sock(unsigned short num, 17extern void inet_put_sock(unsigned short num,
13 struct sock *sk); 18 struct sock *sk);
@@ -29,7 +34,6 @@ extern unsigned int inet_poll(struct file * file, struct socket *sock, struct p
29extern int inet_listen(struct socket *sock, int backlog); 34extern int inet_listen(struct socket *sock, int backlog);
30 35
31extern void inet_sock_destruct(struct sock *sk); 36extern void inet_sock_destruct(struct sock *sk);
32extern atomic_t inet_sock_nr;
33 37
34extern int inet_bind(struct socket *sock, 38extern int inet_bind(struct socket *sock,
35 struct sockaddr *uaddr, int addr_len); 39 struct sockaddr *uaddr, int addr_len);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
new file mode 100644
index 000000000000..651f824c1008
--- /dev/null
+++ b/include/net/inet_connection_sock.h
@@ -0,0 +1,276 @@
1/*
2 * NET Generic infrastructure for INET connection oriented protocols.
3 *
4 * Definitions for inet_connection_sock
5 *
6 * Authors: Many people, see the TCP sources
7 *
8 * From code originally in TCP
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15#ifndef _INET_CONNECTION_SOCK_H
16#define _INET_CONNECTION_SOCK_H
17
18#include <linux/ip.h>
19#include <linux/string.h>
20#include <linux/timer.h>
21#include <net/request_sock.h>
22
23#define INET_CSK_DEBUG 1
24
25/* Cancel timers, when they are not required. */
26#undef INET_CSK_CLEAR_TIMERS
27
28struct inet_bind_bucket;
29struct inet_hashinfo;
30struct tcp_congestion_ops;
31
32/** inet_connection_sock - INET connection oriented sock
33 *
 * @icsk_inet: Embedded inet_sock; must stay the first member
34 * @icsk_accept_queue: FIFO of established children
35 * @icsk_bind_hash: Bind node
36 * @icsk_timeout: Timeout
37 * @icsk_retransmit_timer: Resend (no ack)
 * @icsk_delack_timer: Timer armed for ICSK_TIME_DACK requests
38 * @icsk_rto: Retransmit timeout
39 * @icsk_ca_ops: Pluggable congestion control hook
40 * @icsk_ca_state: Congestion control state
41 * @icsk_retransmits: Number of unrecovered [RTO] timeouts
42 * @icsk_pending: Scheduled timer event
43 * @icsk_backoff: Backoff
44 * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries
45 * @icsk_probes_out: unanswered 0 window probes
46 * @icsk_ack: Delayed ACK control data
 * @icsk_ca_priv: Scratch area returned by inet_csk_ca(); presumably
 *		private state for the congestion control module
47 */
48struct inet_connection_sock {
49 /* inet_sock has to be the first member! */
50 struct inet_sock icsk_inet;
51 struct request_sock_queue icsk_accept_queue;
52 struct inet_bind_bucket *icsk_bind_hash;
53 unsigned long icsk_timeout;
54 struct timer_list icsk_retransmit_timer;
55 struct timer_list icsk_delack_timer;
56 __u32 icsk_rto;
57 struct tcp_congestion_ops *icsk_ca_ops;
58 __u8 icsk_ca_state;
59 __u8 icsk_retransmits;
60 __u8 icsk_pending;
61 __u8 icsk_backoff;
62 __u8 icsk_syn_retries;
63 __u8 icsk_probes_out;
64 /* 2 BYTES HOLE, TRY TO PACK! */
65 struct {
66 __u8 pending; /* ACK is pending */
67 __u8 quick; /* Scheduled number of quick acks */
68 __u8 pingpong; /* The session is interactive */
69 __u8 blocked; /* Delayed ACK was blocked by socket lock */
70 __u32 ato; /* Predicted tick of soft clock */
71 unsigned long timeout; /* Currently scheduled timeout */
72 __u32 lrcvtime; /* timestamp of last received data packet */
73 __u16 last_seg_size; /* Size of last incoming segment */
74 __u16 rcv_mss; /* MSS used for delayed ACK decisions */
75 } icsk_ack;
76 u32 icsk_ca_priv[16];
 /* Size of icsk_ca_priv in bytes; keep the 16 in step with the array bound. */
77#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32))
78};
79
/*
 * Timer selectors.  RETRANS, DACK and PROBE0 are the values accepted as
 * the 'what' argument of inet_csk_clear_xmit_timer() and
 * inet_csk_reset_xmit_timer(); RETRANS and PROBE0 share
 * icsk_retransmit_timer there, DACK uses icsk_delack_timer.
 */
80#define ICSK_TIME_RETRANS 1 /* Retransmit timer */
81#define ICSK_TIME_DACK 2 /* Delayed ack timer */
82#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */
83#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */
84
/*
 * View a struct sock as the inet_connection_sock it is embedded in.
 * This is a plain pointer cast and also drops the const qualifier.
 */
static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
	struct inet_connection_sock *icsk = (struct inet_connection_sock *)sk;

	return icsk;
}
89
90static inline void *inet_csk_ca(const struct sock *sk)
91{
92 return (void *)inet_csk(sk)->icsk_ca_priv;
93}
94
95extern struct sock *inet_csk_clone(struct sock *sk,
96 const struct request_sock *req,
97 const unsigned int __nocast priority);
98
/*
 * Bit flags kept in icsk_ack.pending.  ICSK_ACK_SCHED is set by
 * inet_csk_schedule_ack() and ICSK_ACK_TIMER by inet_csk_reset_xmit_timer()
 * when the delayed-ACK timer is armed.
 */
99enum inet_csk_ack_state_t {
100 ICSK_ACK_SCHED = 1,
101 ICSK_ACK_TIMER = 2,
102 ICSK_ACK_PUSHED = 4
103};
104
105extern void inet_csk_init_xmit_timers(struct sock *sk,
106 void (*retransmit_handler)(unsigned long),
107 void (*delack_handler)(unsigned long),
108 void (*keepalive_handler)(unsigned long));
109extern void inet_csk_clear_xmit_timers(struct sock *sk);
110
111static inline void inet_csk_schedule_ack(struct sock *sk)
112{
113 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED;
114}
115
116static inline int inet_csk_ack_scheduled(const struct sock *sk)
117{
118 return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED;
119}
120
121static inline void inet_csk_delack_init(struct sock *sk)
122{
123 memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack));
124}
125
126extern void inet_csk_delete_keepalive_timer(struct sock *sk);
127extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
128
129#ifdef INET_CSK_DEBUG
130extern const char inet_csk_timer_bug_msg[];
131#endif
132
133static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
134{
135 struct inet_connection_sock *icsk = inet_csk(sk);
136
137 if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
138 icsk->icsk_pending = 0;
139#ifdef INET_CSK_CLEAR_TIMERS
140 sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
141#endif
142 } else if (what == ICSK_TIME_DACK) {
143 icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0;
144#ifdef INET_CSK_CLEAR_TIMERS
145 sk_stop_timer(sk, &icsk->icsk_delack_timer);
146#endif
147 }
148#ifdef INET_CSK_DEBUG
149 else {
150 pr_debug("%s", inet_csk_timer_bug_msg);
151 }
152#endif
153}
154
155/*
156 * Reset the retransmission timer
157 */
158static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
159 unsigned long when,
160 const unsigned long max_when)
161{
162 struct inet_connection_sock *icsk = inet_csk(sk);
163
164 if (when > max_when) {
165#ifdef INET_CSK_DEBUG
166 pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n",
167 sk, what, when, current_text_addr());
168#endif
169 when = max_when;
170 }
171
172 if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
173 icsk->icsk_pending = what;
174 icsk->icsk_timeout = jiffies + when;
175 sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
176 } else if (what == ICSK_TIME_DACK) {
177 icsk->icsk_ack.pending |= ICSK_ACK_TIMER;
178 icsk->icsk_ack.timeout = jiffies + when;
179 sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
180 }
181#ifdef INET_CSK_DEBUG
182 else {
183 pr_debug("%s", inet_csk_timer_bug_msg);
184 }
185#endif
186}
187
188extern struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
189
190extern struct request_sock *inet_csk_search_req(const struct sock *sk,
191 struct request_sock ***prevp,
192 const __u16 rport,
193 const __u32 raddr,
194 const __u32 laddr);
195extern int inet_csk_get_port(struct inet_hashinfo *hashinfo,
196 struct sock *sk, unsigned short snum);
197
198extern struct dst_entry* inet_csk_route_req(struct sock *sk,
199 const struct request_sock *req);
200
201static inline void inet_csk_reqsk_queue_add(struct sock *sk,
202 struct request_sock *req,
203 struct sock *child)
204{
205 reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
206}
207
208extern void inet_csk_reqsk_queue_hash_add(struct sock *sk,
209 struct request_sock *req,
210 const unsigned timeout);
211
212static inline void inet_csk_reqsk_queue_removed(struct sock *sk,
213 struct request_sock *req)
214{
215 if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0)
216 inet_csk_delete_keepalive_timer(sk);
217}
218
219static inline void inet_csk_reqsk_queue_added(struct sock *sk,
220 const unsigned long timeout)
221{
222 if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0)
223 inet_csk_reset_keepalive_timer(sk, timeout);
224}
225
226static inline int inet_csk_reqsk_queue_len(const struct sock *sk)
227{
228 return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue);
229}
230
231static inline int inet_csk_reqsk_queue_young(const struct sock *sk)
232{
233 return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue);
234}
235
236static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
237{
238 return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue);
239}
240
241static inline void inet_csk_reqsk_queue_unlink(struct sock *sk,
242 struct request_sock *req,
243 struct request_sock **prev)
244{
245 reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev);
246}
247
248static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
249 struct request_sock *req,
250 struct request_sock **prev)
251{
252 inet_csk_reqsk_queue_unlink(sk, req, prev);
253 inet_csk_reqsk_queue_removed(sk, req);
254 reqsk_free(req);
255}
256
257extern void inet_csk_reqsk_queue_prune(struct sock *parent,
258 const unsigned long interval,
259 const unsigned long timeout,
260 const unsigned long max_rto);
261
262extern void inet_csk_destroy_sock(struct sock *sk);
263
264/*
265 * LISTEN is a special case for poll..
266 */
267static inline unsigned int inet_csk_listen_poll(const struct sock *sk)
268{
269 return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ?
270 (POLLIN | POLLRDNORM) : 0;
271}
272
273extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries);
274extern void inet_csk_listen_stop(struct sock *sk);
275
276#endif /* _INET_CONNECTION_SOCK_H */
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
new file mode 100644
index 000000000000..646b6ea7fe26
--- /dev/null
+++ b/include/net/inet_hashtables.h
@@ -0,0 +1,427 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Authors: Lotsa people, from code originally in tcp
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#ifndef _INET_HASHTABLES_H
15#define _INET_HASHTABLES_H
16
17#include <linux/config.h>
18
19#include <linux/interrupt.h>
20#include <linux/ipv6.h>
21#include <linux/list.h>
22#include <linux/slab.h>
23#include <linux/socket.h>
24#include <linux/spinlock.h>
25#include <linux/types.h>
26#include <linux/wait.h>
27
28#include <net/inet_connection_sock.h>
29#include <net/route.h>
30#include <net/sock.h>
31#include <net/tcp_states.h>
32
33#include <asm/atomic.h>
34#include <asm/byteorder.h>
35
36/* This is for all connections with a full identity, no wildcards.
37 * New scheme, half the table is for TIME_WAIT, the other half is
38 * for the rest. I'll experiment with dynamic table growth later.
39 */
/* One bucket of the established/TIME_WAIT hash: a chain plus its own lock. */
40struct inet_ehash_bucket {
41 rwlock_t lock; /* read-locked by lookups, write-locked by hash/unhash */
42 struct hlist_head chain; /* sockets hashed to this bucket */
43} __attribute__((__aligned__(8)));
44
45/* There are a few simple rules, which allow for local port reuse by
46 * an application. In essence:
47 *
48 * 1) Sockets bound to different interfaces may share a local port.
49 * Failing that, goto test 2.
50 * 2) If all sockets have sk->sk_reuse set, and none of them are in
51 * TCP_LISTEN state, the port may be shared.
52 * Failing that, goto test 3.
53 * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local
54 * address, and none of them are the same, the port may be
55 * shared.
56 * Failing this, the port cannot be shared.
57 *
58 * The interesting point, is test #2. This is what an FTP server does
59 * all day. To optimize this case we use a specific flag bit defined
60 * below. As we add sockets to a bind bucket list, we perform a
61 * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN))
62 * As long as all sockets added to a bind bucket pass this test,
63 * the flag bit will be set.
64 * The resulting situation is that tcp_v[46]_verify_bind() can just check
65 * for this flag bit, if it is set and the socket trying to bind has
66 * sk->sk_reuse set, we don't even have to walk the owners list at all,
67 * we return that it is ok to bind this socket to the requested local port.
68 *
69 * Sounds like a lot of work, but it is worth it. In a more naive
70 * implementation (ie. current FreeBSD etc.) the entire list of ports
71 * must be walked for each data port opened by an ftp server. Needless
72 * to say, this does not scale at all. With a couple thousand FTP
73 * users logged onto your box, isn't it nice to know that new data
74 * ports are created in O(1) time? I thought so. ;-) -DaveM
75 */
/* Per-local-port bind state; one of these sits on an inet_bind_hashbucket chain. */
76struct inet_bind_bucket {
77 unsigned short port;
78 signed short fastreuse; /* presumably the fast-path reuse flag described above -- confirm */
79 struct hlist_node node; /* link walked by inet_bind_bucket_for_each() */
80 struct hlist_head owners; /* sockets added via sk_add_bind_node() */
81};
82
/*
 * Iterate over every inet_bind_bucket on a bind-hash chain.
 * @tb:   struct inet_bind_bucket * cursor, set to each bucket in turn
 * @pos:  struct hlist_node * scratch cursor
 * @head: the chain's struct hlist_head *
 *
 * The scratch-cursor parameter is deliberately not called "node": the last
 * argument to hlist_for_each_entry() is the literal name of the hlist_node
 * member of struct inet_bind_bucket, and a macro parameter with that same
 * name would be substituted into it, so the macro would only work for
 * callers whose cursor variable also happened to be named "node".
 */
#define inet_bind_bucket_for_each(tb, pos, head) \
	hlist_for_each_entry(tb, pos, head, node)
85
/* One bucket of the bind hash: a chain of inet_bind_bucket plus its lock. */
86struct inet_bind_hashbucket {
87 spinlock_t lock; /* held by __inet_inherit_port() while linking a child */
88 struct hlist_head chain;
89};
90
91/* This is for listening sockets, thus all sockets which possess wildcards. */
92#define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */
93
/*
 * All lookup tables for one protocol: the established/TIME_WAIT hash, the
 * bind hash, and the listening hash, plus the locks and counters guarding them.
 */
94struct inet_hashinfo {
95 /* This is for sockets with full identity only. Sockets here will
96 * always be without wildcards and will have the following invariant:
97 *
98 * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
99 *
100 * First half of the table is for sockets not in TIME_WAIT, second half
101 * is for TIME_WAIT sockets only.
102 */
103 struct inet_ehash_bucket *ehash;
104
105 /* Ok, let's try this, I give up, we do need a local binding
106 * TCP hash as well as the others for fast bind/connect.
107 */
108 struct inet_bind_hashbucket *bhash;
109
110 int bhash_size; /* used as a mask (size - 1) by inet_bhashfn() */
111 int ehash_size; /* size of ONE half; TIME_WAIT chains live at index + ehash_size */
112
113 /* All sockets in TCP_LISTEN state will be in here. This is the only
114 * table where wildcard'd TCP sockets can exist. Hash function here
115 * is just local port number.
116 */
117 struct hlist_head listening_hash[INET_LHTABLE_SIZE];
118
119 /* All the above members are written once at bootup and
120 * never written again _or_ are predominantly read-access.
121 *
122 * Now align to a new cache line as all the following members
123 * are often dirty.
124 */
125 rwlock_t lhash_lock ____cacheline_aligned;
126 atomic_t lhash_users; /* bumped by inet_listen_lock(), dropped by inet_listen_unlock() */
127 wait_queue_head_t lhash_wait; /* woken when lhash_users reaches zero */
128 spinlock_t portalloc_lock;
129 kmem_cache_t *bind_bucket_cachep;
130 int port_rover;
131};
132
/*
 * Hash a connection 4-tuple to a bucket index in the established hash.
 * @laddr/@lport: local address and port
 * @faddr/@fport: foreign address and port
 * @ehash_size:   number of buckets in one half of the table; used as a mask,
 *                so it is assumed to be a power of two
 *
 * Returns an index in [0, ehash_size).
 *
 * The mixing is done in unsigned arithmetic.  Doing it in a signed int
 * relies on implementation-defined behaviour twice: converting a __u32 with
 * the top bit set to int, and right-shifting the resulting negative value.
 * Insertions and lookups both go through this function, so they stay
 * consistent with each other.
 */
static inline int inet_ehashfn(const __u32 laddr, const __u16 lport,
			       const __u32 faddr, const __u16 fport,
			       const int ehash_size)
{
	unsigned int h = (laddr ^ lport) ^ (faddr ^ fport);

	/* Fold the high bits down so they influence the low index bits. */
	h ^= h >> 16;
	h ^= h >> 8;
	return (int)(h & (unsigned int)(ehash_size - 1));
}
142
/*
 * Established-hash index for @sk, computed by inet_ehashfn() from the
 * socket's own rcv_saddr/num (local side) and daddr/dport (foreign side).
 */
143static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size)
144{
145 const struct inet_sock *inet = inet_sk(sk);
146 const __u32 laddr = inet->rcv_saddr;
147 const __u16 lport = inet->num;
148 const __u32 faddr = inet->daddr;
149 const __u16 fport = inet->dport;
150
151 return inet_ehashfn(laddr, lport, faddr, fport, ehash_size);
152}
153
154extern struct inet_bind_bucket *
155 inet_bind_bucket_create(kmem_cache_t *cachep,
156 struct inet_bind_hashbucket *head,
157 const unsigned short snum);
158extern void inet_bind_bucket_destroy(kmem_cache_t *cachep,
159 struct inet_bind_bucket *tb);
160
/*
 * Bind-hash bucket index for local port @lport.  The port is simply masked
 * with (bhash_size - 1), so bhash_size is assumed to be a power of two.
 */
161static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
162{
163 return lport & (bhash_size - 1);
164}
165
166extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
167 const unsigned short snum);
168
169/* These can have wildcards, don't try too hard. */
/* Listening-hash index: port @num masked with (INET_LHTABLE_SIZE - 1). */
170static inline int inet_lhashfn(const unsigned short num)
171{
172 return num & (INET_LHTABLE_SIZE - 1);
173}
174
/* Listening-hash index for @sk, derived from its inet_sk(sk)->num. */
175static inline int inet_sk_listen_hashfn(const struct sock *sk)
176{
177 return inet_lhashfn(inet_sk(sk)->num);
178}
179
180/* Caller must disable local BH processing. */
/*
 * Make @child an owner of the bind bucket already held by @sk.
 * The bind-hash chain is selected from the child's port number; under that
 * chain's spinlock the child is added to the bucket's owners list and the
 * bucket is recorded in the child's icsk_bind_hash.
 */
181static inline void __inet_inherit_port(struct inet_hashinfo *table,
182 struct sock *sk, struct sock *child)
183{
184 const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
185 struct inet_bind_hashbucket *head = &table->bhash[bhash];
186 struct inet_bind_bucket *tb;
187
188 spin_lock(&head->lock);
189 tb = inet_csk(sk)->icsk_bind_hash; /* the parent's bucket is reused as-is */
190 sk_add_bind_node(child, &tb->owners);
191 inet_csk(child)->icsk_bind_hash = tb;
192 spin_unlock(&head->lock);
193}
194
/* Same as __inet_inherit_port(), but disables local BH around the call. */
195static inline void inet_inherit_port(struct inet_hashinfo *table,
196 struct sock *sk, struct sock *child)
197{
198 local_bh_disable();
199 __inet_inherit_port(table, sk, child);
200 local_bh_enable();
201}
202
203extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk);
204
205extern void inet_listen_wlock(struct inet_hashinfo *hashinfo);
206
207/*
208 * - We may sleep inside this lock.
209 * - If sleeping is not required (or called from BH),
210 * use plain read_(un)lock(&inet_hashinfo.lhash_lock).
211 */
/*
 * Register as a user of the listening hash: lhash_users is incremented while
 * lhash_lock is held for read, and the rwlock is released again before
 * returning.  Pair with inet_listen_unlock().
 */
212static inline void inet_listen_lock(struct inet_hashinfo *hashinfo)
213{
214 /* taking read_lock synchronizes us with would-be writers */
215 read_lock(&hashinfo->lhash_lock);
216 atomic_inc(&hashinfo->lhash_users);
217 read_unlock(&hashinfo->lhash_lock);
218}
219
/*
 * Drop the user count taken by inet_listen_lock().  When the count reaches
 * zero, wake up anything sleeping on lhash_wait.
 */
220static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo)
221{
222 if (atomic_dec_and_test(&hashinfo->lhash_users))
223 wake_up(&hashinfo->lhash_wait);
224}
225
/*
 * Insert an unhashed socket into the lookup tables.
 * @listen_possible: when non-zero and sk is in TCP_LISTEN, the socket goes
 *                   into the listening hash; otherwise it goes into the
 *                   established hash and sk->sk_hashent records the bucket.
 *
 * BH is not disabled here; inet_hash() disables it around this call.
 * NOTE(review): inet_listen_wlock() presumably returns with lhash_lock held
 * for write, since the shared write_unlock() below releases it -- confirm.
 */
226static inline void __inet_hash(struct inet_hashinfo *hashinfo,
227 struct sock *sk, const int listen_possible)
228{
229 struct hlist_head *list;
230 rwlock_t *lock;
231
232 BUG_TRAP(sk_unhashed(sk));
233 if (listen_possible && sk->sk_state == TCP_LISTEN) {
234 list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
235 lock = &hashinfo->lhash_lock;
236 inet_listen_wlock(hashinfo);
237 } else {
238 sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size);
239 list = &hashinfo->ehash[sk->sk_hashent].chain;
240 lock = &hashinfo->ehash[sk->sk_hashent].lock;
241 write_lock(lock);
242 }
243 __sk_add_node(sk, list);
244 sock_prot_inc_use(sk->sk_prot);
245 write_unlock(lock);
246 if (listen_possible && sk->sk_state == TCP_LISTEN)
247 wake_up(&hashinfo->lhash_wait); /* let waiters on lhash_wait re-check */
248}
249
/*
 * Hash @sk with local BH disabled.  Sockets in TCP_CLOSE are never hashed;
 * for them this is a no-op.
 */
250static inline void inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk)
251{
252 if (sk->sk_state != TCP_CLOSE) {
253 local_bh_disable();
254 __inet_hash(hashinfo, sk, 1);
255 local_bh_enable();
256 }
257}
258
/*
 * Remove @sk from whichever table it is hashed in.  Already-unhashed sockets
 * skip straight to the wake-up check.  The per-protocol use count is only
 * decremented when __sk_del_node_init() reports that a node was removed.
 */
259static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk)
260{
261 rwlock_t *lock;
262
263 if (sk_unhashed(sk))
264 goto out;
265
266 if (sk->sk_state == TCP_LISTEN) {
267 local_bh_disable(); /* paired with the _bh unlock below */
268 inet_listen_wlock(hashinfo);
269 lock = &hashinfo->lhash_lock;
270 } else {
271 struct inet_ehash_bucket *head = &hashinfo->ehash[sk->sk_hashent];
272 lock = &head->lock;
273 write_lock_bh(&head->lock);
274 }
275
276 if (__sk_del_node_init(sk))
277 sock_prot_dec_use(sk->sk_prot);
278 write_unlock_bh(lock); /* common exit for both branches */
279out:
280 if (sk->sk_state == TCP_LISTEN)
281 wake_up(&hashinfo->lhash_wait);
282}
283
/*
 * Returns rt_iif of the route attached to @skb.
 * NOTE(review): skb->dst is cast to struct rtable without any check, so this
 * assumes it is non-NULL and really is an rtable -- confirm at call sites.
 */
284static inline int inet_iif(const struct sk_buff *skb)
285{
286 return ((struct rtable *)skb->dst)->rt_iif;
287}
288
289extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
290 const u32 daddr,
291 const unsigned short hnum,
292 const int dif);
293
294/* Optimize the common listener case. */
/*
 * Find a listening socket for (@daddr, @hnum) arriving on interface @dif.
 * Runs under lhash_lock held for read.  Returns the socket with a reference
 * taken via sock_hold(), or NULL when the chain is empty or nothing matches.
 *
 * Fast path: if the first socket on the chain is the only one, has the right
 * port, is bound to the wildcard address or to @daddr, is PF_INET or not
 * IPv6-only, and is not bound to a device, it is returned directly without
 * calling __inet_lookup_listener().
 */
295static inline struct sock *
296 inet_lookup_listener(struct inet_hashinfo *hashinfo,
297 const u32 daddr,
298 const unsigned short hnum, const int dif)
299{
300 struct sock *sk = NULL;
301 const struct hlist_head *head;
302
303 read_lock(&hashinfo->lhash_lock);
304 head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
305 if (!hlist_empty(head)) {
306 const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
307
308 if (inet->num == hnum && !sk->sk_node.next &&
309 (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
310 (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
311 !sk->sk_bound_dev_if)
312 goto sherry_cache;
313 sk = __inet_lookup_listener(head, daddr, hnum, dif); /* slow path; may yield NULL */
314 }
315 if (sk) {
316sherry_cache: /* the fast path jumps here, into the if body, to share sock_hold() */
317 sock_hold(sk);
318 }
319 read_unlock(&hashinfo->lhash_lock);
320 return sk;
321}
322
323/* Socket demux engine toys. */
/*
 * Pack a source and a destination port into a single __u32.  Which port
 * lands in the upper 16 bits depends on the host byte order.  INET_MATCH()
 * and INET_TW_MATCH() compare the result against one 32-bit load starting
 * at the socket's dport field.
 * NOTE(review): that only works if the local port field immediately follows
 * dport in memory -- confirm against the struct layouts.
 */
324#ifdef __BIG_ENDIAN
325#define INET_COMBINED_PORTS(__sport, __dport) \
326 (((__u32)(__sport) << 16) | (__u32)(__dport))
327#else /* __LITTLE_ENDIAN */
328#define INET_COMBINED_PORTS(__sport, __dport) \
329 (((__u32)(__dport) << 16) | (__u32)(__sport))
330#endif
331
/*
 * Address/port/interface match helpers for the established lookup.
 *
 * 64-bit: INET_ADDR_COOKIE() declares a const __u64 holding both addresses
 * (order depends on endianness), and the match macros compare it against a
 * single 64-bit load starting at the socket's daddr field.
 * 32-bit: INET_ADDR_COOKIE() expands to nothing and the two addresses are
 * compared one by one.
 *
 * The 64-bit INET_ADDR_COOKIE() expansion already ends in ';', so it is used
 * without a trailing semicolon at the call site.  In every variant a socket
 * matches any interface unless sk_bound_dev_if is set, in which case it must
 * equal __dif.
 */
332#if (BITS_PER_LONG == 64)
333#ifdef __BIG_ENDIAN
334#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
335 const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr));
336#else /* __LITTLE_ENDIAN */
337#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
338 const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr));
339#endif /* __BIG_ENDIAN */
340#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
341 (((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \
342 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
343 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
344#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
345 (((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \
346 ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \
347 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
348#else /* 32-bit arch */
349#define INET_ADDR_COOKIE(__name, __saddr, __daddr)
350#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \
351 ((inet_sk(__sk)->daddr == (__saddr)) && \
352 (inet_sk(__sk)->rcv_saddr == (__daddr)) && \
353 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
354 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
355#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \
356 ((inet_twsk(__sk)->tw_daddr == (__saddr)) && \
357 (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \
358 ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \
359 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
360#endif /* 64-bit arch */
361
362/*
363 * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
364 * not check it for lookups anymore, thanks Alexey. -DaveM
365 *
366 * Local BH must be disabled here.
367 */
/*
 * Look up a fully-specified connection in the established hash.
 * @saddr/@sport: the packet's source address and port
 * @daddr/@hnum:  the packet's destination address and port
 * @dif:          incoming interface index
 *
 * The bucket is chosen with the packet's destination as the local side.  The
 * regular chain is searched first, then the TIME_WAIT chain that sits
 * ehash_size buckets further on; both walks happen under the first bucket's
 * read lock.  Returns the match with a reference taken via sock_hold(), or
 * NULL.
 */
368static inline struct sock *
369 __inet_lookup_established(struct inet_hashinfo *hashinfo,
370 const u32 saddr, const u16 sport,
371 const u32 daddr, const u16 hnum,
372 const int dif)
373{
374 INET_ADDR_COOKIE(acookie, saddr, daddr)
375 const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
376 struct sock *sk;
377 const struct hlist_node *node;
378 /* Optimize here for direct hit, only listening connections can
379 * have wildcards anyways.
380 */
381 const int hash = inet_ehashfn(daddr, hnum, saddr, sport, hashinfo->ehash_size);
382 struct inet_ehash_bucket *head = &hashinfo->ehash[hash];
383
384 read_lock(&head->lock);
385 sk_for_each(sk, node, &head->chain) {
386 if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif))
387 goto hit; /* You sunk my battleship! */
388 }
389
390 /* Must check for a TIME_WAIT'er before going to listener hash. */
391 sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
392 if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
393 goto hit;
394 }
395 sk = NULL;
396out:
397 read_unlock(&head->lock);
398 return sk;
399hit:
400 sock_hold(sk); /* reference is taken before the bucket lock is dropped */
401 goto out;
402}
403
/*
 * Full socket lookup: try the established/TIME_WAIT hash first and fall back
 * to the listening hash when nothing matches.  No BH handling is done here;
 * inet_lookup() is the wrapper that disables BH around this call.
 */
404static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo,
405 const u32 saddr, const u16 sport,
406 const u32 daddr, const u16 hnum,
407 const int dif)
408{
409 struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr,
410 hnum, dif);
411 return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif); /* GNU "?:" -- sk if non-NULL */
412}
413
/*
 * Wrapper around __inet_lookup() that disables local BH for the duration of
 * the lookup.  @dport is converted with ntohs() before being passed on as
 * the host-order port; @sport is passed through unchanged.
 */
414static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo,
415 const u32 saddr, const u16 sport,
416 const u32 daddr, const u16 dport,
417 const int dif)
418{
419 struct sock *sk;
420
421 local_bh_disable();
422 sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
423 local_bh_enable();
424
425 return sk;
426}
427#endif /* _INET_HASHTABLES_H */
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
new file mode 100644
index 000000000000..3b070352e869
--- /dev/null
+++ b/include/net/inet_timewait_sock.h
@@ -0,0 +1,219 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Definitions for a generic INET TIMEWAIT sock
7 *
8 * From code originally in net/tcp.h
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15#ifndef _INET_TIMEWAIT_SOCK_
16#define _INET_TIMEWAIT_SOCK_
17
18#include <linux/config.h>
19
20#include <linux/ip.h>
21#include <linux/list.h>
22#include <linux/timer.h>
23#include <linux/types.h>
24#include <linux/workqueue.h>
25
26#include <net/sock.h>
27#include <net/tcp_states.h>
28
29#include <asm/atomic.h>
30
31struct inet_hashinfo;
32
33#define INET_TWDR_RECYCLE_SLOTS_LOG 5
34#define INET_TWDR_RECYCLE_SLOTS (1 << INET_TWDR_RECYCLE_SLOTS_LOG)
35
36/*
37 * If time > 4sec, it is "slow" path, no recycling is required,
38 * so that we select tick to get range about 4 seconds.
39 */
40#if HZ <= 16 || HZ > 4096
41# error Unsupported: HZ <= 16 or HZ > 4096
42#elif HZ <= 32
43# define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
44#elif HZ <= 64
45# define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
46#elif HZ <= 128
47# define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
48#elif HZ <= 256
49# define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
50#elif HZ <= 512
51# define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
52#elif HZ <= 1024
53# define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
54#elif HZ <= 2048
55# define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
56#else
57# define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
58#endif
59
60/* TIME_WAIT reaping mechanism. */
61#define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. */
62
63#define INET_TWDR_TWKILL_QUOTA 100
64
65struct inet_timewait_death_row {
66 /* Short-time timewait calendar */
67 int twcal_hand;
68 int twcal_jiffie;
69 struct timer_list twcal_timer;
70 struct hlist_head twcal_row[INET_TWDR_RECYCLE_SLOTS];
71
72 spinlock_t death_lock;
73 int tw_count;
74 int period;
75 u32 thread_slots;
76 struct work_struct twkill_work;
77 struct timer_list tw_timer;
78 int slot;
79 struct hlist_head cells[INET_TWDR_TWKILL_SLOTS];
80 struct inet_hashinfo *hashinfo;
81 int sysctl_tw_recycle;
82 int sysctl_max_tw_buckets;
83};
84
85extern void inet_twdr_hangman(unsigned long data);
86extern void inet_twdr_twkill_work(void *data);
87extern void inet_twdr_twcal_tick(unsigned long data);
88
89#if (BITS_PER_LONG == 64)
90#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8
91#else
92#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4
93#endif
94
95struct inet_bind_bucket;
96
97/*
98 * This is a TIME_WAIT sock. It works around the memory consumption
99 * problems of sockets in such a state on heavily loaded servers, but
100 * without violating the protocol specification.
101 */
102struct inet_timewait_sock {
103 /*
104 * Now struct sock also uses sock_common, so please just
105 * don't add anything before this first member (__tw_common) --acme
106 */
107 struct sock_common __tw_common;
108#define tw_family __tw_common.skc_family
109#define tw_state __tw_common.skc_state
110#define tw_reuse __tw_common.skc_reuse
111#define tw_bound_dev_if __tw_common.skc_bound_dev_if
112#define tw_node __tw_common.skc_node
113#define tw_bind_node __tw_common.skc_bind_node
114#define tw_refcnt __tw_common.skc_refcnt
115#define tw_prot __tw_common.skc_prot
116 volatile unsigned char tw_substate;
117 /* 3 bits hole, try to pack */
118 unsigned char tw_rcv_wscale;
119 /* Socket demultiplex comparisons on incoming packets. */
120 /* these five are in inet_sock */
121 __u16 tw_sport;
122 __u32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES)));
123 __u32 tw_rcv_saddr;
124 __u16 tw_dport;
125 __u16 tw_num;
126 /* And these are ours. */
127 __u8 tw_ipv6only:1;
128 /* 31 bits hole, try to pack */
129 int tw_hashent;
130 int tw_timeout;
131 unsigned long tw_ttd;
132 struct inet_bind_bucket *tw_tb;
133 struct hlist_node tw_death_node;
134};
135
/* Push @tw onto the front of @list through its tw_node link. */
136static inline void inet_twsk_add_node(struct inet_timewait_sock *tw,
137 struct hlist_head *list)
138{
139 hlist_add_head(&tw->tw_node, list);
140}
141
/* Push @tw onto the front of @list through its tw_bind_node link. */
142static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
143 struct hlist_head *list)
144{
145 hlist_add_head(&tw->tw_bind_node, list);
146}
147
/*
 * Non-zero when @tw's tw_death_node is linked on a list.  A NULL pprev is
 * the "not linked" marker set by inet_twsk_dead_node_init().
 */
148static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw)
149{
150 return tw->tw_death_node.pprev != NULL;
151}
152
/* Mark @tw's tw_death_node as not linked by clearing its pprev pointer. */
153static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw)
154{
155 tw->tw_death_node.pprev = NULL;
156}
157
/*
 * Unlink @tw's tw_death_node and mark it as not linked.  No check is made
 * that the node is currently on a list; inet_twsk_del_dead_node() is the
 * checked variant.
 */
158static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw)
159{
160 __hlist_del(&tw->tw_death_node);
161 inet_twsk_dead_node_init(tw);
162}
163
/*
 * Unlink @tw's tw_death_node if it is currently linked.
 * Returns 1 when a node was removed, 0 when it was not on any list.
 */
164static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw)
165{
166 if (inet_twsk_dead_hashed(tw)) {
167 __inet_twsk_del_dead_node(tw);
168 return 1;
169 }
170 return 0;
171}
172
173#define inet_twsk_for_each(tw, node, head) \
174 hlist_for_each_entry(tw, node, head, tw_node)
175
176#define inet_twsk_for_each_inmate(tw, node, jail) \
177 hlist_for_each_entry(tw, node, jail, tw_death_node)
178
179#define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \
180 hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node)
181
/*
 * Reinterpret @sk as a struct inet_timewait_sock.  This is a plain pointer
 * cast that also drops the const qualifier; nothing checks the socket state.
 */
182static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
183{
184 return (struct inet_timewait_sock *)sk;
185}
186
/*
 * Local receive address of @sk, read from the inet_sock for ordinary sockets
 * and from the inet_timewait_sock when sk_state is TCP_TIME_WAIT.  The
 * non-TIME_WAIT case is marked likely().
 */
187static inline u32 inet_rcv_saddr(const struct sock *sk)
188{
189 return likely(sk->sk_state != TCP_TIME_WAIT) ?
190 inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr;
191}
192
/*
 * Drop one reference on @tw.  When the count reaches zero the object is
 * returned to its protocol's twsk_slab cache, so @tw must not be used
 * afterwards.  With SOCK_REFCNT_DEBUG defined, the release is logged first.
 */
193static inline void inet_twsk_put(struct inet_timewait_sock *tw)
194{
195 if (atomic_dec_and_test(&tw->tw_refcnt)) {
196#ifdef SOCK_REFCNT_DEBUG
197 printk(KERN_DEBUG "%s timewait_sock %p released\n",
198 tw->tw_prot->name, tw);
199#endif
200 kmem_cache_free(tw->tw_prot->twsk_slab, tw);
201 }
202}
203
204extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
205 const int state);
206
207extern void __inet_twsk_kill(struct inet_timewait_sock *tw,
208 struct inet_hashinfo *hashinfo);
209
210extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw,
211 struct sock *sk,
212 struct inet_hashinfo *hashinfo);
213
214extern void inet_twsk_schedule(struct inet_timewait_sock *tw,
215 struct inet_timewait_death_row *twdr,
216 const int timeo, const int timewait_len);
217extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
218 struct inet_timewait_death_row *twdr);
219#endif /* _INET_TIMEWAIT_SOCK_ */
diff --git a/include/net/ip.h b/include/net/ip.h
index 32360bbe143f..e4563bbee6ea 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -86,7 +86,7 @@ extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
86 u32 saddr, u32 daddr, 86 u32 saddr, u32 daddr,
87 struct ip_options *opt); 87 struct ip_options *opt);
88extern int ip_rcv(struct sk_buff *skb, struct net_device *dev, 88extern int ip_rcv(struct sk_buff *skb, struct net_device *dev,
89 struct packet_type *pt); 89 struct packet_type *pt, struct net_device *orig_dev);
90extern int ip_local_deliver(struct sk_buff *skb); 90extern int ip_local_deliver(struct sk_buff *skb);
91extern int ip_mr_input(struct sk_buff *skb); 91extern int ip_mr_input(struct sk_buff *skb);
92extern int ip_output(struct sk_buff *skb); 92extern int ip_output(struct sk_buff *skb);
@@ -140,8 +140,6 @@ struct ip_reply_arg {
140void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, 140void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
141 unsigned int len); 141 unsigned int len);
142 142
143extern int ip_finish_output(struct sk_buff *skb);
144
145struct ipv4_config 143struct ipv4_config
146{ 144{
147 int log_martians; 145 int log_martians;
@@ -165,6 +163,24 @@ extern int sysctl_local_port_range[2];
165extern int sysctl_ip_default_ttl; 163extern int sysctl_ip_default_ttl;
166extern int sysctl_ip_nonlocal_bind; 164extern int sysctl_ip_nonlocal_bind;
167 165
166/* From ip_fragment.c */
167extern int sysctl_ipfrag_high_thresh;
168extern int sysctl_ipfrag_low_thresh;
169extern int sysctl_ipfrag_time;
170extern int sysctl_ipfrag_secret_interval;
171
172/* From inetpeer.c */
173extern int inet_peer_threshold;
174extern int inet_peer_minttl;
175extern int inet_peer_maxttl;
176extern int inet_peer_gc_mintime;
177extern int inet_peer_gc_maxtime;
178
179/* From ip_output.c */
180extern int sysctl_ip_dynaddr;
181
182extern void ipfrag_init(void);
183
168#ifdef CONFIG_INET 184#ifdef CONFIG_INET
169/* The function in 2.2 was invalid, producing wrong result for 185/* The function in 2.2 was invalid, producing wrong result for
170 * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */ 186 * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */
@@ -319,7 +335,10 @@ extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, u32 da
319extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb); 335extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb);
320extern void ip_options_fragment(struct sk_buff *skb); 336extern void ip_options_fragment(struct sk_buff *skb);
321extern int ip_options_compile(struct ip_options *opt, struct sk_buff *skb); 337extern int ip_options_compile(struct ip_options *opt, struct sk_buff *skb);
322extern int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, int user); 338extern int ip_options_get(struct ip_options **optp,
339 unsigned char *data, int optlen);
340extern int ip_options_get_from_user(struct ip_options **optp,
341 unsigned char __user *data, int optlen);
323extern void ip_options_undo(struct ip_options * opt); 342extern void ip_options_undo(struct ip_options * opt);
324extern void ip_forward_options(struct sk_buff *skb); 343extern void ip_forward_options(struct sk_buff *skb);
325extern int ip_options_rcv_srr(struct sk_buff *skb); 344extern int ip_options_rcv_srr(struct sk_buff *skb);
@@ -350,5 +369,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
350 void __user *oldval, size_t __user *oldlenp, 369 void __user *oldval, size_t __user *oldlenp,
351 void __user *newval, size_t newlen, 370 void __user *newval, size_t newlen,
352 void **context); 371 void **context);
372#ifdef CONFIG_PROC_FS
373extern int ip_misc_proc_init(void);
374#endif
375
376extern struct ctl_table ipv4_table[];
353 377
354#endif /* _IP_H */ 378#endif /* _IP_H */
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index f920706d526b..1f2e428ca364 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -12,7 +12,6 @@
12#include <net/flow.h> 12#include <net/flow.h>
13#include <net/ip6_fib.h> 13#include <net/ip6_fib.h>
14#include <net/sock.h> 14#include <net/sock.h>
15#include <linux/tcp.h>
16#include <linux/ip.h> 15#include <linux/ip.h>
17#include <linux/ipv6.h> 16#include <linux/ipv6.h>
18 17
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index a4208a336ac0..14de4ebd1211 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -295,4 +295,9 @@ static inline void fib_res_put(struct fib_result *res)
295#endif 295#endif
296} 296}
297 297
298#ifdef CONFIG_PROC_FS
299extern int fib_proc_init(void);
300extern void fib_proc_exit(void);
301#endif
302
298#endif /* _NET_FIB_H */ 303#endif /* _NET_FIB_H */
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 52da5d26617a..7a3c43711a17 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -255,7 +255,6 @@ struct ip_vs_daemon_user {
255#include <asm/atomic.h> /* for struct atomic_t */ 255#include <asm/atomic.h> /* for struct atomic_t */
256#include <linux/netdevice.h> /* for struct neighbour */ 256#include <linux/netdevice.h> /* for struct neighbour */
257#include <net/dst.h> /* for struct dst_entry */ 257#include <net/dst.h> /* for struct dst_entry */
258#include <net/tcp.h>
259#include <net/udp.h> 258#include <net/udp.h>
260#include <linux/compiler.h> 259#include <linux/compiler.h>
261 260
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 69324465e8b3..3203eaff4bd4 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -104,6 +104,7 @@ struct frag_hdr {
104 104
105#ifdef __KERNEL__ 105#ifdef __KERNEL__
106 106
107#include <linux/config.h>
107#include <net/sock.h> 108#include <net/sock.h>
108 109
109/* sysctls */ 110/* sysctls */
@@ -145,7 +146,6 @@ DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6);
145#define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field) 146#define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field)
146#define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field) 147#define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field)
147#define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field) 148#define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field)
148extern atomic_t inet6_sock_nr;
149 149
150int snmp6_register_dev(struct inet6_dev *idev); 150int snmp6_register_dev(struct inet6_dev *idev);
151int snmp6_unregister_dev(struct inet6_dev *idev); 151int snmp6_unregister_dev(struct inet6_dev *idev);
@@ -346,7 +346,8 @@ static inline int ipv6_addr_any(const struct in6_addr *a)
346 346
347extern int ipv6_rcv(struct sk_buff *skb, 347extern int ipv6_rcv(struct sk_buff *skb,
348 struct net_device *dev, 348 struct net_device *dev,
349 struct packet_type *pt); 349 struct packet_type *pt,
350 struct net_device *orig_dev);
350 351
351/* 352/*
352 * upper-layer output functions 353 * upper-layer output functions
@@ -464,8 +465,38 @@ extern int sysctl_ip6frag_low_thresh;
464extern int sysctl_ip6frag_time; 465extern int sysctl_ip6frag_time;
465extern int sysctl_ip6frag_secret_interval; 466extern int sysctl_ip6frag_secret_interval;
466 467
467#endif /* __KERNEL__ */ 468extern struct proto_ops inet6_stream_ops;
468#endif /* _NET_IPV6_H */ 469extern struct proto_ops inet6_dgram_ops;
470
471extern int ip6_mc_source(int add, int omode, struct sock *sk,
472 struct group_source_req *pgsr);
473extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
474extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
475 struct group_filter __user *optval,
476 int __user *optlen);
477
478#ifdef CONFIG_PROC_FS
479extern int ac6_proc_init(void);
480extern void ac6_proc_exit(void);
481extern int raw6_proc_init(void);
482extern void raw6_proc_exit(void);
483extern int tcp6_proc_init(void);
484extern void tcp6_proc_exit(void);
485extern int udp6_proc_init(void);
486extern void udp6_proc_exit(void);
487extern int ipv6_misc_proc_init(void);
488extern void ipv6_misc_proc_exit(void);
489
490extern struct rt6_statistics rt6_stats;
491#endif
469 492
493#ifdef CONFIG_SYSCTL
494extern ctl_table ipv6_route_table[];
495extern ctl_table ipv6_icmp_table[];
470 496
497extern void ipv6_sysctl_register(void);
498extern void ipv6_sysctl_unregister(void);
499#endif
471 500
501#endif /* __KERNEL__ */
502#endif /* _NET_IPV6_H */
diff --git a/include/net/llc.h b/include/net/llc.h
index c9aed2a8b4e2..71769a5aeef3 100644
--- a/include/net/llc.h
+++ b/include/net/llc.h
@@ -46,7 +46,8 @@ struct llc_sap {
46 unsigned char f_bit; 46 unsigned char f_bit;
47 int (*rcv_func)(struct sk_buff *skb, 47 int (*rcv_func)(struct sk_buff *skb,
48 struct net_device *dev, 48 struct net_device *dev,
49 struct packet_type *pt); 49 struct packet_type *pt,
50 struct net_device *orig_dev);
50 struct llc_addr laddr; 51 struct llc_addr laddr;
51 struct list_head node; 52 struct list_head node;
52 struct { 53 struct {
@@ -64,7 +65,7 @@ extern rwlock_t llc_sap_list_lock;
64extern unsigned char llc_station_mac_sa[ETH_ALEN]; 65extern unsigned char llc_station_mac_sa[ETH_ALEN];
65 66
66extern int llc_rcv(struct sk_buff *skb, struct net_device *dev, 67extern int llc_rcv(struct sk_buff *skb, struct net_device *dev,
67 struct packet_type *pt); 68 struct packet_type *pt, struct net_device *orig_dev);
68 69
69extern int llc_mac_hdr_init(struct sk_buff *skb, 70extern int llc_mac_hdr_init(struct sk_buff *skb,
70 unsigned char *sa, unsigned char *da); 71 unsigned char *sa, unsigned char *da);
@@ -78,7 +79,8 @@ extern void llc_set_station_handler(void (*handler)(struct sk_buff *skb));
78extern struct llc_sap *llc_sap_open(unsigned char lsap, 79extern struct llc_sap *llc_sap_open(unsigned char lsap,
79 int (*rcv)(struct sk_buff *skb, 80 int (*rcv)(struct sk_buff *skb,
80 struct net_device *dev, 81 struct net_device *dev,
81 struct packet_type *pt)); 82 struct packet_type *pt,
83 struct net_device *orig_dev));
82extern void llc_sap_close(struct llc_sap *sap); 84extern void llc_sap_close(struct llc_sap *sap);
83 85
84extern struct llc_sap *llc_sap_find(unsigned char sap_value); 86extern struct llc_sap *llc_sap_find(unsigned char sap_value);
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 89809891e5ab..34c07731933d 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -363,7 +363,14 @@ __neigh_lookup_errno(struct neigh_table *tbl, const void *pkey,
363 return neigh_create(tbl, pkey, dev); 363 return neigh_create(tbl, pkey, dev);
364} 364}
365 365
366#define LOCALLY_ENQUEUED -2 366struct neighbour_cb {
367 unsigned long sched_next;
368 unsigned int flags;
369};
370
371#define LOCALLY_ENQUEUED 0x1
372
373#define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb)
367 374
368#endif 375#endif
369#endif 376#endif
diff --git a/include/net/p8022.h b/include/net/p8022.h
index 3c99a86c3581..42e9fac51b31 100644
--- a/include/net/p8022.h
+++ b/include/net/p8022.h
@@ -4,7 +4,10 @@ extern struct datalink_proto *
4 register_8022_client(unsigned char type, 4 register_8022_client(unsigned char type,
5 int (*func)(struct sk_buff *skb, 5 int (*func)(struct sk_buff *skb,
6 struct net_device *dev, 6 struct net_device *dev,
7 struct packet_type *pt)); 7 struct packet_type *pt,
8 struct net_device *orig_dev));
8extern void unregister_8022_client(struct datalink_proto *proto); 9extern void unregister_8022_client(struct datalink_proto *proto);
9 10
11extern struct datalink_proto *make_8023_client(void);
12extern void destroy_8023_client(struct datalink_proto *dl);
10#endif 13#endif
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 4abda6aec05a..b902d24a3256 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -352,10 +352,10 @@ tcf_change_indev(struct tcf_proto *tp, char *indev, struct rtattr *indev_tlv)
352static inline int 352static inline int
353tcf_match_indev(struct sk_buff *skb, char *indev) 353tcf_match_indev(struct sk_buff *skb, char *indev)
354{ 354{
355 if (0 != indev[0]) { 355 if (indev[0]) {
356 if (NULL == skb->input_dev) 356 if (!skb->input_dev)
357 return 0; 357 return 0;
358 else if (0 != strcmp(indev, skb->input_dev->name)) 358 if (strcmp(indev, skb->input_dev->name))
359 return 0; 359 return 0;
360 } 360 }
361 361
diff --git a/include/net/psnap.h b/include/net/psnap.h
index 9c94e8f98b36..b2e01cc3fc8a 100644
--- a/include/net/psnap.h
+++ b/include/net/psnap.h
@@ -1,7 +1,7 @@
1#ifndef _NET_PSNAP_H 1#ifndef _NET_PSNAP_H
2#define _NET_PSNAP_H 2#define _NET_PSNAP_H
3 3
4extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *)); 4extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *orig_dev));
5extern void unregister_snap_client(struct datalink_proto *proto); 5extern void unregister_snap_client(struct datalink_proto *proto);
6 6
7#endif 7#endif
diff --git a/include/net/raw.h b/include/net/raw.h
index 1c411c45587a..f47917469b12 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -17,10 +17,10 @@
17#ifndef _RAW_H 17#ifndef _RAW_H
18#define _RAW_H 18#define _RAW_H
19 19
20#include <linux/config.h>
20 21
21extern struct proto raw_prot; 22extern struct proto raw_prot;
22 23
23
24extern void raw_err(struct sock *, struct sk_buff *, u32 info); 24extern void raw_err(struct sock *, struct sk_buff *, u32 info);
25extern int raw_rcv(struct sock *, struct sk_buff *); 25extern int raw_rcv(struct sock *, struct sk_buff *);
26 26
@@ -37,6 +37,11 @@ extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
37 unsigned long raddr, unsigned long laddr, 37 unsigned long raddr, unsigned long laddr,
38 int dif); 38 int dif);
39 39
40extern void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); 40extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash);
41
42#ifdef CONFIG_PROC_FS
43extern int raw_proc_init(void);
44extern void raw_proc_exit(void);
45#endif
41 46
42#endif /* _RAW_H */ 47#endif /* _RAW_H */
diff --git a/include/net/rawv6.h b/include/net/rawv6.h
index 23fd9a6a221a..14476a71725e 100644
--- a/include/net/rawv6.h
+++ b/include/net/rawv6.h
@@ -7,10 +7,11 @@
7extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; 7extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE];
8extern rwlock_t raw_v6_lock; 8extern rwlock_t raw_v6_lock;
9 9
10extern void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); 10extern int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr);
11 11
12extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, 12extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
13 struct in6_addr *loc_addr, struct in6_addr *rmt_addr); 13 struct in6_addr *loc_addr, struct in6_addr *rmt_addr,
14 int dif);
14 15
15extern int rawv6_rcv(struct sock *sk, 16extern int rawv6_rcv(struct sock *sk,
16 struct sk_buff *skb); 17 struct sk_buff *skb);
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 72fd6f5e86b1..b52cc52ffe39 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -89,6 +89,7 @@ struct listen_sock {
89 int qlen_young; 89 int qlen_young;
90 int clock_hand; 90 int clock_hand;
91 u32 hash_rnd; 91 u32 hash_rnd;
92 u32 nr_table_entries;
92 struct request_sock *syn_table[0]; 93 struct request_sock *syn_table[0];
93}; 94};
94 95
@@ -96,6 +97,7 @@ struct listen_sock {
96 * 97 *
97 * @rskq_accept_head - FIFO head of established children 98 * @rskq_accept_head - FIFO head of established children
98 * @rskq_accept_tail - FIFO tail of established children 99 * @rskq_accept_tail - FIFO tail of established children
100 * @rskq_defer_accept - User waits for some data after accept()
99 * @syn_wait_lock - serializer 101 * @syn_wait_lock - serializer
100 * 102 *
101 * %syn_wait_lock is necessary only to avoid proc interface having to grab the main 103 * %syn_wait_lock is necessary only to avoid proc interface having to grab the main
@@ -111,6 +113,8 @@ struct request_sock_queue {
111 struct request_sock *rskq_accept_head; 113 struct request_sock *rskq_accept_head;
112 struct request_sock *rskq_accept_tail; 114 struct request_sock *rskq_accept_tail;
113 rwlock_t syn_wait_lock; 115 rwlock_t syn_wait_lock;
116 u8 rskq_defer_accept;
117 /* 3 bytes hole, try to pack */
114 struct listen_sock *listen_opt; 118 struct listen_sock *listen_opt;
115}; 119};
116 120
@@ -129,11 +133,13 @@ static inline struct listen_sock *reqsk_queue_yank_listen_sk(struct request_sock
129 return lopt; 133 return lopt;
130} 134}
131 135
132static inline void reqsk_queue_destroy(struct request_sock_queue *queue) 136static inline void __reqsk_queue_destroy(struct request_sock_queue *queue)
133{ 137{
134 kfree(reqsk_queue_yank_listen_sk(queue)); 138 kfree(reqsk_queue_yank_listen_sk(queue));
135} 139}
136 140
141extern void reqsk_queue_destroy(struct request_sock_queue *queue);
142
137static inline struct request_sock * 143static inline struct request_sock *
138 reqsk_queue_yank_acceptq(struct request_sock_queue *queue) 144 reqsk_queue_yank_acceptq(struct request_sock_queue *queue)
139{ 145{
@@ -221,17 +227,17 @@ static inline int reqsk_queue_added(struct request_sock_queue *queue)
221 return prev_qlen; 227 return prev_qlen;
222} 228}
223 229
224static inline int reqsk_queue_len(struct request_sock_queue *queue) 230static inline int reqsk_queue_len(const struct request_sock_queue *queue)
225{ 231{
226 return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; 232 return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0;
227} 233}
228 234
229static inline int reqsk_queue_len_young(struct request_sock_queue *queue) 235static inline int reqsk_queue_len_young(const struct request_sock_queue *queue)
230{ 236{
231 return queue->listen_opt->qlen_young; 237 return queue->listen_opt->qlen_young;
232} 238}
233 239
234static inline int reqsk_queue_is_full(struct request_sock_queue *queue) 240static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
235{ 241{
236 return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; 242 return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;
237} 243}
diff --git a/include/net/route.h b/include/net/route.h
index c3cd069a9aca..dbe79ca67d31 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -105,10 +105,6 @@ struct rt_cache_stat
105 unsigned int out_hlist_search; 105 unsigned int out_hlist_search;
106}; 106};
107 107
108extern struct rt_cache_stat *rt_cache_stat;
109#define RT_CACHE_STAT_INC(field) \
110 (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++)
111
112extern struct ip_rt_acct *ip_rt_acct; 108extern struct ip_rt_acct *ip_rt_acct;
113 109
114struct in_device; 110struct in_device;
@@ -199,4 +195,6 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt)
199 return rt->peer; 195 return rt->peer;
200} 196}
201 197
198extern ctl_table ipv4_route_table[];
199
202#endif /* _ROUTE_H */ 200#endif /* _ROUTE_H */
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 5999e5684bbf..c51541ee0247 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -47,10 +47,10 @@
47#ifndef __sctp_constants_h__ 47#ifndef __sctp_constants_h__
48#define __sctp_constants_h__ 48#define __sctp_constants_h__
49 49
50#include <linux/tcp.h> /* For TCP states used in sctp_sock_state_t */
51#include <linux/sctp.h> 50#include <linux/sctp.h>
52#include <linux/ipv6.h> /* For ipv6hdr. */ 51#include <linux/ipv6.h> /* For ipv6hdr. */
53#include <net/sctp/user.h> 52#include <net/sctp/user.h>
53#include <net/tcp_states.h> /* For TCP states used in sctp_sock_state_t */
54 54
55/* Value used for stream negotiation. */ 55/* Value used for stream negotiation. */
56enum { SCTP_MAX_STREAM = 0xffff }; 56enum { SCTP_MAX_STREAM = 0xffff };
diff --git a/include/net/sock.h b/include/net/sock.h
index a1042d08becd..312cb25cbd18 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -88,6 +88,7 @@ do { spin_lock_init(&((__sk)->sk_lock.slock)); \
88} while(0) 88} while(0)
89 89
90struct sock; 90struct sock;
91struct proto;
91 92
92/** 93/**
93 * struct sock_common - minimal network layer representation of sockets 94 * struct sock_common - minimal network layer representation of sockets
@@ -98,10 +99,11 @@ struct sock;
98 * @skc_node: main hash linkage for various protocol lookup tables 99 * @skc_node: main hash linkage for various protocol lookup tables
99 * @skc_bind_node: bind hash linkage for various protocol lookup tables 100 * @skc_bind_node: bind hash linkage for various protocol lookup tables
100 * @skc_refcnt: reference count 101 * @skc_refcnt: reference count
102 * @skc_prot: protocol handlers inside a network family
101 * 103 *
102 * This is the minimal network layer representation of sockets, the header 104 * This is the minimal network layer representation of sockets, the header
103 * for struct sock and struct tcp_tw_bucket. 105 * for struct sock and struct inet_timewait_sock.
104 */ 106 */
105struct sock_common { 107struct sock_common {
106 unsigned short skc_family; 108 unsigned short skc_family;
107 volatile unsigned char skc_state; 109 volatile unsigned char skc_state;
@@ -110,11 +112,12 @@ struct sock_common {
110 struct hlist_node skc_node; 112 struct hlist_node skc_node;
111 struct hlist_node skc_bind_node; 113 struct hlist_node skc_bind_node;
112 atomic_t skc_refcnt; 114 atomic_t skc_refcnt;
115 struct proto *skc_prot;
113}; 116};
114 117
115/** 118/**
116 * struct sock - network layer representation of sockets 119 * struct sock - network layer representation of sockets
117 * @__sk_common: shared layout with tcp_tw_bucket 120 * @__sk_common: shared layout with inet_timewait_sock
118 * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN 121 * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
119 * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings 122 * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
120 * @sk_lock: synchronizer 123 * @sk_lock: synchronizer
@@ -136,11 +139,10 @@ struct sock_common {
136 * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets 139 * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets
137 * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) 140 * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
138 * @sk_lingertime: %SO_LINGER l_linger setting 141 * @sk_lingertime: %SO_LINGER l_linger setting
139 * @sk_hashent: hash entry in several tables (e.g. tcp_ehash) 142 * @sk_hashent: hash entry in several tables (e.g. inet_hashinfo.ehash)
140 * @sk_backlog: always used with the per-socket spinlock held 143 * @sk_backlog: always used with the per-socket spinlock held
141 * @sk_callback_lock: used with the callbacks in the end of this struct 144 * @sk_callback_lock: used with the callbacks in the end of this struct
142 * @sk_error_queue: rarely used 145 * @sk_error_queue: rarely used
143 * @sk_prot: protocol handlers inside a network family
144 * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance) 146 * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance)
145 * @sk_err: last error 147 * @sk_err: last error
146 * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out' 148 * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out'
@@ -173,7 +175,7 @@ struct sock_common {
173 */ 175 */
174struct sock { 176struct sock {
175 /* 177 /*
176 * Now struct tcp_tw_bucket also uses sock_common, so please just 178 * Now struct inet_timewait_sock also uses sock_common, so please just
177 * don't add nothing before this first member (__sk_common) --acme 179 * don't add nothing before this first member (__sk_common) --acme
178 */ 180 */
179 struct sock_common __sk_common; 181 struct sock_common __sk_common;
@@ -184,6 +186,7 @@ struct sock {
184#define sk_node __sk_common.skc_node 186#define sk_node __sk_common.skc_node
185#define sk_bind_node __sk_common.skc_bind_node 187#define sk_bind_node __sk_common.skc_bind_node
186#define sk_refcnt __sk_common.skc_refcnt 188#define sk_refcnt __sk_common.skc_refcnt
189#define sk_prot __sk_common.skc_prot
187 unsigned char sk_shutdown : 2, 190 unsigned char sk_shutdown : 2,
188 sk_no_check : 2, 191 sk_no_check : 2,
189 sk_userlocks : 4; 192 sk_userlocks : 4;
@@ -218,7 +221,6 @@ struct sock {
218 struct sk_buff *tail; 221 struct sk_buff *tail;
219 } sk_backlog; 222 } sk_backlog;
220 struct sk_buff_head sk_error_queue; 223 struct sk_buff_head sk_error_queue;
221 struct proto *sk_prot;
222 struct proto *sk_prot_creator; 224 struct proto *sk_prot_creator;
223 rwlock_t sk_callback_lock; 225 rwlock_t sk_callback_lock;
224 int sk_err, 226 int sk_err,
@@ -253,28 +255,28 @@ struct sock {
253/* 255/*
254 * Hashed lists helper routines 256 * Hashed lists helper routines
255 */ 257 */
256static inline struct sock *__sk_head(struct hlist_head *head) 258static inline struct sock *__sk_head(const struct hlist_head *head)
257{ 259{
258 return hlist_entry(head->first, struct sock, sk_node); 260 return hlist_entry(head->first, struct sock, sk_node);
259} 261}
260 262
261static inline struct sock *sk_head(struct hlist_head *head) 263static inline struct sock *sk_head(const struct hlist_head *head)
262{ 264{
263 return hlist_empty(head) ? NULL : __sk_head(head); 265 return hlist_empty(head) ? NULL : __sk_head(head);
264} 266}
265 267
266static inline struct sock *sk_next(struct sock *sk) 268static inline struct sock *sk_next(const struct sock *sk)
267{ 269{
268 return sk->sk_node.next ? 270 return sk->sk_node.next ?
269 hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; 271 hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL;
270} 272}
271 273
272static inline int sk_unhashed(struct sock *sk) 274static inline int sk_unhashed(const struct sock *sk)
273{ 275{
274 return hlist_unhashed(&sk->sk_node); 276 return hlist_unhashed(&sk->sk_node);
275} 277}
276 278
277static inline int sk_hashed(struct sock *sk) 279static inline int sk_hashed(const struct sock *sk)
278{ 280{
279 return sk->sk_node.pprev != NULL; 281 return sk->sk_node.pprev != NULL;
280} 282}
@@ -384,6 +386,11 @@ enum sock_flags {
384 SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ 386 SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
385}; 387};
386 388
389static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
390{
391 nsk->sk_flags = osk->sk_flags;
392}
393
387static inline void sock_set_flag(struct sock *sk, enum sock_flags flag) 394static inline void sock_set_flag(struct sock *sk, enum sock_flags flag)
388{ 395{
389 __set_bit(flag, &sk->sk_flags); 396 __set_bit(flag, &sk->sk_flags);
@@ -549,6 +556,10 @@ struct proto {
549 kmem_cache_t *slab; 556 kmem_cache_t *slab;
550 unsigned int obj_size; 557 unsigned int obj_size;
551 558
559 kmem_cache_t *twsk_slab;
560 unsigned int twsk_obj_size;
561 atomic_t *orphan_count;
562
552 struct request_sock_ops *rsk_prot; 563 struct request_sock_ops *rsk_prot;
553 564
554 struct module *owner; 565 struct module *owner;
@@ -556,7 +567,9 @@ struct proto {
556 char name[32]; 567 char name[32];
557 568
558 struct list_head node; 569 struct list_head node;
559 570#ifdef SOCK_REFCNT_DEBUG
571 atomic_t socks;
572#endif
560 struct { 573 struct {
561 int inuse; 574 int inuse;
562 u8 __pad[SMP_CACHE_BYTES - sizeof(int)]; 575 u8 __pad[SMP_CACHE_BYTES - sizeof(int)];
@@ -566,6 +579,31 @@ struct proto {
566extern int proto_register(struct proto *prot, int alloc_slab); 579extern int proto_register(struct proto *prot, int alloc_slab);
567extern void proto_unregister(struct proto *prot); 580extern void proto_unregister(struct proto *prot);
568 581
582#ifdef SOCK_REFCNT_DEBUG
583static inline void sk_refcnt_debug_inc(struct sock *sk)
584{
585 atomic_inc(&sk->sk_prot->socks);
586}
587
588static inline void sk_refcnt_debug_dec(struct sock *sk)
589{
590 atomic_dec(&sk->sk_prot->socks);
591 printk(KERN_DEBUG "%s socket %p released, %d are still alive\n",
592 sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks));
593}
594
595static inline void sk_refcnt_debug_release(const struct sock *sk)
596{
597 if (atomic_read(&sk->sk_refcnt) != 1)
598 printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
599 sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt));
600}
601#else /* SOCK_REFCNT_DEBUG */
602#define sk_refcnt_debug_inc(sk) do { } while (0)
603#define sk_refcnt_debug_dec(sk) do { } while (0)
604#define sk_refcnt_debug_release(sk) do { } while (0)
605#endif /* SOCK_REFCNT_DEBUG */
606
569/* Called with local bh disabled */ 607/* Called with local bh disabled */
570static __inline__ void sock_prot_inc_use(struct proto *prot) 608static __inline__ void sock_prot_inc_use(struct proto *prot)
571{ 609{
@@ -577,6 +615,15 @@ static __inline__ void sock_prot_dec_use(struct proto *prot)
577 prot->stats[smp_processor_id()].inuse--; 615 prot->stats[smp_processor_id()].inuse--;
578} 616}
579 617
618/* With per-bucket locks this operation is not-atomic, so that
619 * this version is not worse.
620 */
621static inline void __sk_prot_rehash(struct sock *sk)
622{
623 sk->sk_prot->unhash(sk);
624 sk->sk_prot->hash(sk);
625}
626
580/* About 10 seconds */ 627/* About 10 seconds */
581#define SOCK_DESTROY_TIME (10*HZ) 628#define SOCK_DESTROY_TIME (10*HZ)
582 629
@@ -688,6 +735,8 @@ extern struct sock *sk_alloc(int family,
688 unsigned int __nocast priority, 735 unsigned int __nocast priority,
689 struct proto *prot, int zero_it); 736 struct proto *prot, int zero_it);
690extern void sk_free(struct sock *sk); 737extern void sk_free(struct sock *sk);
738extern struct sock *sk_clone(const struct sock *sk,
739 const unsigned int __nocast priority);
691 740
692extern struct sk_buff *sock_wmalloc(struct sock *sk, 741extern struct sk_buff *sock_wmalloc(struct sock *sk,
693 unsigned long size, int force, 742 unsigned long size, int force,
@@ -981,6 +1030,16 @@ sk_dst_check(struct sock *sk, u32 cookie)
981 return dst; 1030 return dst;
982} 1031}
983 1032
1033static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1034{
1035 __sk_dst_set(sk, dst);
1036 sk->sk_route_caps = dst->dev->features;
1037 if (sk->sk_route_caps & NETIF_F_TSO) {
1038 if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
1039 sk->sk_route_caps &= ~NETIF_F_TSO;
1040 }
1041}
1042
984static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb) 1043static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb)
985{ 1044{
986 sk->sk_wmem_queued += skb->truesize; 1045 sk->sk_wmem_queued += skb->truesize;
@@ -1141,7 +1200,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
1141 int hdr_len; 1200 int hdr_len;
1142 1201
1143 hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header); 1202 hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header);
1144 skb = alloc_skb(size + hdr_len, gfp); 1203 skb = alloc_skb_fclone(size + hdr_len, gfp);
1145 if (skb) { 1204 if (skb) {
1146 skb->truesize += mem; 1205 skb->truesize += mem;
1147 if (sk->sk_forward_alloc >= (int)skb->truesize || 1206 if (sk->sk_forward_alloc >= (int)skb->truesize ||
@@ -1223,16 +1282,19 @@ static inline int sock_intr_errno(long timeo)
1223static __inline__ void 1282static __inline__ void
1224sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) 1283sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
1225{ 1284{
1226 struct timeval *stamp = &skb->stamp; 1285 struct timeval stamp;
1286
1287 skb_get_timestamp(skb, &stamp);
1227 if (sock_flag(sk, SOCK_RCVTSTAMP)) { 1288 if (sock_flag(sk, SOCK_RCVTSTAMP)) {
1228 /* Race occurred between timestamp enabling and packet 1289 /* Race occurred between timestamp enabling and packet
1229 receiving. Fill in the current time for now. */ 1290 receiving. Fill in the current time for now. */
1230 if (stamp->tv_sec == 0) 1291 if (stamp.tv_sec == 0)
1231 do_gettimeofday(stamp); 1292 do_gettimeofday(&stamp);
1293 skb_set_timestamp(skb, &stamp);
1232 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), 1294 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval),
1233 stamp); 1295 &stamp);
1234 } else 1296 } else
1235 sk->sk_stamp = *stamp; 1297 sk->sk_stamp = stamp;
1236} 1298}
1237 1299
1238/** 1300/**
@@ -1257,11 +1319,11 @@ extern int sock_get_timestamp(struct sock *, struct timeval __user *);
1257 */ 1319 */
1258 1320
1259#if 0 1321#if 0
1260#define NETDEBUG(x) do { } while (0) 1322#define NETDEBUG(fmt, args...) do { } while (0)
1261#define LIMIT_NETDEBUG(x) do {} while(0) 1323#define LIMIT_NETDEBUG(fmt, args...) do { } while(0)
1262#else 1324#else
1263#define NETDEBUG(x) do { x; } while (0) 1325#define NETDEBUG(fmt, args...) printk(fmt,##args)
1264#define LIMIT_NETDEBUG(x) do { if (net_ratelimit()) { x; } } while(0) 1326#define LIMIT_NETDEBUG(fmt, args...) do { if (net_ratelimit()) printk(fmt,##args); } while(0)
1265#endif 1327#endif
1266 1328
1267/* 1329/*
@@ -1308,4 +1370,14 @@ static inline int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsign
1308} 1370}
1309#endif 1371#endif
1310 1372
1373extern void sk_init(void);
1374
1375#ifdef CONFIG_SYSCTL
1376extern struct ctl_table core_table[];
1377extern int sysctl_optmem_max;
1378#endif
1379
1380extern __u32 sysctl_wmem_default;
1381extern __u32 sysctl_rmem_default;
1382
1311#endif /* _SOCK_H */ 1383#endif /* _SOCK_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5010f0c5a56e..d6bcf1317a6a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -21,360 +21,29 @@
21#define TCP_DEBUG 1 21#define TCP_DEBUG 1
22#define FASTRETRANS_DEBUG 1 22#define FASTRETRANS_DEBUG 1
23 23
24/* Cancel timers, when they are not required. */
25#undef TCP_CLEAR_TIMERS
26
27#include <linux/config.h> 24#include <linux/config.h>
28#include <linux/list.h> 25#include <linux/list.h>
29#include <linux/tcp.h> 26#include <linux/tcp.h>
30#include <linux/slab.h> 27#include <linux/slab.h>
31#include <linux/cache.h> 28#include <linux/cache.h>
32#include <linux/percpu.h> 29#include <linux/percpu.h>
30
31#include <net/inet_connection_sock.h>
32#include <net/inet_timewait_sock.h>
33#include <net/inet_hashtables.h>
33#include <net/checksum.h> 34#include <net/checksum.h>
34#include <net/request_sock.h> 35#include <net/request_sock.h>
35#include <net/sock.h> 36#include <net/sock.h>
36#include <net/snmp.h> 37#include <net/snmp.h>
37#include <net/ip.h> 38#include <net/ip.h>
38#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 39#include <net/tcp_states.h>
39#include <linux/ipv6.h>
40#endif
41#include <linux/seq_file.h>
42
43/* This is for all connections with a full identity, no wildcards.
44 * New scheme, half the table is for TIME_WAIT, the other half is
45 * for the rest. I'll experiment with dynamic table growth later.
46 */
47struct tcp_ehash_bucket {
48 rwlock_t lock;
49 struct hlist_head chain;
50} __attribute__((__aligned__(8)));
51
52/* This is for listening sockets, thus all sockets which possess wildcards. */
53#define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */
54
55/* There are a few simple rules, which allow for local port reuse by
56 * an application. In essence:
57 *
58 * 1) Sockets bound to different interfaces may share a local port.
59 * Failing that, goto test 2.
60 * 2) If all sockets have sk->sk_reuse set, and none of them are in
61 * TCP_LISTEN state, the port may be shared.
62 * Failing that, goto test 3.
63 * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local
64 * address, and none of them are the same, the port may be
65 * shared.
66 * Failing this, the port cannot be shared.
67 *
68 * The interesting point, is test #2. This is what an FTP server does
69 * all day. To optimize this case we use a specific flag bit defined
70 * below. As we add sockets to a bind bucket list, we perform a
71 * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN))
72 * As long as all sockets added to a bind bucket pass this test,
73 * the flag bit will be set.
74 * The resulting situation is that tcp_v[46]_verify_bind() can just check
75 * for this flag bit, if it is set and the socket trying to bind has
76 * sk->sk_reuse set, we don't even have to walk the owners list at all,
77 * we return that it is ok to bind this socket to the requested local port.
78 *
79 * Sounds like a lot of work, but it is worth it. In a more naive
80 * implementation (ie. current FreeBSD etc.) the entire list of ports
81 * must be walked for each data port opened by an ftp server. Needless
82 * to say, this does not scale at all. With a couple thousand FTP
83 * users logged onto your box, isn't it nice to know that new data
84 * ports are created in O(1) time? I thought so. ;-) -DaveM
85 */
86struct tcp_bind_bucket {
87 unsigned short port;
88 signed short fastreuse;
89 struct hlist_node node;
90 struct hlist_head owners;
91};
92
93#define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node)
94
95struct tcp_bind_hashbucket {
96 spinlock_t lock;
97 struct hlist_head chain;
98};
99
100static inline struct tcp_bind_bucket *__tb_head(struct tcp_bind_hashbucket *head)
101{
102 return hlist_entry(head->chain.first, struct tcp_bind_bucket, node);
103}
104
105static inline struct tcp_bind_bucket *tb_head(struct tcp_bind_hashbucket *head)
106{
107 return hlist_empty(&head->chain) ? NULL : __tb_head(head);
108}
109
110extern struct tcp_hashinfo {
111 /* This is for sockets with full identity only. Sockets here will
112 * always be without wildcards and will have the following invariant:
113 *
114 * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
115 *
116 * First half of the table is for sockets not in TIME_WAIT, second half
117 * is for TIME_WAIT sockets only.
118 */
119 struct tcp_ehash_bucket *__tcp_ehash;
120
121 /* Ok, let's try this, I give up, we do need a local binding
122 * TCP hash as well as the others for fast bind/connect.
123 */
124 struct tcp_bind_hashbucket *__tcp_bhash;
125 40
126 int __tcp_bhash_size; 41#include <linux/seq_file.h>
127 int __tcp_ehash_size;
128
129 /* All sockets in TCP_LISTEN state will be in here. This is the only
130 * table where wildcard'd TCP sockets can exist. Hash function here
131 * is just local port number.
132 */
133 struct hlist_head __tcp_listening_hash[TCP_LHTABLE_SIZE];
134
135 /* All the above members are written once at bootup and
136 * never written again _or_ are predominantly read-access.
137 *
138 * Now align to a new cache line as all the following members
139 * are often dirty.
140 */
141 rwlock_t __tcp_lhash_lock ____cacheline_aligned;
142 atomic_t __tcp_lhash_users;
143 wait_queue_head_t __tcp_lhash_wait;
144 spinlock_t __tcp_portalloc_lock;
145} tcp_hashinfo;
146
147#define tcp_ehash (tcp_hashinfo.__tcp_ehash)
148#define tcp_bhash (tcp_hashinfo.__tcp_bhash)
149#define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size)
150#define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size)
151#define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash)
152#define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock)
153#define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users)
154#define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait)
155#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock)
156
157extern kmem_cache_t *tcp_bucket_cachep;
158extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
159 unsigned short snum);
160extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb);
161extern void tcp_bucket_unlock(struct sock *sk);
162extern int tcp_port_rover;
163
164/* These are AF independent. */
165static __inline__ int tcp_bhashfn(__u16 lport)
166{
167 return (lport & (tcp_bhash_size - 1));
168}
169
170extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
171 unsigned short snum);
172
173#if (BITS_PER_LONG == 64)
174#define TCP_ADDRCMP_ALIGN_BYTES 8
175#else
176#define TCP_ADDRCMP_ALIGN_BYTES 4
177#endif
178
179/* This is a TIME_WAIT bucket. It works around the memory consumption
180 * problems of sockets in such a state on heavily loaded servers, but
181 * without violating the protocol specification.
182 */
183struct tcp_tw_bucket {
184 /*
185 * Now struct sock also uses sock_common, so please just
186 * don't add nothing before this first member (__tw_common) --acme
187 */
188 struct sock_common __tw_common;
189#define tw_family __tw_common.skc_family
190#define tw_state __tw_common.skc_state
191#define tw_reuse __tw_common.skc_reuse
192#define tw_bound_dev_if __tw_common.skc_bound_dev_if
193#define tw_node __tw_common.skc_node
194#define tw_bind_node __tw_common.skc_bind_node
195#define tw_refcnt __tw_common.skc_refcnt
196 volatile unsigned char tw_substate;
197 unsigned char tw_rcv_wscale;
198 __u16 tw_sport;
199 /* Socket demultiplex comparisons on incoming packets. */
200 /* these five are in inet_sock */
201 __u32 tw_daddr
202 __attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES)));
203 __u32 tw_rcv_saddr;
204 __u16 tw_dport;
205 __u16 tw_num;
206 /* And these are ours. */
207 int tw_hashent;
208 int tw_timeout;
209 __u32 tw_rcv_nxt;
210 __u32 tw_snd_nxt;
211 __u32 tw_rcv_wnd;
212 __u32 tw_ts_recent;
213 long tw_ts_recent_stamp;
214 unsigned long tw_ttd;
215 struct tcp_bind_bucket *tw_tb;
216 struct hlist_node tw_death_node;
217#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
218 struct in6_addr tw_v6_daddr;
219 struct in6_addr tw_v6_rcv_saddr;
220 int tw_v6_ipv6only;
221#endif
222};
223
224static __inline__ void tw_add_node(struct tcp_tw_bucket *tw,
225 struct hlist_head *list)
226{
227 hlist_add_head(&tw->tw_node, list);
228}
229
230static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw,
231 struct hlist_head *list)
232{
233 hlist_add_head(&tw->tw_bind_node, list);
234}
235
236static inline int tw_dead_hashed(struct tcp_tw_bucket *tw)
237{
238 return tw->tw_death_node.pprev != NULL;
239}
240
241static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw)
242{
243 tw->tw_death_node.pprev = NULL;
244}
245
246static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw)
247{
248 __hlist_del(&tw->tw_death_node);
249 tw_dead_node_init(tw);
250}
251
252static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw)
253{
254 if (tw_dead_hashed(tw)) {
255 __tw_del_dead_node(tw);
256 return 1;
257 }
258 return 0;
259}
260
261#define tw_for_each(tw, node, head) \
262 hlist_for_each_entry(tw, node, head, tw_node)
263
264#define tw_for_each_inmate(tw, node, jail) \
265 hlist_for_each_entry(tw, node, jail, tw_death_node)
266
267#define tw_for_each_inmate_safe(tw, node, safe, jail) \
268 hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node)
269
270#define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk))
271
272static inline u32 tcp_v4_rcv_saddr(const struct sock *sk)
273{
274 return likely(sk->sk_state != TCP_TIME_WAIT) ?
275 inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr;
276}
277
278#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
279static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk)
280{
281 return likely(sk->sk_state != TCP_TIME_WAIT) ?
282 &inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr;
283}
284
285static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk)
286{
287 return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL;
288}
289
290#define tcptw_sk_ipv6only(__sk) (tcptw_sk(__sk)->tw_v6_ipv6only)
291
292static inline int tcp_v6_ipv6only(const struct sock *sk)
293{
294 return likely(sk->sk_state != TCP_TIME_WAIT) ?
295 ipv6_only_sock(sk) : tcptw_sk_ipv6only(sk);
296}
297#else
298# define __tcp_v6_rcv_saddr(__sk) NULL
299# define tcp_v6_rcv_saddr(__sk) NULL
300# define tcptw_sk_ipv6only(__sk) 0
301# define tcp_v6_ipv6only(__sk) 0
302#endif
303 42
304extern kmem_cache_t *tcp_timewait_cachep; 43extern struct inet_hashinfo tcp_hashinfo;
305
306static inline void tcp_tw_put(struct tcp_tw_bucket *tw)
307{
308 if (atomic_dec_and_test(&tw->tw_refcnt)) {
309#ifdef INET_REFCNT_DEBUG
310 printk(KERN_DEBUG "tw_bucket %p released\n", tw);
311#endif
312 kmem_cache_free(tcp_timewait_cachep, tw);
313 }
314}
315 44
316extern atomic_t tcp_orphan_count; 45extern atomic_t tcp_orphan_count;
317extern int tcp_tw_count;
318extern void tcp_time_wait(struct sock *sk, int state, int timeo); 46extern void tcp_time_wait(struct sock *sk, int state, int timeo);
319extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
320
321
322/* Socket demux engine toys. */
323#ifdef __BIG_ENDIAN
324#define TCP_COMBINED_PORTS(__sport, __dport) \
325 (((__u32)(__sport)<<16) | (__u32)(__dport))
326#else /* __LITTLE_ENDIAN */
327#define TCP_COMBINED_PORTS(__sport, __dport) \
328 (((__u32)(__dport)<<16) | (__u32)(__sport))
329#endif
330
331#if (BITS_PER_LONG == 64)
332#ifdef __BIG_ENDIAN
333#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
334 __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr));
335#else /* __LITTLE_ENDIAN */
336#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
337 __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr));
338#endif /* __BIG_ENDIAN */
339#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
340 (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \
341 ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
342 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
343#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
344 (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \
345 ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \
346 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
347#else /* 32-bit arch */
348#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)
349#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
350 ((inet_sk(__sk)->daddr == (__saddr)) && \
351 (inet_sk(__sk)->rcv_saddr == (__daddr)) && \
352 ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
353 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
354#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
355 ((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \
356 (tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \
357 ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \
358 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
359#endif /* 64-bit arch */
360
361#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \
362 (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
363 ((__sk)->sk_family == AF_INET6) && \
364 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \
365 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \
366 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
367
368/* These can have wildcards, don't try too hard. */
369static __inline__ int tcp_lhashfn(unsigned short num)
370{
371 return num & (TCP_LHTABLE_SIZE - 1);
372}
373
374static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
375{
376 return tcp_lhashfn(inet_sk(sk)->num);
377}
378 47
379#define MAX_TCP_HEADER (128 + MAX_HEADER) 48#define MAX_TCP_HEADER (128 + MAX_HEADER)
380 49
@@ -478,33 +147,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
478 * timestamps. It must be less than 147 * timestamps. It must be less than
479 * minimal timewait lifetime. 148 * minimal timewait lifetime.
480 */ 149 */
481
482#define TCP_TW_RECYCLE_SLOTS_LOG 5
483#define TCP_TW_RECYCLE_SLOTS (1<<TCP_TW_RECYCLE_SLOTS_LOG)
484
485/* If time > 4sec, it is "slow" path, no recycling is required,
486 so that we select tick to get range about 4 seconds.
487 */
488
489#if HZ <= 16 || HZ > 4096
490# error Unsupported: HZ <= 16 or HZ > 4096
491#elif HZ <= 32
492# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)
493#elif HZ <= 64
494# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)
495#elif HZ <= 128
496# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)
497#elif HZ <= 256
498# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)
499#elif HZ <= 512
500# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)
501#elif HZ <= 1024
502# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)
503#elif HZ <= 2048
504# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)
505#else
506# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
507#endif
508/* 150/*
509 * TCP option 151 * TCP option
510 */ 152 */
@@ -534,22 +176,18 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
534#define TCPOLEN_SACK_BASE_ALIGNED 4 176#define TCPOLEN_SACK_BASE_ALIGNED 4
535#define TCPOLEN_SACK_PERBLOCK 8 177#define TCPOLEN_SACK_PERBLOCK 8
536 178
537#define TCP_TIME_RETRANS 1 /* Retransmit timer */
538#define TCP_TIME_DACK 2 /* Delayed ack timer */
539#define TCP_TIME_PROBE0 3 /* Zero window probe timer */
540#define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */
541
542/* Flags in tp->nonagle */ 179/* Flags in tp->nonagle */
543#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ 180#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */
544#define TCP_NAGLE_CORK 2 /* Socket is corked */ 181#define TCP_NAGLE_CORK 2 /* Socket is corked */
545#define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ 182#define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */
546 183
184extern struct inet_timewait_death_row tcp_death_row;
185
547/* sysctl variables for tcp */ 186/* sysctl variables for tcp */
548extern int sysctl_tcp_timestamps; 187extern int sysctl_tcp_timestamps;
549extern int sysctl_tcp_window_scaling; 188extern int sysctl_tcp_window_scaling;
550extern int sysctl_tcp_sack; 189extern int sysctl_tcp_sack;
551extern int sysctl_tcp_fin_timeout; 190extern int sysctl_tcp_fin_timeout;
552extern int sysctl_tcp_tw_recycle;
553extern int sysctl_tcp_keepalive_time; 191extern int sysctl_tcp_keepalive_time;
554extern int sysctl_tcp_keepalive_probes; 192extern int sysctl_tcp_keepalive_probes;
555extern int sysctl_tcp_keepalive_intvl; 193extern int sysctl_tcp_keepalive_intvl;
@@ -564,7 +202,6 @@ extern int sysctl_tcp_stdurg;
564extern int sysctl_tcp_rfc1337; 202extern int sysctl_tcp_rfc1337;
565extern int sysctl_tcp_abort_on_overflow; 203extern int sysctl_tcp_abort_on_overflow;
566extern int sysctl_tcp_max_orphans; 204extern int sysctl_tcp_max_orphans;
567extern int sysctl_tcp_max_tw_buckets;
568extern int sysctl_tcp_fack; 205extern int sysctl_tcp_fack;
569extern int sysctl_tcp_reordering; 206extern int sysctl_tcp_reordering;
570extern int sysctl_tcp_ecn; 207extern int sysctl_tcp_ecn;
@@ -585,12 +222,6 @@ extern atomic_t tcp_memory_allocated;
585extern atomic_t tcp_sockets_allocated; 222extern atomic_t tcp_sockets_allocated;
586extern int tcp_memory_pressure; 223extern int tcp_memory_pressure;
587 224
588#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
589#define TCP_INET_FAMILY(fam) ((fam) == AF_INET)
590#else
591#define TCP_INET_FAMILY(fam) 1
592#endif
593
594/* 225/*
595 * Pointers to address related TCP functions 226 * Pointers to address related TCP functions
596 * (i.e. things that depend on the address family) 227 * (i.e. things that depend on the address family)
@@ -671,9 +302,6 @@ DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
671#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val) 302#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val)
672#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val) 303#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val)
673 304
674extern void tcp_put_port(struct sock *sk);
675extern void tcp_inherit_port(struct sock *sk, struct sock *child);
676
677extern void tcp_v4_err(struct sk_buff *skb, u32); 305extern void tcp_v4_err(struct sk_buff *skb, u32);
678 306
679extern void tcp_shutdown (struct sock *sk, int how); 307extern void tcp_shutdown (struct sock *sk, int how);
@@ -682,7 +310,7 @@ extern int tcp_v4_rcv(struct sk_buff *skb);
682 310
683extern int tcp_v4_remember_stamp(struct sock *sk); 311extern int tcp_v4_remember_stamp(struct sock *sk);
684 312
685extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw); 313extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
686 314
687extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, 315extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
688 struct msghdr *msg, size_t size); 316 struct msghdr *msg, size_t size);
@@ -704,42 +332,22 @@ extern int tcp_rcv_established(struct sock *sk,
704 332
705extern void tcp_rcv_space_adjust(struct sock *sk); 333extern void tcp_rcv_space_adjust(struct sock *sk);
706 334
707enum tcp_ack_state_t 335static inline void tcp_dec_quickack_mode(struct sock *sk,
708{ 336 const unsigned int pkts)
709 TCP_ACK_SCHED = 1,
710 TCP_ACK_TIMER = 2,
711 TCP_ACK_PUSHED= 4
712};
713
714static inline void tcp_schedule_ack(struct tcp_sock *tp)
715{ 337{
716 tp->ack.pending |= TCP_ACK_SCHED; 338 struct inet_connection_sock *icsk = inet_csk(sk);
717}
718
719static inline int tcp_ack_scheduled(struct tcp_sock *tp)
720{
721 return tp->ack.pending&TCP_ACK_SCHED;
722}
723
724static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp, unsigned int pkts)
725{
726 if (tp->ack.quick) {
727 if (pkts >= tp->ack.quick) {
728 tp->ack.quick = 0;
729 339
340 if (icsk->icsk_ack.quick) {
341 if (pkts >= icsk->icsk_ack.quick) {
342 icsk->icsk_ack.quick = 0;
730 /* Leaving quickack mode we deflate ATO. */ 343 /* Leaving quickack mode we deflate ATO. */
731 tp->ack.ato = TCP_ATO_MIN; 344 icsk->icsk_ack.ato = TCP_ATO_MIN;
732 } else 345 } else
733 tp->ack.quick -= pkts; 346 icsk->icsk_ack.quick -= pkts;
734 } 347 }
735} 348}
736 349
737extern void tcp_enter_quickack_mode(struct tcp_sock *tp); 350extern void tcp_enter_quickack_mode(struct sock *sk);
738
739static __inline__ void tcp_delack_init(struct tcp_sock *tp)
740{
741 memset(&tp->ack, 0, sizeof(tp->ack));
742}
743 351
744static inline void tcp_clear_options(struct tcp_options_received *rx_opt) 352static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
745{ 353{
@@ -755,10 +363,9 @@ enum tcp_tw_status
755}; 363};
756 364
757 365
758extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw, 366extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
759 struct sk_buff *skb, 367 struct sk_buff *skb,
760 struct tcphdr *th, 368 const struct tcphdr *th);
761 unsigned len);
762 369
763extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, 370extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
764 struct request_sock *req, 371 struct request_sock *req,
@@ -773,7 +380,6 @@ extern void tcp_update_metrics(struct sock *sk);
773 380
774extern void tcp_close(struct sock *sk, 381extern void tcp_close(struct sock *sk,
775 long timeout); 382 long timeout);
776extern struct sock * tcp_accept(struct sock *sk, int flags, int *err);
777extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); 383extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
778 384
779extern int tcp_getsockopt(struct sock *sk, int level, 385extern int tcp_getsockopt(struct sock *sk, int level,
@@ -789,8 +395,6 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
789 size_t len, int nonblock, 395 size_t len, int nonblock,
790 int flags, int *addr_len); 396 int flags, int *addr_len);
791 397
792extern int tcp_listen_start(struct sock *sk);
793
794extern void tcp_parse_options(struct sk_buff *skb, 398extern void tcp_parse_options(struct sk_buff *skb,
795 struct tcp_options_received *opt_rx, 399 struct tcp_options_received *opt_rx,
796 int estab); 400 int estab);
@@ -799,11 +403,6 @@ extern void tcp_parse_options(struct sk_buff *skb,
799 * TCP v4 functions exported for the inet6 API 403 * TCP v4 functions exported for the inet6 API
800 */ 404 */
801 405
802extern int tcp_v4_rebuild_header(struct sock *sk);
803
804extern int tcp_v4_build_header(struct sock *sk,
805 struct sk_buff *skb);
806
807extern void tcp_v4_send_check(struct sock *sk, 406extern void tcp_v4_send_check(struct sock *sk,
808 struct tcphdr *th, int len, 407 struct tcphdr *th, int len,
809 struct sk_buff *skb); 408 struct sk_buff *skb);
@@ -872,18 +471,15 @@ extern void tcp_cwnd_application_limited(struct sock *sk);
872 471
873/* tcp_timer.c */ 472/* tcp_timer.c */
874extern void tcp_init_xmit_timers(struct sock *); 473extern void tcp_init_xmit_timers(struct sock *);
875extern void tcp_clear_xmit_timers(struct sock *); 474static inline void tcp_clear_xmit_timers(struct sock *sk)
475{
476 inet_csk_clear_xmit_timers(sk);
477}
876 478
877extern void tcp_delete_keepalive_timer(struct sock *);
878extern void tcp_reset_keepalive_timer(struct sock *, unsigned long);
879extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); 479extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
880extern unsigned int tcp_current_mss(struct sock *sk, int large); 480extern unsigned int tcp_current_mss(struct sock *sk, int large);
881 481
882#ifdef TCP_DEBUG 482/* tcp.c */
883extern const char tcp_timer_bug_msg[];
884#endif
885
886/* tcp_diag.c */
887extern void tcp_get_info(struct sock *, struct tcp_info *); 483extern void tcp_get_info(struct sock *, struct tcp_info *);
888 484
889/* Read 'sendfile()'-style from a TCP socket */ 485/* Read 'sendfile()'-style from a TCP socket */
@@ -892,72 +488,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
892extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, 488extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
893 sk_read_actor_t recv_actor); 489 sk_read_actor_t recv_actor);
894 490
895static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
896{
897 struct tcp_sock *tp = tcp_sk(sk);
898
899 switch (what) {
900 case TCP_TIME_RETRANS:
901 case TCP_TIME_PROBE0:
902 tp->pending = 0;
903
904#ifdef TCP_CLEAR_TIMERS
905 sk_stop_timer(sk, &tp->retransmit_timer);
906#endif
907 break;
908 case TCP_TIME_DACK:
909 tp->ack.blocked = 0;
910 tp->ack.pending = 0;
911
912#ifdef TCP_CLEAR_TIMERS
913 sk_stop_timer(sk, &tp->delack_timer);
914#endif
915 break;
916 default:
917#ifdef TCP_DEBUG
918 printk(tcp_timer_bug_msg);
919#endif
920 return;
921 };
922
923}
924
925/*
926 * Reset the retransmission timer
927 */
928static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
929{
930 struct tcp_sock *tp = tcp_sk(sk);
931
932 if (when > TCP_RTO_MAX) {
933#ifdef TCP_DEBUG
934 printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr());
935#endif
936 when = TCP_RTO_MAX;
937 }
938
939 switch (what) {
940 case TCP_TIME_RETRANS:
941 case TCP_TIME_PROBE0:
942 tp->pending = what;
943 tp->timeout = jiffies+when;
944 sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
945 break;
946
947 case TCP_TIME_DACK:
948 tp->ack.pending |= TCP_ACK_TIMER;
949 tp->ack.timeout = jiffies+when;
950 sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
951 break;
952
953 default:
954#ifdef TCP_DEBUG
955 printk(tcp_timer_bug_msg);
956#endif
957 return;
958 };
959}
960
961/* Initialize RCV_MSS value. 491/* Initialize RCV_MSS value.
962 * RCV_MSS is an our guess about MSS used by the peer. 492 * RCV_MSS is an our guess about MSS used by the peer.
963 * We haven't any direct information about the MSS. 493 * We haven't any direct information about the MSS.
@@ -975,7 +505,7 @@ static inline void tcp_initialize_rcv_mss(struct sock *sk)
975 hint = min(hint, TCP_MIN_RCVMSS); 505 hint = min(hint, TCP_MIN_RCVMSS);
976 hint = max(hint, TCP_MIN_MSS); 506 hint = max(hint, TCP_MIN_MSS);
977 507
978 tp->ack.rcv_mss = hint; 508 inet_csk(sk)->icsk_ack.rcv_mss = hint;
979} 509}
980 510
981static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) 511static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
@@ -1110,7 +640,8 @@ static inline void tcp_packets_out_inc(struct sock *sk,
1110 640
1111 tp->packets_out += tcp_skb_pcount(skb); 641 tp->packets_out += tcp_skb_pcount(skb);
1112 if (!orig) 642 if (!orig)
1113 tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); 643 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
644 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
1114} 645}
1115 646
1116static inline void tcp_packets_out_dec(struct tcp_sock *tp, 647static inline void tcp_packets_out_dec(struct tcp_sock *tp,
@@ -1138,29 +669,29 @@ struct tcp_congestion_ops {
1138 struct list_head list; 669 struct list_head list;
1139 670
1140 /* initialize private data (optional) */ 671 /* initialize private data (optional) */
1141 void (*init)(struct tcp_sock *tp); 672 void (*init)(struct sock *sk);
1142 /* cleanup private data (optional) */ 673 /* cleanup private data (optional) */
1143 void (*release)(struct tcp_sock *tp); 674 void (*release)(struct sock *sk);
1144 675
1145 /* return slow start threshold (required) */ 676 /* return slow start threshold (required) */
1146 u32 (*ssthresh)(struct tcp_sock *tp); 677 u32 (*ssthresh)(struct sock *sk);
1147 /* lower bound for congestion window (optional) */ 678 /* lower bound for congestion window (optional) */
1148 u32 (*min_cwnd)(struct tcp_sock *tp); 679 u32 (*min_cwnd)(struct sock *sk);
1149 /* do new cwnd calculation (required) */ 680 /* do new cwnd calculation (required) */
1150 void (*cong_avoid)(struct tcp_sock *tp, u32 ack, 681 void (*cong_avoid)(struct sock *sk, u32 ack,
1151 u32 rtt, u32 in_flight, int good_ack); 682 u32 rtt, u32 in_flight, int good_ack);
1152 /* round trip time sample per acked packet (optional) */ 683 /* round trip time sample per acked packet (optional) */
1153 void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt); 684 void (*rtt_sample)(struct sock *sk, u32 usrtt);
1154 /* call before changing ca_state (optional) */ 685 /* call before changing ca_state (optional) */
1155 void (*set_state)(struct tcp_sock *tp, u8 new_state); 686 void (*set_state)(struct sock *sk, u8 new_state);
1156 /* call when cwnd event occurs (optional) */ 687 /* call when cwnd event occurs (optional) */
1157 void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev); 688 void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
1158 /* new value of cwnd after loss (optional) */ 689 /* new value of cwnd after loss (optional) */
1159 u32 (*undo_cwnd)(struct tcp_sock *tp); 690 u32 (*undo_cwnd)(struct sock *sk);
1160 /* hook for packet ack accounting (optional) */ 691 /* hook for packet ack accounting (optional) */
1161 void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked); 692 void (*pkts_acked)(struct sock *sk, u32 num_acked);
1162 /* get info for tcp_diag (optional) */ 693 /* get info for inet_diag (optional) */
1163 void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb); 694 void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
1164 695
1165 char name[TCP_CA_NAME_MAX]; 696 char name[TCP_CA_NAME_MAX];
1166 struct module *owner; 697 struct module *owner;
@@ -1169,30 +700,34 @@ struct tcp_congestion_ops {
1169extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); 700extern int tcp_register_congestion_control(struct tcp_congestion_ops *type);
1170extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); 701extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
1171 702
1172extern void tcp_init_congestion_control(struct tcp_sock *tp); 703extern void tcp_init_congestion_control(struct sock *sk);
1173extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); 704extern void tcp_cleanup_congestion_control(struct sock *sk);
1174extern int tcp_set_default_congestion_control(const char *name); 705extern int tcp_set_default_congestion_control(const char *name);
1175extern void tcp_get_default_congestion_control(char *name); 706extern void tcp_get_default_congestion_control(char *name);
1176extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name); 707extern int tcp_set_congestion_control(struct sock *sk, const char *name);
1177 708
1178extern struct tcp_congestion_ops tcp_init_congestion_ops; 709extern struct tcp_congestion_ops tcp_init_congestion_ops;
1179extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); 710extern u32 tcp_reno_ssthresh(struct sock *sk);
1180extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, 711extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack,
1181 u32 rtt, u32 in_flight, int flag); 712 u32 rtt, u32 in_flight, int flag);
1182extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp); 713extern u32 tcp_reno_min_cwnd(struct sock *sk);
1183extern struct tcp_congestion_ops tcp_reno; 714extern struct tcp_congestion_ops tcp_reno;
1184 715
1185static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state) 716static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
1186{ 717{
1187 if (tp->ca_ops->set_state) 718 struct inet_connection_sock *icsk = inet_csk(sk);
1188 tp->ca_ops->set_state(tp, ca_state); 719
1189 tp->ca_state = ca_state; 720 if (icsk->icsk_ca_ops->set_state)
721 icsk->icsk_ca_ops->set_state(sk, ca_state);
722 icsk->icsk_ca_state = ca_state;
1190} 723}
1191 724
1192static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event) 725static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
1193{ 726{
1194 if (tp->ca_ops->cwnd_event) 727 const struct inet_connection_sock *icsk = inet_csk(sk);
1195 tp->ca_ops->cwnd_event(tp, event); 728
729 if (icsk->icsk_ca_ops->cwnd_event)
730 icsk->icsk_ca_ops->cwnd_event(sk, event);
1196} 731}
1197 732
1198/* This determines how many packets are "in the network" to the best 733/* This determines how many packets are "in the network" to the best
@@ -1218,9 +753,10 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
1218 * The exception is rate halving phase, when cwnd is decreasing towards 753 * The exception is rate halving phase, when cwnd is decreasing towards
1219 * ssthresh. 754 * ssthresh.
1220 */ 755 */
1221static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp) 756static inline __u32 tcp_current_ssthresh(const struct sock *sk)
1222{ 757{
1223 if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery)) 758 const struct tcp_sock *tp = tcp_sk(sk);
759 if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
1224 return tp->snd_ssthresh; 760 return tp->snd_ssthresh;
1225 else 761 else
1226 return max(tp->snd_ssthresh, 762 return max(tp->snd_ssthresh,
@@ -1237,10 +773,13 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp)
1237} 773}
1238 774
1239/* Set slow start threshold and cwnd not falling to slow start */ 775/* Set slow start threshold and cwnd not falling to slow start */
1240static inline void __tcp_enter_cwr(struct tcp_sock *tp) 776static inline void __tcp_enter_cwr(struct sock *sk)
1241{ 777{
778 const struct inet_connection_sock *icsk = inet_csk(sk);
779 struct tcp_sock *tp = tcp_sk(sk);
780
1242 tp->undo_marker = 0; 781 tp->undo_marker = 0;
1243 tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); 782 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
1244 tp->snd_cwnd = min(tp->snd_cwnd, 783 tp->snd_cwnd = min(tp->snd_cwnd,
1245 tcp_packets_in_flight(tp) + 1U); 784 tcp_packets_in_flight(tp) + 1U);
1246 tp->snd_cwnd_cnt = 0; 785 tp->snd_cwnd_cnt = 0;
@@ -1249,12 +788,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp)
1249 TCP_ECN_queue_cwr(tp); 788 TCP_ECN_queue_cwr(tp);
1250} 789}
1251 790
1252static inline void tcp_enter_cwr(struct tcp_sock *tp) 791static inline void tcp_enter_cwr(struct sock *sk)
1253{ 792{
793 struct tcp_sock *tp = tcp_sk(sk);
794
1254 tp->prior_ssthresh = 0; 795 tp->prior_ssthresh = 0;
1255 if (tp->ca_state < TCP_CA_CWR) { 796 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
1256 __tcp_enter_cwr(tp); 797 __tcp_enter_cwr(sk);
1257 tcp_set_ca_state(tp, TCP_CA_CWR); 798 tcp_set_ca_state(sk, TCP_CA_CWR);
1258 } 799 }
1259} 800}
1260 801
@@ -1277,8 +818,10 @@ static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss,
1277 818
1278static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) 819static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
1279{ 820{
1280 if (!tp->packets_out && !tp->pending) 821 const struct inet_connection_sock *icsk = inet_csk(sk);
1281 tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); 822 if (!tp->packets_out && !icsk->icsk_pending)
823 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
824 icsk->icsk_rto, TCP_RTO_MAX);
1282} 825}
1283 826
1284static __inline__ void tcp_push_pending_frames(struct sock *sk, 827static __inline__ void tcp_push_pending_frames(struct sock *sk,
@@ -1297,9 +840,6 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
1297 tp->snd_wl1 = seq; 840 tp->snd_wl1 = seq;
1298} 841}
1299 842
1300extern void tcp_destroy_sock(struct sock *sk);
1301
1302
1303/* 843/*
1304 * Calculate(/check) TCP checksum 844 * Calculate(/check) TCP checksum
1305 */ 845 */
@@ -1359,8 +899,10 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1359 tp->ucopy.memory = 0; 899 tp->ucopy.memory = 0;
1360 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { 900 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1361 wake_up_interruptible(sk->sk_sleep); 901 wake_up_interruptible(sk->sk_sleep);
1362 if (!tcp_ack_scheduled(tp)) 902 if (!inet_csk_ack_scheduled(sk))
1363 tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4); 903 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
904 (3 * TCP_RTO_MIN) / 4,
905 TCP_RTO_MAX);
1364 } 906 }
1365 return 1; 907 return 1;
1366 } 908 }
@@ -1393,9 +935,9 @@ static __inline__ void tcp_set_state(struct sock *sk, int state)
1393 TCP_INC_STATS(TCP_MIB_ESTABRESETS); 935 TCP_INC_STATS(TCP_MIB_ESTABRESETS);
1394 936
1395 sk->sk_prot->unhash(sk); 937 sk->sk_prot->unhash(sk);
1396 if (tcp_sk(sk)->bind_hash && 938 if (inet_csk(sk)->icsk_bind_hash &&
1397 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) 939 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
1398 tcp_put_port(sk); 940 inet_put_port(&tcp_hashinfo, sk);
1399 /* fall through */ 941 /* fall through */
1400 default: 942 default:
1401 if (oldstate==TCP_ESTABLISHED) 943 if (oldstate==TCP_ESTABLISHED)
@@ -1422,7 +964,7 @@ static __inline__ void tcp_done(struct sock *sk)
1422 if (!sock_flag(sk, SOCK_DEAD)) 964 if (!sock_flag(sk, SOCK_DEAD))
1423 sk->sk_state_change(sk); 965 sk->sk_state_change(sk);
1424 else 966 else
1425 tcp_destroy_sock(sk); 967 inet_csk_destroy_sock(sk);
1426} 968}
1427 969
1428static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt) 970static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt)
@@ -1524,54 +1066,6 @@ static inline int tcp_full_space(const struct sock *sk)
1524 return tcp_win_from_space(sk->sk_rcvbuf); 1066 return tcp_win_from_space(sk->sk_rcvbuf);
1525} 1067}
1526 1068
1527static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req,
1528 struct sock *child)
1529{
1530 reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child);
1531}
1532
1533static inline void
1534tcp_synq_removed(struct sock *sk, struct request_sock *req)
1535{
1536 if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0)
1537 tcp_delete_keepalive_timer(sk);
1538}
1539
1540static inline void tcp_synq_added(struct sock *sk)
1541{
1542 if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0)
1543 tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT);
1544}
1545
1546static inline int tcp_synq_len(struct sock *sk)
1547{
1548 return reqsk_queue_len(&tcp_sk(sk)->accept_queue);
1549}
1550
1551static inline int tcp_synq_young(struct sock *sk)
1552{
1553 return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue);
1554}
1555
1556static inline int tcp_synq_is_full(struct sock *sk)
1557{
1558 return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue);
1559}
1560
1561static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req,
1562 struct request_sock **prev)
1563{
1564 reqsk_queue_unlink(&tp->accept_queue, req, prev);
1565}
1566
1567static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req,
1568 struct request_sock **prev)
1569{
1570 tcp_synq_unlink(tcp_sk(sk), req, prev);
1571 tcp_synq_removed(sk, req);
1572 reqsk_free(req);
1573}
1574
1575static __inline__ void tcp_openreq_init(struct request_sock *req, 1069static __inline__ void tcp_openreq_init(struct request_sock *req,
1576 struct tcp_options_received *rx_opt, 1070 struct tcp_options_received *rx_opt,
1577 struct sk_buff *skb) 1071 struct sk_buff *skb)
@@ -1593,27 +1087,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req,
1593 1087
1594extern void tcp_enter_memory_pressure(void); 1088extern void tcp_enter_memory_pressure(void);
1595 1089
1596extern void tcp_listen_wlock(void);
1597
1598/* - We may sleep inside this lock.
1599 * - If sleeping is not required (or called from BH),
1600 * use plain read_(un)lock(&tcp_lhash_lock).
1601 */
1602
1603static inline void tcp_listen_lock(void)
1604{
1605 /* read_lock synchronizes to candidates to writers */
1606 read_lock(&tcp_lhash_lock);
1607 atomic_inc(&tcp_lhash_users);
1608 read_unlock(&tcp_lhash_lock);
1609}
1610
1611static inline void tcp_listen_unlock(void)
1612{
1613 if (atomic_dec_and_test(&tcp_lhash_users))
1614 wake_up(&tcp_lhash_wait);
1615}
1616
1617static inline int keepalive_intvl_when(const struct tcp_sock *tp) 1090static inline int keepalive_intvl_when(const struct tcp_sock *tp)
1618{ 1091{
1619 return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; 1092 return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
@@ -1624,12 +1097,13 @@ static inline int keepalive_time_when(const struct tcp_sock *tp)
1624 return tp->keepalive_time ? : sysctl_tcp_keepalive_time; 1097 return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
1625} 1098}
1626 1099
1627static inline int tcp_fin_time(const struct tcp_sock *tp) 1100static inline int tcp_fin_time(const struct sock *sk)
1628{ 1101{
1629 int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout; 1102 int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout;
1103 const int rto = inet_csk(sk)->icsk_rto;
1630 1104
1631 if (fin_timeout < (tp->rto<<2) - (tp->rto>>1)) 1105 if (fin_timeout < (rto << 2) - (rto >> 1))
1632 fin_timeout = (tp->rto<<2) - (tp->rto>>1); 1106 fin_timeout = (rto << 2) - (rto >> 1);
1633 1107
1634 return fin_timeout; 1108 return fin_timeout;
1635} 1109}
@@ -1658,15 +1132,6 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int
1658 return 1; 1132 return 1;
1659} 1133}
1660 1134
1661static inline void tcp_v4_setup_caps(struct sock *sk, struct dst_entry *dst)
1662{
1663 sk->sk_route_caps = dst->dev->features;
1664 if (sk->sk_route_caps & NETIF_F_TSO) {
1665 if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
1666 sk->sk_route_caps &= ~NETIF_F_TSO;
1667 }
1668}
1669
1670#define TCP_CHECK_TIMER(sk) do { } while (0) 1135#define TCP_CHECK_TIMER(sk) do { } while (0)
1671 1136
1672static inline int tcp_use_frto(const struct sock *sk) 1137static inline int tcp_use_frto(const struct sock *sk)
@@ -1718,4 +1183,16 @@ struct tcp_iter_state {
1718extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo); 1183extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo);
1719extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); 1184extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo);
1720 1185
1186extern struct request_sock_ops tcp_request_sock_ops;
1187
1188extern int tcp_v4_destroy_sock(struct sock *sk);
1189
1190#ifdef CONFIG_PROC_FS
1191extern int tcp4_proc_init(void);
1192extern void tcp4_proc_exit(void);
1193#endif
1194
1195extern void tcp_v4_init(struct net_proto_family *ops);
1196extern void tcp_init(void);
1197
1721#endif /* _TCP_H */ 1198#endif /* _TCP_H */
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
index 64980ee8c92a..c6b84397448d 100644
--- a/include/net/tcp_ecn.h
+++ b/include/net/tcp_ecn.h
@@ -88,7 +88,7 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
88 * it is surely retransmit. It is not in ECN RFC, 88 * it is surely retransmit. It is not in ECN RFC,
89 * but Linux follows this rule. */ 89 * but Linux follows this rule. */
90 else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) 90 else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
91 tcp_enter_quickack_mode(tp); 91 tcp_enter_quickack_mode((struct sock *)tp);
92 } 92 }
93} 93}
94 94
diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h
new file mode 100644
index 000000000000..b9d4176b2d15
--- /dev/null
+++ b/include/net/tcp_states.h
@@ -0,0 +1,34 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Definitions for the TCP protocol sk_state field.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13#ifndef _LINUX_TCP_STATES_H
14#define _LINUX_TCP_STATES_H
15
16enum {
17 TCP_ESTABLISHED = 1,
18 TCP_SYN_SENT,
19 TCP_SYN_RECV,
20 TCP_FIN_WAIT1,
21 TCP_FIN_WAIT2,
22 TCP_TIME_WAIT,
23 TCP_CLOSE,
24 TCP_CLOSE_WAIT,
25 TCP_LAST_ACK,
26 TCP_LISTEN,
27 TCP_CLOSING, /* Now a valid state */
28
29 TCP_MAX_STATES /* Leave at the end! */
30};
31
32#define TCP_STATE_MASK 0xF
33
34#endif /* _LINUX_TCP_STATES_H */
diff --git a/include/net/udp.h b/include/net/udp.h
index ac229b761dbc..107b9d791a1f 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -94,6 +94,11 @@ struct udp_iter_state {
94 struct seq_operations seq_ops; 94 struct seq_operations seq_ops;
95}; 95};
96 96
97#ifdef CONFIG_PROC_FS
97extern int udp_proc_register(struct udp_seq_afinfo *afinfo); 98extern int udp_proc_register(struct udp_seq_afinfo *afinfo);
98extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo); 99extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo);
100
101extern int udp4_proc_init(void);
102extern void udp4_proc_exit(void);
103#endif
99#endif /* _UDP_H */ 104#endif /* _UDP_H */
diff --git a/include/net/x25.h b/include/net/x25.h
index 8b39b98876e8..fee62ff8c194 100644
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -175,7 +175,7 @@ extern void x25_kill_by_neigh(struct x25_neigh *);
175 175
176/* x25_dev.c */ 176/* x25_dev.c */
177extern void x25_send_frame(struct sk_buff *, struct x25_neigh *); 177extern void x25_send_frame(struct sk_buff *, struct x25_neigh *);
178extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *); 178extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
179extern void x25_establish_link(struct x25_neigh *); 179extern void x25_establish_link(struct x25_neigh *);
180extern void x25_terminate_link(struct x25_neigh *); 180extern void x25_terminate_link(struct x25_neigh *);
181 181
diff --git a/include/net/x25device.h b/include/net/x25device.h
index d45ae883bd1d..1a318374faef 100644
--- a/include/net/x25device.h
+++ b/include/net/x25device.h
@@ -8,7 +8,6 @@
8static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev) 8static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev)
9{ 9{
10 skb->mac.raw = skb->data; 10 skb->mac.raw = skb->data;
11 skb->input_dev = skb->dev = dev;
12 skb->pkt_type = PACKET_HOST; 11 skb->pkt_type = PACKET_HOST;
13 12
14 return htons(ETH_P_X25); 13 return htons(ETH_P_X25);
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 868ef88ef971..a9d0d8c5dfbf 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -818,7 +818,6 @@ extern void xfrm6_init(void);
818extern void xfrm6_fini(void); 818extern void xfrm6_fini(void);
819extern void xfrm_state_init(void); 819extern void xfrm_state_init(void);
820extern void xfrm4_state_init(void); 820extern void xfrm4_state_init(void);
821extern void xfrm4_state_fini(void);
822extern void xfrm6_state_init(void); 821extern void xfrm6_state_init(void);
823extern void xfrm6_state_fini(void); 822extern void xfrm6_state_fini(void);
824 823