aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig1
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c11
-rw-r--r--net/ipv4/ah4.c4
-rw-r--r--net/ipv4/cipso_ipv4.c1607
-rw-r--r--net/ipv4/devinet.c246
-rw-r--r--net/ipv4/esp4.c15
-rw-r--r--net/ipv4/fib_frontend.c472
-rw-r--r--net/ipv4/fib_hash.c126
-rw-r--r--net/ipv4/fib_lookup.h13
-rw-r--r--net/ipv4/fib_rules.c620
-rw-r--r--net/ipv4/fib_semantics.c518
-rw-r--r--net/ipv4/fib_trie.c110
-rw-r--r--net/ipv4/icmp.c16
-rw-r--r--net/ipv4/igmp.c6
-rw-r--r--net/ipv4/inet_connection_sock.c3
-rw-r--r--net/ipv4/inet_hashtables.c33
-rw-r--r--net/ipv4/inetpeer.c5
-rw-r--r--net/ipv4/ip_fragment.c12
-rw-r--r--net/ipv4/ip_gre.c27
-rw-r--r--net/ipv4/ip_options.c20
-rw-r--r--net/ipv4/ip_output.c25
-rw-r--r--net/ipv4/ipcomp.c8
-rw-r--r--net/ipv4/ipconfig.c1
-rw-r--r--net/ipv4/ipip.c22
-rw-r--r--net/ipv4/ipmr.c12
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c8
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_udp.c8
-rw-r--r--net/ipv4/netfilter.c2
-rw-r--r--net/ipv4/netfilter/Kconfig22
-rw-r--r--net/ipv4/netfilter/Makefile2
-rw-r--r--net/ipv4/netfilter/arp_tables.c18
-rw-r--r--net/ipv4/netfilter/arpt_mangle.c4
-rw-r--r--net/ipv4/netfilter/arptable_filter.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c215
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_pptp.c516
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netbios_ns.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c76
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_generic.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_gre.c52
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c14
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c31
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_udp.c7
-rw-r--r--net/ipv4/netfilter/ip_conntrack_sip.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c5
-rw-r--r--net/ipv4/netfilter/ip_nat_core.c56
-rw-r--r--net/ipv4/netfilter/ip_nat_helper.c63
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_pptp.c188
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_gre.c27
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_icmp.c8
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_tcp.c7
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_udp.c15
-rw-r--r--net/ipv4/netfilter/ip_nat_rule.c14
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c13
-rw-r--r--net/ipv4/netfilter/ip_queue.c16
-rw-r--r--net/ipv4/netfilter/ip_tables.c184
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c7
-rw-r--r--net/ipv4/netfilter/ipt_DSCP.c96
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c48
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c4
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c4
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c4
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c4
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c6
-rw-r--r--net/ipv4/netfilter/ipt_SAME.c7
-rw-r--r--net/ipv4/netfilter/ipt_TCPMSS.c135
-rw-r--r--net/ipv4/netfilter/ipt_TOS.c26
-rw-r--r--net/ipv4/netfilter/ipt_TTL.c12
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c3
-rw-r--r--net/ipv4/netfilter/ipt_ah.c1
-rw-r--r--net/ipv4/netfilter/ipt_dscp.c54
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c3
-rw-r--r--net/ipv4/netfilter/ipt_hashlimit.c33
-rw-r--r--net/ipv4/netfilter/ipt_owner.c1
-rw-r--r--net/ipv4/netfilter/ipt_recent.c13
-rw-r--r--net/ipv4/netfilter/iptable_filter.c4
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c4
-rw-r--r--net/ipv4/netfilter/iptable_raw.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c2
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/route.c166
-rw-r--r--net/ipv4/syncookies.c5
-rw-r--r--net/ipv4/sysctl_net_ipv4.c35
-rw-r--r--net/ipv4/tcp.c21
-rw-r--r--net/ipv4/tcp_bic.c2
-rw-r--r--net/ipv4/tcp_cubic.c2
-rw-r--r--net/ipv4/tcp_highspeed.c2
-rw-r--r--net/ipv4/tcp_htcp.c2
-rw-r--r--net/ipv4/tcp_hybla.c2
-rw-r--r--net/ipv4/tcp_input.c40
-rw-r--r--net/ipv4/tcp_ipv4.c25
-rw-r--r--net/ipv4/tcp_lp.c3
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c30
-rw-r--r--net/ipv4/tcp_timer.c16
-rw-r--r--net/ipv4/tcp_vegas.c2
-rw-r--r--net/ipv4/tcp_veno.c3
-rw-r--r--net/ipv4/tcp_westwood.c2
-rw-r--r--net/ipv4/udp.c120
-rw-r--r--net/ipv4/xfrm4_input.c2
-rw-r--r--net/ipv4/xfrm4_mode_transport.c4
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c3
-rw-r--r--net/ipv4/xfrm4_output.c10
-rw-r--r--net/ipv4/xfrm4_policy.c26
-rw-r--r--net/ipv4/xfrm4_state.c84
-rw-r--r--net/ipv4/xfrm4_tunnel.c2
108 files changed, 3898 insertions, 2707 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 3b5d504a74be..1650b64415aa 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -88,6 +88,7 @@ config IP_FIB_HASH
88config IP_MULTIPLE_TABLES 88config IP_MULTIPLE_TABLES
89 bool "IP: policy routing" 89 bool "IP: policy routing"
90 depends on IP_ADVANCED_ROUTER 90 depends on IP_ADVANCED_ROUTER
91 select FIB_RULES
91 ---help--- 92 ---help---
92 Normally, a router decides what to do with a received packet based 93 Normally, a router decides what to do with a received packet based
93 solely on the packet's final destination address. If you say Y here, 94 solely on the packet's final destination address. If you say Y here,
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4878fc5be85f..f66049e28aeb 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
47obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o 47obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
48obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o 48obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
49obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o 49obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
50obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
50 51
51obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ 52obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
52 xfrm4_output.o 53 xfrm4_output.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c84a32070f8d..fdd89e37b9aa 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -67,7 +67,6 @@
67 * 2 of the License, or (at your option) any later version. 67 * 2 of the License, or (at your option) any later version.
68 */ 68 */
69 69
70#include <linux/config.h>
71#include <linux/err.h> 70#include <linux/err.h>
72#include <linux/errno.h> 71#include <linux/errno.h>
73#include <linux/types.h> 72#include <linux/types.h>
@@ -392,7 +391,7 @@ int inet_release(struct socket *sock)
392} 391}
393 392
394/* It is off by default, see below. */ 393/* It is off by default, see below. */
395int sysctl_ip_nonlocal_bind; 394int sysctl_ip_nonlocal_bind __read_mostly;
396 395
397int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 396int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
398{ 397{
@@ -988,7 +987,7 @@ void inet_unregister_protosw(struct inet_protosw *p)
988 * Shall we try to damage output packets if routing dev changes? 987 * Shall we try to damage output packets if routing dev changes?
989 */ 988 */
990 989
991int sysctl_ip_dynaddr; 990int sysctl_ip_dynaddr __read_mostly;
992 991
993static int inet_sk_reselect_saddr(struct sock *sk) 992static int inet_sk_reselect_saddr(struct sock *sk)
994{ 993{
@@ -1074,6 +1073,7 @@ int inet_sk_rebuild_header(struct sock *sk)
1074 }, 1073 },
1075 }; 1074 };
1076 1075
1076 security_sk_classify_flow(sk, &fl);
1077 err = ip_route_output_flow(&rt, &fl, sk, 0); 1077 err = ip_route_output_flow(&rt, &fl, sk, 0);
1078} 1078}
1079 if (!err) 1079 if (!err)
@@ -1254,10 +1254,7 @@ static int __init inet_init(void)
1254 struct list_head *r; 1254 struct list_head *r;
1255 int rc = -EINVAL; 1255 int rc = -EINVAL;
1256 1256
1257 if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) { 1257 BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
1258 printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
1259 goto out;
1260 }
1261 1258
1262 rc = proto_register(&tcp_prot, 1); 1259 rc = proto_register(&tcp_prot, 1);
1263 if (rc) 1260 if (rc)
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 2b98943e6b02..99542977e47e 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -35,7 +35,7 @@ static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr)
35 switch (*optptr) { 35 switch (*optptr) {
36 case IPOPT_SEC: 36 case IPOPT_SEC:
37 case 0x85: /* Some "Extended Security" crap. */ 37 case 0x85: /* Some "Extended Security" crap. */
38 case 0x86: /* Another "Commercial Security" crap. */ 38 case IPOPT_CIPSO:
39 case IPOPT_RA: 39 case IPOPT_RA:
40 case 0x80|21: /* RFC1770 */ 40 case 0x80|21: /* RFC1770 */
41 break; 41 break;
@@ -265,7 +265,7 @@ static int ah_init_state(struct xfrm_state *x)
265 goto error; 265 goto error;
266 266
267 x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); 267 x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len);
268 if (x->props.mode) 268 if (x->props.mode == XFRM_MODE_TUNNEL)
269 x->props.header_len += sizeof(struct iphdr); 269 x->props.header_len += sizeof(struct iphdr);
270 x->data = ahp; 270 x->data = ahp;
271 271
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
new file mode 100644
index 000000000000..80a2a0911b49
--- /dev/null
+++ b/net/ipv4/cipso_ipv4.c
@@ -0,0 +1,1607 @@
1/*
2 * CIPSO - Commercial IP Security Option
3 *
4 * This is an implementation of the CIPSO 2.2 protocol as specified in
5 * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in
6 * FIPS-188, copies of both documents can be found in the Documentation
7 * directory. While CIPSO never became a full IETF RFC standard many vendors
8 * have chosen to adopt the protocol and over the years it has become a
9 * de-facto standard for labeled networking.
10 *
11 * Author: Paul Moore <paul.moore@hp.com>
12 *
13 */
14
15/*
16 * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
26 * the GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program; if not, write to the Free Software
30 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31 *
32 */
33
34#include <linux/init.h>
35#include <linux/types.h>
36#include <linux/rcupdate.h>
37#include <linux/list.h>
38#include <linux/spinlock.h>
39#include <linux/string.h>
40#include <linux/jhash.h>
41#include <net/ip.h>
42#include <net/icmp.h>
43#include <net/tcp.h>
44#include <net/netlabel.h>
45#include <net/cipso_ipv4.h>
46#include <asm/bug.h>
47
48struct cipso_v4_domhsh_entry {
49 char *domain;
50 u32 valid;
51 struct list_head list;
52 struct rcu_head rcu;
53};
54
55/* List of available DOI definitions */
56/* XXX - Updates should be minimal so having a single lock for the
57 * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be
58 * okay. */
59/* XXX - This currently assumes a minimal number of different DOIs in use,
60 * if in practice there are a lot of different DOIs this list should
61 * probably be turned into a hash table or something similar so we
62 * can do quick lookups. */
63static DEFINE_SPINLOCK(cipso_v4_doi_list_lock);
64static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list);
65
66/* Label mapping cache */
67int cipso_v4_cache_enabled = 1;
68int cipso_v4_cache_bucketsize = 10;
69#define CIPSO_V4_CACHE_BUCKETBITS 7
70#define CIPSO_V4_CACHE_BUCKETS (1 << CIPSO_V4_CACHE_BUCKETBITS)
71#define CIPSO_V4_CACHE_REORDERLIMIT 10
72struct cipso_v4_map_cache_bkt {
73 spinlock_t lock;
74 u32 size;
75 struct list_head list;
76};
77struct cipso_v4_map_cache_entry {
78 u32 hash;
79 unsigned char *key;
80 size_t key_len;
81
82 struct netlbl_lsm_cache lsm_data;
83
84 u32 activity;
85 struct list_head list;
86};
87static struct cipso_v4_map_cache_bkt *cipso_v4_cache = NULL;
88
89/* Restricted bitmap (tag #1) flags */
90int cipso_v4_rbm_optfmt = 0;
91int cipso_v4_rbm_strictvalid = 1;
92
93/*
94 * Helper Functions
95 */
96
97/**
98 * cipso_v4_bitmap_walk - Walk a bitmap looking for a bit
99 * @bitmap: the bitmap
100 * @bitmap_len: length in bits
101 * @offset: starting offset
102 * @state: if non-zero, look for a set (1) bit else look for a cleared (0) bit
103 *
104 * Description:
105 * Starting at @offset, walk the bitmap from left to right until either the
106 * desired bit is found or we reach the end. Return the bit offset, -1 if
107 * not found, or -2 if error.
108 */
109static int cipso_v4_bitmap_walk(const unsigned char *bitmap,
110 u32 bitmap_len,
111 u32 offset,
112 u8 state)
113{
114 u32 bit_spot;
115 u32 byte_offset;
116 unsigned char bitmask;
117 unsigned char byte;
118
119 /* gcc always rounds to zero when doing integer division */
120 byte_offset = offset / 8;
121 byte = bitmap[byte_offset];
122 bit_spot = offset;
123 bitmask = 0x80 >> (offset % 8);
124
125 while (bit_spot < bitmap_len) {
126 if ((state && (byte & bitmask) == bitmask) ||
127 (state == 0 && (byte & bitmask) == 0))
128 return bit_spot;
129
130 bit_spot++;
131 bitmask >>= 1;
132 if (bitmask == 0) {
133 byte = bitmap[++byte_offset];
134 bitmask = 0x80;
135 }
136 }
137
138 return -1;
139}
140
141/**
142 * cipso_v4_bitmap_setbit - Sets a single bit in a bitmap
143 * @bitmap: the bitmap
144 * @bit: the bit
145 * @state: if non-zero, set the bit (1) else clear the bit (0)
146 *
147 * Description:
148 * Set a single bit in the bitmask. Returns zero on success, negative values
149 * on error.
150 */
151static void cipso_v4_bitmap_setbit(unsigned char *bitmap,
152 u32 bit,
153 u8 state)
154{
155 u32 byte_spot;
156 u8 bitmask;
157
158 /* gcc always rounds to zero when doing integer division */
159 byte_spot = bit / 8;
160 bitmask = 0x80 >> (bit % 8);
161 if (state)
162 bitmap[byte_spot] |= bitmask;
163 else
164 bitmap[byte_spot] &= ~bitmask;
165}
166
167/**
168 * cipso_v4_doi_domhsh_free - Frees a domain list entry
169 * @entry: the entry's RCU field
170 *
171 * Description:
172 * This function is designed to be used as a callback to the call_rcu()
173 * function so that the memory allocated to a domain list entry can be released
174 * safely.
175 *
176 */
177static void cipso_v4_doi_domhsh_free(struct rcu_head *entry)
178{
179 struct cipso_v4_domhsh_entry *ptr;
180
181 ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu);
182 kfree(ptr->domain);
183 kfree(ptr);
184}
185
186/**
187 * cipso_v4_cache_entry_free - Frees a cache entry
188 * @entry: the entry to free
189 *
190 * Description:
191 * This function frees the memory associated with a cache entry.
192 *
193 */
194static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry)
195{
196 if (entry->lsm_data.free)
197 entry->lsm_data.free(entry->lsm_data.data);
198 kfree(entry->key);
199 kfree(entry);
200}
201
202/**
203 * cipso_v4_map_cache_hash - Hashing function for the CIPSO cache
204 * @key: the hash key
205 * @key_len: the length of the key in bytes
206 *
207 * Description:
208 * The CIPSO tag hashing function. Returns a 32-bit hash value.
209 *
210 */
211static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len)
212{
213 return jhash(key, key_len, 0);
214}
215
216/*
217 * Label Mapping Cache Functions
218 */
219
220/**
221 * cipso_v4_cache_init - Initialize the CIPSO cache
222 *
223 * Description:
224 * Initializes the CIPSO label mapping cache, this function should be called
225 * before any of the other functions defined in this file. Returns zero on
226 * success, negative values on error.
227 *
228 */
229static int cipso_v4_cache_init(void)
230{
231 u32 iter;
232
233 cipso_v4_cache = kcalloc(CIPSO_V4_CACHE_BUCKETS,
234 sizeof(struct cipso_v4_map_cache_bkt),
235 GFP_KERNEL);
236 if (cipso_v4_cache == NULL)
237 return -ENOMEM;
238
239 for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) {
240 spin_lock_init(&cipso_v4_cache[iter].lock);
241 cipso_v4_cache[iter].size = 0;
242 INIT_LIST_HEAD(&cipso_v4_cache[iter].list);
243 }
244
245 return 0;
246}
247
248/**
249 * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache
250 *
251 * Description:
252 * Invalidates and frees any entries in the CIPSO cache. Returns zero on
253 * success and negative values on failure.
254 *
255 */
256void cipso_v4_cache_invalidate(void)
257{
258 struct cipso_v4_map_cache_entry *entry, *tmp_entry;
259 u32 iter;
260
261 for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) {
262 spin_lock(&cipso_v4_cache[iter].lock);
263 list_for_each_entry_safe(entry,
264 tmp_entry,
265 &cipso_v4_cache[iter].list, list) {
266 list_del(&entry->list);
267 cipso_v4_cache_entry_free(entry);
268 }
269 cipso_v4_cache[iter].size = 0;
270 spin_unlock(&cipso_v4_cache[iter].lock);
271 }
272
273 return;
274}
275
276/**
277 * cipso_v4_cache_check - Check the CIPSO cache for a label mapping
278 * @key: the buffer to check
279 * @key_len: buffer length in bytes
280 * @secattr: the security attribute struct to use
281 *
282 * Description:
283 * This function checks the cache to see if a label mapping already exists for
284 * the given key. If there is a match then the cache is adjusted and the
285 * @secattr struct is populated with the correct LSM security attributes. The
286 * cache is adjusted in the following manner if the entry is not already the
287 * first in the cache bucket:
288 *
289 * 1. The cache entry's activity counter is incremented
290 * 2. The previous (higher ranking) entry's activity counter is decremented
291 * 3. If the difference between the two activity counters is geater than
292 * CIPSO_V4_CACHE_REORDERLIMIT the two entries are swapped
293 *
294 * Returns zero on success, -ENOENT for a cache miss, and other negative values
295 * on error.
296 *
297 */
298static int cipso_v4_cache_check(const unsigned char *key,
299 u32 key_len,
300 struct netlbl_lsm_secattr *secattr)
301{
302 u32 bkt;
303 struct cipso_v4_map_cache_entry *entry;
304 struct cipso_v4_map_cache_entry *prev_entry = NULL;
305 u32 hash;
306
307 if (!cipso_v4_cache_enabled)
308 return -ENOENT;
309
310 hash = cipso_v4_map_cache_hash(key, key_len);
311 bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
312 spin_lock(&cipso_v4_cache[bkt].lock);
313 list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) {
314 if (entry->hash == hash &&
315 entry->key_len == key_len &&
316 memcmp(entry->key, key, key_len) == 0) {
317 entry->activity += 1;
318 secattr->cache.free = entry->lsm_data.free;
319 secattr->cache.data = entry->lsm_data.data;
320 if (prev_entry == NULL) {
321 spin_unlock(&cipso_v4_cache[bkt].lock);
322 return 0;
323 }
324
325 if (prev_entry->activity > 0)
326 prev_entry->activity -= 1;
327 if (entry->activity > prev_entry->activity &&
328 entry->activity - prev_entry->activity >
329 CIPSO_V4_CACHE_REORDERLIMIT) {
330 __list_del(entry->list.prev, entry->list.next);
331 __list_add(&entry->list,
332 prev_entry->list.prev,
333 &prev_entry->list);
334 }
335
336 spin_unlock(&cipso_v4_cache[bkt].lock);
337 return 0;
338 }
339 prev_entry = entry;
340 }
341 spin_unlock(&cipso_v4_cache[bkt].lock);
342
343 return -ENOENT;
344}
345
346/**
347 * cipso_v4_cache_add - Add an entry to the CIPSO cache
348 * @skb: the packet
349 * @secattr: the packet's security attributes
350 *
351 * Description:
352 * Add a new entry into the CIPSO label mapping cache. Add the new entry to
353 * head of the cache bucket's list, if the cache bucket is out of room remove
354 * the last entry in the list first. It is important to note that there is
355 * currently no checking for duplicate keys. Returns zero on success,
356 * negative values on failure.
357 *
358 */
359int cipso_v4_cache_add(const struct sk_buff *skb,
360 const struct netlbl_lsm_secattr *secattr)
361{
362 int ret_val = -EPERM;
363 u32 bkt;
364 struct cipso_v4_map_cache_entry *entry = NULL;
365 struct cipso_v4_map_cache_entry *old_entry = NULL;
366 unsigned char *cipso_ptr;
367 u32 cipso_ptr_len;
368
369 if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
370 return 0;
371
372 cipso_ptr = CIPSO_V4_OPTPTR(skb);
373 cipso_ptr_len = cipso_ptr[1];
374
375 entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
376 if (entry == NULL)
377 return -ENOMEM;
378 entry->key = kmalloc(cipso_ptr_len, GFP_ATOMIC);
379 if (entry->key == NULL) {
380 ret_val = -ENOMEM;
381 goto cache_add_failure;
382 }
383 memcpy(entry->key, cipso_ptr, cipso_ptr_len);
384 entry->key_len = cipso_ptr_len;
385 entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len);
386 entry->lsm_data.free = secattr->cache.free;
387 entry->lsm_data.data = secattr->cache.data;
388
389 bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
390 spin_lock(&cipso_v4_cache[bkt].lock);
391 if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
392 list_add(&entry->list, &cipso_v4_cache[bkt].list);
393 cipso_v4_cache[bkt].size += 1;
394 } else {
395 old_entry = list_entry(cipso_v4_cache[bkt].list.prev,
396 struct cipso_v4_map_cache_entry, list);
397 list_del(&old_entry->list);
398 list_add(&entry->list, &cipso_v4_cache[bkt].list);
399 cipso_v4_cache_entry_free(old_entry);
400 }
401 spin_unlock(&cipso_v4_cache[bkt].lock);
402
403 return 0;
404
405cache_add_failure:
406 if (entry)
407 cipso_v4_cache_entry_free(entry);
408 return ret_val;
409}
410
411/*
412 * DOI List Functions
413 */
414
415/**
416 * cipso_v4_doi_search - Searches for a DOI definition
417 * @doi: the DOI to search for
418 *
419 * Description:
420 * Search the DOI definition list for a DOI definition with a DOI value that
421 * matches @doi. The caller is responsibile for calling rcu_read_[un]lock().
422 * Returns a pointer to the DOI definition on success and NULL on failure.
423 */
424static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
425{
426 struct cipso_v4_doi *iter;
427
428 list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
429 if (iter->doi == doi && iter->valid)
430 return iter;
431 return NULL;
432}
433
434/**
435 * cipso_v4_doi_add - Add a new DOI to the CIPSO protocol engine
436 * @doi_def: the DOI structure
437 *
438 * Description:
439 * The caller defines a new DOI for use by the CIPSO engine and calls this
440 * function to add it to the list of acceptable domains. The caller must
441 * ensure that the mapping table specified in @doi_def->map meets all of the
442 * requirements of the mapping type (see cipso_ipv4.h for details). Returns
443 * zero on success and non-zero on failure.
444 *
445 */
446int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
447{
448 if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN)
449 return -EINVAL;
450
451 doi_def->valid = 1;
452 INIT_RCU_HEAD(&doi_def->rcu);
453 INIT_LIST_HEAD(&doi_def->dom_list);
454
455 rcu_read_lock();
456 if (cipso_v4_doi_search(doi_def->doi) != NULL)
457 goto doi_add_failure_rlock;
458 spin_lock(&cipso_v4_doi_list_lock);
459 if (cipso_v4_doi_search(doi_def->doi) != NULL)
460 goto doi_add_failure_slock;
461 list_add_tail_rcu(&doi_def->list, &cipso_v4_doi_list);
462 spin_unlock(&cipso_v4_doi_list_lock);
463 rcu_read_unlock();
464
465 return 0;
466
467doi_add_failure_slock:
468 spin_unlock(&cipso_v4_doi_list_lock);
469doi_add_failure_rlock:
470 rcu_read_unlock();
471 return -EEXIST;
472}
473
474/**
475 * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine
476 * @doi: the DOI value
477 * @callback: the DOI cleanup/free callback
478 *
479 * Description:
480 * Removes a DOI definition from the CIPSO engine, @callback is called to
481 * free any memory. The NetLabel routines will be called to release their own
482 * LSM domain mappings as well as our own domain list. Returns zero on
483 * success and negative values on failure.
484 *
485 */
486int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head))
487{
488 struct cipso_v4_doi *doi_def;
489 struct cipso_v4_domhsh_entry *dom_iter;
490
491 rcu_read_lock();
492 if (cipso_v4_doi_search(doi) != NULL) {
493 spin_lock(&cipso_v4_doi_list_lock);
494 doi_def = cipso_v4_doi_search(doi);
495 if (doi_def == NULL) {
496 spin_unlock(&cipso_v4_doi_list_lock);
497 rcu_read_unlock();
498 return -ENOENT;
499 }
500 doi_def->valid = 0;
501 list_del_rcu(&doi_def->list);
502 spin_unlock(&cipso_v4_doi_list_lock);
503 list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list)
504 if (dom_iter->valid)
505 netlbl_domhsh_remove(dom_iter->domain);
506 cipso_v4_cache_invalidate();
507 rcu_read_unlock();
508
509 call_rcu(&doi_def->rcu, callback);
510 return 0;
511 }
512 rcu_read_unlock();
513
514 return -ENOENT;
515}
516
517/**
518 * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition
519 * @doi: the DOI value
520 *
521 * Description:
522 * Searches for a valid DOI definition and if one is found it is returned to
523 * the caller. Otherwise NULL is returned. The caller must ensure that
524 * rcu_read_lock() is held while accessing the returned definition.
525 *
526 */
527struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
528{
529 return cipso_v4_doi_search(doi);
530}
531
532/**
533 * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff
534 * @headroom: the amount of headroom to allocate for the sk_buff
535 *
536 * Description:
537 * Dump a list of all the configured DOI values into a sk_buff. The returned
538 * sk_buff has room at the front of the sk_buff for @headroom bytes. See
539 * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format. This
540 * function may fail if another process is changing the DOI list at the same
541 * time. Returns a pointer to a sk_buff on success, NULL on error.
542 *
543 */
544struct sk_buff *cipso_v4_doi_dump_all(size_t headroom)
545{
546 struct sk_buff *skb = NULL;
547 struct cipso_v4_doi *iter;
548 u32 doi_cnt = 0;
549 ssize_t buf_len;
550
551 buf_len = NETLBL_LEN_U32;
552 rcu_read_lock();
553 list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
554 if (iter->valid) {
555 doi_cnt += 1;
556 buf_len += 2 * NETLBL_LEN_U32;
557 }
558
559 skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
560 if (skb == NULL)
561 goto doi_dump_all_failure;
562
563 if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0)
564 goto doi_dump_all_failure;
565 buf_len -= NETLBL_LEN_U32;
566 list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
567 if (iter->valid) {
568 if (buf_len < 2 * NETLBL_LEN_U32)
569 goto doi_dump_all_failure;
570 if (nla_put_u32(skb, NLA_U32, iter->doi) != 0)
571 goto doi_dump_all_failure;
572 if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
573 goto doi_dump_all_failure;
574 buf_len -= 2 * NETLBL_LEN_U32;
575 }
576 rcu_read_unlock();
577
578 return skb;
579
580doi_dump_all_failure:
581 rcu_read_unlock();
582 kfree(skb);
583 return NULL;
584}
585
586/**
587 * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff
588 * @doi: the DOI value
589 * @headroom: the amount of headroom to allocate for the sk_buff
590 *
591 * Description:
592 * Lookup the DOI definition matching @doi and dump it's contents into a
593 * sk_buff. The returned sk_buff has room at the front of the sk_buff for
594 * @headroom bytes. See net/netlabel/netlabel_cipso_v4.h for the LIST message
595 * format. This function may fail if another process is changing the DOI list
596 * at the same time. Returns a pointer to a sk_buff on success, NULL on error.
597 *
598 */
599struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom)
600{
601 struct sk_buff *skb = NULL;
602 struct cipso_v4_doi *iter;
603 u32 tag_cnt = 0;
604 u32 lvl_cnt = 0;
605 u32 cat_cnt = 0;
606 ssize_t buf_len;
607 ssize_t tmp;
608
609 rcu_read_lock();
610 iter = cipso_v4_doi_getdef(doi);
611 if (iter == NULL)
612 goto doi_dump_failure;
613 buf_len = NETLBL_LEN_U32;
614 switch (iter->type) {
615 case CIPSO_V4_MAP_PASS:
616 buf_len += NETLBL_LEN_U32;
617 while(tag_cnt < CIPSO_V4_TAG_MAXCNT &&
618 iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
619 tag_cnt += 1;
620 buf_len += NETLBL_LEN_U8;
621 }
622 break;
623 case CIPSO_V4_MAP_STD:
624 buf_len += 3 * NETLBL_LEN_U32;
625 while (tag_cnt < CIPSO_V4_TAG_MAXCNT &&
626 iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
627 tag_cnt += 1;
628 buf_len += NETLBL_LEN_U8;
629 }
630 for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
631 if (iter->map.std->lvl.local[tmp] !=
632 CIPSO_V4_INV_LVL) {
633 lvl_cnt += 1;
634 buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8;
635 }
636 for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
637 if (iter->map.std->cat.local[tmp] !=
638 CIPSO_V4_INV_CAT) {
639 cat_cnt += 1;
640 buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16;
641 }
642 break;
643 }
644
645 skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
646 if (skb == NULL)
647 goto doi_dump_failure;
648
649 if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
650 goto doi_dump_failure;
651 buf_len -= NETLBL_LEN_U32;
652 if (iter != cipso_v4_doi_getdef(doi))
653 goto doi_dump_failure;
654 switch (iter->type) {
655 case CIPSO_V4_MAP_PASS:
656 if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
657 goto doi_dump_failure;
658 buf_len -= NETLBL_LEN_U32;
659 for (tmp = 0;
660 tmp < CIPSO_V4_TAG_MAXCNT &&
661 iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
662 tmp++) {
663 if (buf_len < NETLBL_LEN_U8)
664 goto doi_dump_failure;
665 if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
666 goto doi_dump_failure;
667 buf_len -= NETLBL_LEN_U8;
668 }
669 break;
670 case CIPSO_V4_MAP_STD:
671 if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
672 goto doi_dump_failure;
673 if (nla_put_u32(skb, NLA_U32, lvl_cnt) != 0)
674 goto doi_dump_failure;
675 if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0)
676 goto doi_dump_failure;
677 buf_len -= 3 * NETLBL_LEN_U32;
678 for (tmp = 0;
679 tmp < CIPSO_V4_TAG_MAXCNT &&
680 iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
681 tmp++) {
682 if (buf_len < NETLBL_LEN_U8)
683 goto doi_dump_failure;
684 if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
685 goto doi_dump_failure;
686 buf_len -= NETLBL_LEN_U8;
687 }
688 for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
689 if (iter->map.std->lvl.local[tmp] !=
690 CIPSO_V4_INV_LVL) {
691 if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8)
692 goto doi_dump_failure;
693 if (nla_put_u32(skb, NLA_U32, tmp) != 0)
694 goto doi_dump_failure;
695 if (nla_put_u8(skb,
696 NLA_U8,
697 iter->map.std->lvl.local[tmp]) != 0)
698 goto doi_dump_failure;
699 buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8;
700 }
701 for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
702 if (iter->map.std->cat.local[tmp] !=
703 CIPSO_V4_INV_CAT) {
704 if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16)
705 goto doi_dump_failure;
706 if (nla_put_u32(skb, NLA_U32, tmp) != 0)
707 goto doi_dump_failure;
708 if (nla_put_u16(skb,
709 NLA_U16,
710 iter->map.std->cat.local[tmp]) != 0)
711 goto doi_dump_failure;
712 buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16;
713 }
714 break;
715 }
716 rcu_read_unlock();
717
718 return skb;
719
720doi_dump_failure:
721 rcu_read_unlock();
722 kfree(skb);
723 return NULL;
724}
725
/**
 * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition
 * @doi_def: the DOI definition
 * @domain: the domain to add, may be NULL for the wildcard entry
 *
 * Description:
 * Adds the @domain to the DOI specified by @doi_def, this function
 * should only be called by external functions (i.e. NetLabel).  This function
 * does allocate memory.  Returns zero on success, -EEXIST if the domain is
 * already mapped, and other negative values on failure.
 *
 */
int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain)
{
	struct cipso_v4_domhsh_entry *iter;
	struct cipso_v4_domhsh_entry *new_dom;

	new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL);
	if (new_dom == NULL)
		return -ENOMEM;
	if (domain) {
		/* A NULL @domain is a legal (wildcard) entry, so the string
		 * is only duplicated when one was actually supplied. */
		new_dom->domain = kstrdup(domain, GFP_KERNEL);
		if (new_dom->domain == NULL) {
			kfree(new_dom);
			return -ENOMEM;
		}
	}
	new_dom->valid = 1;
	INIT_RCU_HEAD(&new_dom->rcu);

	/* Writers serialize on cipso_v4_doi_list_lock; the RCU read lock is
	 * held as well so the list walk below is safe against concurrent
	 * RCU removal. */
	rcu_read_lock();
	spin_lock(&cipso_v4_doi_list_lock);
	list_for_each_entry_rcu(iter, &doi_def->dom_list, list)
		if (iter->valid &&
		    ((domain != NULL && iter->domain != NULL &&
		      strcmp(iter->domain, domain) == 0) ||
		     (domain == NULL && iter->domain == NULL))) {
			/* Duplicate entry (including a NULL/NULL wildcard
			 * match) - back out the allocation. */
			spin_unlock(&cipso_v4_doi_list_lock);
			rcu_read_unlock();
			kfree(new_dom->domain);
			kfree(new_dom);
			return -EEXIST;
		}
	list_add_tail_rcu(&new_dom->list, &doi_def->dom_list);
	spin_unlock(&cipso_v4_doi_list_lock);
	rcu_read_unlock();

	return 0;
}
774
/**
 * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition
 * @doi_def: the DOI definition
 * @domain: the domain to remove, may be NULL for the wildcard entry
 *
 * Description:
 * Removes the @domain from the DOI specified by @doi_def, this function
 * should only be called by external functions (i.e. NetLabel).  Returns zero
 * on success and negative values on error.
 *
 */
int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
			       const char *domain)
{
	struct cipso_v4_domhsh_entry *iter;

	rcu_read_lock();
	spin_lock(&cipso_v4_doi_list_lock);
	list_for_each_entry_rcu(iter, &doi_def->dom_list, list)
		if (iter->valid &&
		    ((domain != NULL && iter->domain != NULL &&
		      strcmp(iter->domain, domain) == 0) ||
		     (domain == NULL && iter->domain == NULL))) {
			/* Mark the entry invalid before unlinking it so
			 * readers still traversing the list skip it, then
			 * defer the actual free until after an RCU grace
			 * period. */
			iter->valid = 0;
			list_del_rcu(&iter->list);
			spin_unlock(&cipso_v4_doi_list_lock);
			rcu_read_unlock();
			call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free);

			return 0;
		}
	spin_unlock(&cipso_v4_doi_list_lock);
	rcu_read_unlock();

	return -ENOENT;
}
811
812/*
813 * Label Mapping Functions
814 */
815
816/**
817 * cipso_v4_map_lvl_valid - Checks to see if the given level is understood
818 * @doi_def: the DOI definition
819 * @level: the level to check
820 *
821 * Description:
822 * Checks the given level against the given DOI definition and returns a
823 * negative value if the level does not have a valid mapping and a zero value
824 * if the level is defined by the DOI.
825 *
826 */
827static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level)
828{
829 switch (doi_def->type) {
830 case CIPSO_V4_MAP_PASS:
831 return 0;
832 case CIPSO_V4_MAP_STD:
833 if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)
834 return 0;
835 break;
836 }
837
838 return -EFAULT;
839}
840
841/**
842 * cipso_v4_map_lvl_hton - Perform a level mapping from the host to the network
843 * @doi_def: the DOI definition
844 * @host_lvl: the host MLS level
845 * @net_lvl: the network/CIPSO MLS level
846 *
847 * Description:
848 * Perform a label mapping to translate a local MLS level to the correct
849 * CIPSO level using the given DOI definition. Returns zero on success,
850 * negative values otherwise.
851 *
852 */
853static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def,
854 u32 host_lvl,
855 u32 *net_lvl)
856{
857 switch (doi_def->type) {
858 case CIPSO_V4_MAP_PASS:
859 *net_lvl = host_lvl;
860 return 0;
861 case CIPSO_V4_MAP_STD:
862 if (host_lvl < doi_def->map.std->lvl.local_size) {
863 *net_lvl = doi_def->map.std->lvl.local[host_lvl];
864 return 0;
865 }
866 break;
867 }
868
869 return -EINVAL;
870}
871
872/**
873 * cipso_v4_map_lvl_ntoh - Perform a level mapping from the network to the host
874 * @doi_def: the DOI definition
875 * @net_lvl: the network/CIPSO MLS level
876 * @host_lvl: the host MLS level
877 *
878 * Description:
879 * Perform a label mapping to translate a CIPSO level to the correct local MLS
880 * level using the given DOI definition. Returns zero on success, negative
881 * values otherwise.
882 *
883 */
884static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def,
885 u32 net_lvl,
886 u32 *host_lvl)
887{
888 struct cipso_v4_std_map_tbl *map_tbl;
889
890 switch (doi_def->type) {
891 case CIPSO_V4_MAP_PASS:
892 *host_lvl = net_lvl;
893 return 0;
894 case CIPSO_V4_MAP_STD:
895 map_tbl = doi_def->map.std;
896 if (net_lvl < map_tbl->lvl.cipso_size &&
897 map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) {
898 *host_lvl = doi_def->map.std->lvl.cipso[net_lvl];
899 return 0;
900 }
901 break;
902 }
903
904 return -EINVAL;
905}
906
907/**
908 * cipso_v4_map_cat_rbm_valid - Checks to see if the category bitmap is valid
909 * @doi_def: the DOI definition
910 * @bitmap: category bitmap
911 * @bitmap_len: bitmap length in bytes
912 *
913 * Description:
914 * Checks the given category bitmap against the given DOI definition and
915 * returns a negative value if any of the categories in the bitmap do not have
916 * a valid mapping and a zero value if all of the categories are valid.
917 *
918 */
919static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def,
920 const unsigned char *bitmap,
921 u32 bitmap_len)
922{
923 int cat = -1;
924 u32 bitmap_len_bits = bitmap_len * 8;
925 u32 cipso_cat_size = doi_def->map.std->cat.cipso_size;
926 u32 *cipso_array = doi_def->map.std->cat.cipso;
927
928 switch (doi_def->type) {
929 case CIPSO_V4_MAP_PASS:
930 return 0;
931 case CIPSO_V4_MAP_STD:
932 for (;;) {
933 cat = cipso_v4_bitmap_walk(bitmap,
934 bitmap_len_bits,
935 cat + 1,
936 1);
937 if (cat < 0)
938 break;
939 if (cat >= cipso_cat_size ||
940 cipso_array[cat] >= CIPSO_V4_INV_CAT)
941 return -EFAULT;
942 }
943
944 if (cat == -1)
945 return 0;
946 break;
947 }
948
949 return -EFAULT;
950}
951
952/**
953 * cipso_v4_map_cat_rbm_hton - Perform a category mapping from host to network
954 * @doi_def: the DOI definition
955 * @host_cat: the category bitmap in host format
956 * @host_cat_len: the length of the host's category bitmap in bytes
957 * @net_cat: the zero'd out category bitmap in network/CIPSO format
958 * @net_cat_len: the length of the CIPSO bitmap in bytes
959 *
960 * Description:
961 * Perform a label mapping to translate a local MLS category bitmap to the
962 * correct CIPSO bitmap using the given DOI definition. Returns the minimum
963 * size in bytes of the network bitmap on success, negative values otherwise.
964 *
965 */
966static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
967 const unsigned char *host_cat,
968 u32 host_cat_len,
969 unsigned char *net_cat,
970 u32 net_cat_len)
971{
972 int host_spot = -1;
973 u32 net_spot;
974 u32 net_spot_max = 0;
975 u32 host_clen_bits = host_cat_len * 8;
976 u32 net_clen_bits = net_cat_len * 8;
977 u32 host_cat_size = doi_def->map.std->cat.local_size;
978 u32 *host_cat_array = doi_def->map.std->cat.local;
979
980 switch (doi_def->type) {
981 case CIPSO_V4_MAP_PASS:
982 net_spot_max = host_cat_len - 1;
983 while (net_spot_max > 0 && host_cat[net_spot_max] == 0)
984 net_spot_max--;
985 if (net_spot_max > net_cat_len)
986 return -EINVAL;
987 memcpy(net_cat, host_cat, net_spot_max);
988 return net_spot_max;
989 case CIPSO_V4_MAP_STD:
990 for (;;) {
991 host_spot = cipso_v4_bitmap_walk(host_cat,
992 host_clen_bits,
993 host_spot + 1,
994 1);
995 if (host_spot < 0)
996 break;
997 if (host_spot >= host_cat_size)
998 return -EPERM;
999
1000 net_spot = host_cat_array[host_spot];
1001 if (net_spot >= net_clen_bits)
1002 return -ENOSPC;
1003 cipso_v4_bitmap_setbit(net_cat, net_spot, 1);
1004
1005 if (net_spot > net_spot_max)
1006 net_spot_max = net_spot;
1007 }
1008
1009 if (host_spot == -2)
1010 return -EFAULT;
1011
1012 if (++net_spot_max % 8)
1013 return net_spot_max / 8 + 1;
1014 return net_spot_max / 8;
1015 }
1016
1017 return -EINVAL;
1018}
1019
1020/**
1021 * cipso_v4_map_cat_rbm_ntoh - Perform a category mapping from network to host
1022 * @doi_def: the DOI definition
1023 * @net_cat: the category bitmap in network/CIPSO format
1024 * @net_cat_len: the length of the CIPSO bitmap in bytes
1025 * @host_cat: the zero'd out category bitmap in host format
1026 * @host_cat_len: the length of the host's category bitmap in bytes
1027 *
1028 * Description:
1029 * Perform a label mapping to translate a CIPSO bitmap to the correct local
1030 * MLS category bitmap using the given DOI definition. Returns the minimum
1031 * size in bytes of the host bitmap on success, negative values otherwise.
1032 *
1033 */
1034static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
1035 const unsigned char *net_cat,
1036 u32 net_cat_len,
1037 unsigned char *host_cat,
1038 u32 host_cat_len)
1039{
1040 u32 host_spot;
1041 u32 host_spot_max = 0;
1042 int net_spot = -1;
1043 u32 net_clen_bits = net_cat_len * 8;
1044 u32 host_clen_bits = host_cat_len * 8;
1045 u32 net_cat_size = doi_def->map.std->cat.cipso_size;
1046 u32 *net_cat_array = doi_def->map.std->cat.cipso;
1047
1048 switch (doi_def->type) {
1049 case CIPSO_V4_MAP_PASS:
1050 if (net_cat_len > host_cat_len)
1051 return -EINVAL;
1052 memcpy(host_cat, net_cat, net_cat_len);
1053 return net_cat_len;
1054 case CIPSO_V4_MAP_STD:
1055 for (;;) {
1056 net_spot = cipso_v4_bitmap_walk(net_cat,
1057 net_clen_bits,
1058 net_spot + 1,
1059 1);
1060 if (net_spot < 0)
1061 break;
1062 if (net_spot >= net_cat_size ||
1063 net_cat_array[net_spot] >= CIPSO_V4_INV_CAT)
1064 return -EPERM;
1065
1066 host_spot = net_cat_array[net_spot];
1067 if (host_spot >= host_clen_bits)
1068 return -ENOSPC;
1069 cipso_v4_bitmap_setbit(host_cat, host_spot, 1);
1070
1071 if (host_spot > host_spot_max)
1072 host_spot_max = host_spot;
1073 }
1074
1075 if (net_spot == -2)
1076 return -EFAULT;
1077
1078 if (++host_spot_max % 8)
1079 return host_spot_max / 8 + 1;
1080 return host_spot_max / 8;
1081 }
1082
1083 return -EINVAL;
1084}
1085
1086/*
1087 * Protocol Handling Functions
1088 */
1089
1090#define CIPSO_V4_HDR_LEN 6
1091
/**
 * cipso_v4_gentag_hdr - Generate a CIPSO option header
 * @doi_def: the DOI definition
 * @len: the total tag length in bytes
 * @buf: the CIPSO option buffer
 *
 * Description:
 * Write a CIPSO header into the beginning of @buf.  Return zero on success,
 * negative values on failure.
 *
 */
static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def,
			       u32 len,
			       unsigned char *buf)
{
	/* the IPv4 options area is limited to 40 bytes in total */
	if (CIPSO_V4_HDR_LEN + len > 40)
		return -ENOSPC;

	buf[0] = IPOPT_CIPSO;
	buf[1] = CIPSO_V4_HDR_LEN + len;
	/* octets 2-5 hold the DOI in network byte order.
	 * NOTE(review): this 32-bit store assumes &buf[2] may be accessed
	 * unaligned -- confirm for strict-alignment architectures. */
	*(u32 *)&buf[2] = htonl(doi_def->doi);

	return 0;
}
1116
1117#define CIPSO_V4_TAG1_CAT_LEN 30
1118
/**
 * cipso_v4_gentag_rbm - Generate a CIPSO restricted bitmap tag (type #1)
 * @doi_def: the DOI definition
 * @secattr: the security attributes
 * @buffer: the option buffer
 * @buffer_len: length of buffer in bytes
 *
 * Description:
 * Generate a CIPSO option using the restricted bitmap tag, tag type #1.  The
 * actual buffer length may be larger than the indicated size due to
 * translation between host and network category bitmaps.  On success the
 * newly allocated buffer is returned through @buffer (ownership passes to
 * the caller) and zero is returned; negative values are returned on failure.
 *
 */
static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
			       const struct netlbl_lsm_secattr *secattr,
			       unsigned char **buffer,
			       u32 *buffer_len)
{
	int ret_val = -EPERM;
	unsigned char *buf = NULL;
	u32 buf_len;
	u32 level;

	if (secattr->mls_cat) {
		/* Buffer layout: option header, then a 4 byte tag header
		 * (type, length, alignment octet, level), then up to
		 * CIPSO_V4_TAG1_CAT_LEN bytes of category bitmap.  GFP_ATOMIC
		 * because callers may not be able to sleep. */
		buf = kzalloc(CIPSO_V4_HDR_LEN + 4 + CIPSO_V4_TAG1_CAT_LEN,
			      GFP_ATOMIC);
		if (buf == NULL)
			return -ENOMEM;

		ret_val = cipso_v4_map_cat_rbm_hton(doi_def,
						    secattr->mls_cat,
						    secattr->mls_cat_len,
						    &buf[CIPSO_V4_HDR_LEN + 4],
						    CIPSO_V4_TAG1_CAT_LEN);
		if (ret_val < 0)
			goto gentag_failure;

		/* This will send packets using the "optimized" format when
		 * possible as specified in section 3.4.2.6 of the
		 * CIPSO draft. */
		if (cipso_v4_rbm_optfmt && (ret_val > 0 && ret_val < 10))
			ret_val = 10;

		/* tag length = 4 byte tag header + bitmap bytes used */
		buf_len = 4 + ret_val;
	} else {
		/* no categories: the tag is just its 4 byte header */
		buf = kzalloc(CIPSO_V4_HDR_LEN + 4, GFP_ATOMIC);
		if (buf == NULL)
			return -ENOMEM;
		buf_len = 4;
	}

	ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level);
	if (ret_val != 0)
		goto gentag_failure;

	ret_val = cipso_v4_gentag_hdr(doi_def, buf_len, buf);
	if (ret_val != 0)
		goto gentag_failure;

	buf[CIPSO_V4_HDR_LEN] = 0x01;		/* tag type: restricted bitmap */
	buf[CIPSO_V4_HDR_LEN + 1] = buf_len;	/* tag length */
	/* octet 2 is the alignment octet, already zero from kzalloc() */
	buf[CIPSO_V4_HDR_LEN + 3] = level;

	*buffer = buf;
	*buffer_len = CIPSO_V4_HDR_LEN + buf_len;

	return 0;

gentag_failure:
	kfree(buf);
	return ret_val;
}
1192
/**
 * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag
 * @doi_def: the DOI definition
 * @tag: the CIPSO tag
 * @secattr: the security attributes
 *
 * Description:
 * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security
 * attributes in @secattr.  The category bitmap, if present, is allocated
 * here and its ownership passes to @secattr on success.  Return zero on
 * success, negative values on failure.
 *
 */
static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
				 const unsigned char *tag,
				 struct netlbl_lsm_secattr *secattr)
{
	int ret_val;
	u8 tag_len = tag[1];
	u32 level;

	/* octet 3 of the tag holds the sensitivity level */
	ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level);
	if (ret_val != 0)
		return ret_val;
	secattr->mls_lvl = level;
	secattr->mls_lvl_vld = 1;

	/* anything past the 4 byte tag header is the category bitmap */
	if (tag_len > 4) {
		switch (doi_def->type) {
		case CIPSO_V4_MAP_PASS:
			secattr->mls_cat_len = tag_len - 4;
			break;
		case CIPSO_V4_MAP_STD:
			/* NOTE(review): cat.local_size is used here as the
			 * host bitmap length in bytes -- confirm the unit
			 * against the mapping table definition. */
			secattr->mls_cat_len =
				doi_def->map.std->cat.local_size;
			break;
		}
		secattr->mls_cat = kzalloc(secattr->mls_cat_len, GFP_ATOMIC);
		if (secattr->mls_cat == NULL)
			return -ENOMEM;

		ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def,
						    &tag[4],
						    tag_len - 4,
						    secattr->mls_cat,
						    secattr->mls_cat_len);
		if (ret_val < 0) {
			/* NOTE(review): mls_cat is freed but not cleared on
			 * this path; callers must not reuse the pointer. */
			kfree(secattr->mls_cat);
			return ret_val;
		}
		/* shrink the recorded length to the bytes actually used */
		secattr->mls_cat_len = ret_val;
	}

	return 0;
}
1247
/**
 * cipso_v4_validate - Validate a CIPSO option
 * @option: the start of the option, on error it is set to point to the error
 *
 * Description:
 * This routine is called to validate a CIPSO option, it checks all of the
 * fields to ensure that they are at least valid, see the draft snippet below
 * for details.  If the option is valid then a zero value is returned and
 * the value of @option is unchanged.  If the option is invalid then a
 * non-zero value is returned and @option is adjusted to point to the
 * offending portion of the option.  From the IETF draft ...
 *
 * "If any field within the CIPSO options, such as the DOI identifier, is not
 * recognized the IP datagram is discarded and an ICMP 'parameter problem'
 * (type 12) is generated and returned.  The ICMP code field is set to 'bad
 * parameter' (code 0) and the pointer is set to the start of the CIPSO field
 * that is unrecognized."
 *
 */
int cipso_v4_validate(unsigned char **option)
{
	unsigned char *opt = *option;
	unsigned char *tag;
	unsigned char opt_iter;
	unsigned char err_offset = 0;
	u8 opt_len;
	u8 tag_len;
	struct cipso_v4_doi *doi_def = NULL;
	u32 tag_iter;

	/* caller already checks for length values that are too large */
	opt_len = opt[1];
	if (opt_len < 8) {
		/* too short for the 6 byte option header plus a minimal tag */
		err_offset = 1;
		goto validate_return;
	}

	rcu_read_lock();
	/* octets 2-5 of the option carry the DOI value */
	doi_def = cipso_v4_doi_getdef(ntohl(*((u32 *)&opt[2])));
	if (doi_def == NULL) {
		err_offset = 2;
		goto validate_return_locked;
	}

	opt_iter = 6;
	tag = opt + opt_iter;
	while (opt_iter < opt_len) {
		/* the tag type must appear in this DOI's permitted tag list;
		 * the list is terminated by CIPSO_V4_TAG_INVALID */
		for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];)
			if (doi_def->tags[tag_iter] == CIPSO_V4_TAG_INVALID ||
			    ++tag_iter == CIPSO_V4_TAG_MAXCNT) {
				err_offset = opt_iter;
				goto validate_return_locked;
			}

		/* the tag must fit inside the remaining option bytes */
		tag_len = tag[1];
		if (tag_len > (opt_len - opt_iter)) {
			err_offset = opt_iter + 1;
			goto validate_return_locked;
		}

		switch (tag[0]) {
		case CIPSO_V4_TAG_RBITMAP:
			/* a restricted bitmap tag needs at least its 4 byte
			 * tag header (type, length, alignment, level) */
			if (tag_len < 4) {
				err_offset = opt_iter + 1;
				goto validate_return_locked;
			}

			/* We are already going to do all the verification
			 * necessary at the socket layer so from our point of
			 * view it is safe to turn these checks off (and less
			 * work), however, the CIPSO draft says we should do
			 * all the CIPSO validations here but it doesn't
			 * really specify _exactly_ what we need to validate
			 * ... so, just make it a sysctl tunable. */
			if (cipso_v4_rbm_strictvalid) {
				if (cipso_v4_map_lvl_valid(doi_def,
							   tag[3]) < 0) {
					err_offset = opt_iter + 3;
					goto validate_return_locked;
				}
				if (tag_len > 4 &&
				    cipso_v4_map_cat_rbm_valid(doi_def,
							    &tag[4],
							    tag_len - 4) < 0) {
					err_offset = opt_iter + 4;
					goto validate_return_locked;
				}
			}
			break;
		default:
			err_offset = opt_iter;
			goto validate_return_locked;
		}

		tag += tag_len;
		opt_iter += tag_len;
	}

validate_return_locked:
	rcu_read_unlock();
validate_return:
	/* on success err_offset is still zero, so @option is unchanged */
	*option = opt + err_offset;
	return err_offset;
}
1352
1353/**
1354 * cipso_v4_error - Send the correct reponse for a bad packet
1355 * @skb: the packet
1356 * @error: the error code
1357 * @gateway: CIPSO gateway flag
1358 *
1359 * Description:
1360 * Based on the error code given in @error, send an ICMP error message back to
1361 * the originating host. From the IETF draft ...
1362 *
1363 * "If the contents of the CIPSO [option] are valid but the security label is
1364 * outside of the configured host or port label range, the datagram is
1365 * discarded and an ICMP 'destination unreachable' (type 3) is generated and
1366 * returned. The code field of the ICMP is set to 'communication with
1367 * destination network administratively prohibited' (code 9) or to
1368 * 'communication with destination host administratively prohibited'
1369 * (code 10). The value of the code is dependent on whether the originator
1370 * of the ICMP message is acting as a CIPSO host or a CIPSO gateway. The
1371 * recipient of the ICMP message MUST be able to handle either value. The
1372 * same procedure is performed if a CIPSO [option] can not be added to an
1373 * IP packet because it is too large to fit in the IP options area."
1374 *
1375 * "If the error is triggered by receipt of an ICMP message, the message is
1376 * discarded and no response is permitted (consistent with general ICMP
1377 * processing rules)."
1378 *
1379 */
1380void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
1381{
1382 if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES)
1383 return;
1384
1385 if (gateway)
1386 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
1387 else
1388 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
1389}
1390
/**
 * cipso_v4_socket_setattr - Add a CIPSO option to a socket
 * @sock: the socket
 * @doi_def: the CIPSO DOI to use
 * @secattr: the specific security attributes of the socket
 *
 * Description:
 * Set the CIPSO option on the given socket using the DOI definition and
 * security attributes passed to the function.  This function requires
 * exclusive access to @sock->sk, which means it either needs to be in the
 * process of being created or locked via lock_sock(sock->sk). Returns zero on
 * success and negative values on failure.
 *
 */
int cipso_v4_socket_setattr(const struct socket *sock,
			    const struct cipso_v4_doi *doi_def,
			    const struct netlbl_lsm_secattr *secattr)
{
	int ret_val = -EPERM;
	u32 iter;
	unsigned char *buf = NULL;
	u32 buf_len = 0;
	u32 opt_len;
	struct ip_options *opt = NULL;
	struct sock *sk;
	struct inet_sock *sk_inet;
	struct inet_connection_sock *sk_conn;

	/* In the case of sock_create_lite(), the sock->sk field is not
	 * defined yet but it is not a problem as the only users of these
	 * "lite" PF_INET sockets are functions which do an accept() call
	 * afterwards so we will label the socket as part of the accept(). */
	sk = sock->sk;
	if (sk == NULL)
		return 0;

	/* XXX - This code assumes only one tag per CIPSO option which isn't
	 * really a good assumption to make but since we only support the MAC
	 * tags right now it is a safe assumption. */
	iter = 0;
	do {
		/* walk the DOI's tag list until a generator succeeds or the
		 * list is exhausted */
		switch (doi_def->tags[iter]) {
		case CIPSO_V4_TAG_RBITMAP:
			ret_val = cipso_v4_gentag_rbm(doi_def,
						      secattr,
						      &buf,
						      &buf_len);
			break;
		default:
			ret_val = -EPERM;
			goto socket_setattr_failure;
		}

		iter++;
	} while (ret_val != 0 &&
		 iter < CIPSO_V4_TAG_MAXCNT &&
		 doi_def->tags[iter] != CIPSO_V4_TAG_INVALID);
	if (ret_val != 0)
		goto socket_setattr_failure;

	/* We can't use ip_options_get() directly because it makes a call to
	 * ip_options_get_alloc() which allocates memory with GFP_KERNEL and
	 * we can't block here. */
	opt_len = (buf_len + 3) & ~3;	/* pad to a 4 byte boundary */
	opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC);
	if (opt == NULL) {
		ret_val = -ENOMEM;
		goto socket_setattr_failure;
	}
	memcpy(opt->__data, buf, buf_len);
	opt->optlen = opt_len;
	opt->is_data = 1;
	kfree(buf);
	buf = NULL;
	ret_val = ip_options_compile(opt, NULL);
	if (ret_val != 0)
		goto socket_setattr_failure;

	sk_inet = inet_sk(sk);
	if (sk_inet->is_icsk) {
		/* connection oriented sockets account for the options in the
		 * extended header length, so replace the old contribution
		 * with the new one and re-sync the MSS */
		sk_conn = inet_csk(sk);
		if (sk_inet->opt)
			sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen;
		sk_conn->icsk_ext_hdr_len += opt->optlen;
		sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
	}
	/* swap in the new options and free the old ones */
	opt = xchg(&sk_inet->opt, opt);
	kfree(opt);

	return 0;

socket_setattr_failure:
	kfree(buf);
	kfree(opt);
	return ret_val;
}
1487
/**
 * cipso_v4_socket_getattr - Get the security attributes from a socket
 * @sock: the socket
 * @secattr: the security attributes
 *
 * Description:
 * Query @sock to see if there is a CIPSO option attached to the socket and if
 * there is return the CIPSO security attributes in @secattr.  Returns zero on
 * success and negative values on failure.
 *
 */
int cipso_v4_socket_getattr(const struct socket *sock,
			    struct netlbl_lsm_secattr *secattr)
{
	int ret_val = -ENOMSG;
	struct sock *sk;
	struct inet_sock *sk_inet;
	unsigned char *cipso_ptr;
	u32 doi;
	struct cipso_v4_doi *doi_def;

	sk = sock->sk;
	lock_sock(sk);
	sk_inet = inet_sk(sk);
	if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0)
		goto socket_getattr_return;
	/* opt->cipso is an offset relative to the start of the IP header */
	cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso -
		sizeof(struct iphdr);
	/* try the label cache first to avoid a full parse */
	ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr);
	if (ret_val == 0)
		goto socket_getattr_return;

	/* octets 2-5 of the option carry the DOI value */
	doi = ntohl(*(u32 *)&cipso_ptr[2]);
	rcu_read_lock();
	doi_def = cipso_v4_doi_getdef(doi);
	if (doi_def == NULL) {
		rcu_read_unlock();
		goto socket_getattr_return;
	}
	/* octet 6 is the type of the first (and only supported) tag */
	switch (cipso_ptr[6]) {
	case CIPSO_V4_TAG_RBITMAP:
		ret_val = cipso_v4_parsetag_rbm(doi_def,
						&cipso_ptr[6],
						secattr);
		break;
	}
	rcu_read_unlock();

socket_getattr_return:
	release_sock(sk);
	return ret_val;
}
1540
/**
 * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option
 * @skb: the packet
 * @secattr: the security attributes
 *
 * Description:
 * Parse the given packet's CIPSO option and return the security attributes.
 * Returns zero on success and negative values on failure.
 *
 */
int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
			    struct netlbl_lsm_secattr *secattr)
{
	int ret_val = -ENOMSG;
	unsigned char *cipso_ptr;
	u32 doi;
	struct cipso_v4_doi *doi_def;

	if (!CIPSO_V4_OPTEXIST(skb))
		return -ENOMSG;
	cipso_ptr = CIPSO_V4_OPTPTR(skb);
	/* try the label cache first to avoid a full parse */
	if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0)
		return 0;

	/* octets 2-5 of the option carry the DOI value */
	doi = ntohl(*(u32 *)&cipso_ptr[2]);
	rcu_read_lock();
	doi_def = cipso_v4_doi_getdef(doi);
	if (doi_def == NULL)
		goto skbuff_getattr_return;
	/* octet 6 is the type of the first (and only supported) tag */
	switch (cipso_ptr[6]) {
	case CIPSO_V4_TAG_RBITMAP:
		ret_val = cipso_v4_parsetag_rbm(doi_def,
						&cipso_ptr[6],
						secattr);
		break;
	}

skbuff_getattr_return:
	rcu_read_unlock();
	return ret_val;
}
1582
1583/*
1584 * Setup Functions
1585 */
1586
1587/**
1588 * cipso_v4_init - Initialize the CIPSO module
1589 *
1590 * Description:
1591 * Initialize the CIPSO module and prepare it for use. Returns zero on success
1592 * and negative values on failure.
1593 *
1594 */
1595static int __init cipso_v4_init(void)
1596{
1597 int ret_val;
1598
1599 ret_val = cipso_v4_cache_init();
1600 if (ret_val != 0)
1601 panic("Failed to initialize the CIPSO/IPv4 cache (%d)\n",
1602 ret_val);
1603
1604 return 0;
1605}
1606
1607subsys_initcall(cipso_v4_init);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a6cc31d911eb..8e8d1f17d77a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -43,6 +43,7 @@
43#include <linux/in.h> 43#include <linux/in.h>
44#include <linux/errno.h> 44#include <linux/errno.h>
45#include <linux/interrupt.h> 45#include <linux/interrupt.h>
46#include <linux/if_addr.h>
46#include <linux/if_ether.h> 47#include <linux/if_ether.h>
47#include <linux/inet.h> 48#include <linux/inet.h>
48#include <linux/netdevice.h> 49#include <linux/netdevice.h>
@@ -62,6 +63,7 @@
62#include <net/ip.h> 63#include <net/ip.h>
63#include <net/route.h> 64#include <net/route.h>
64#include <net/ip_fib.h> 65#include <net/ip_fib.h>
66#include <net/netlink.h>
65 67
66struct ipv4_devconf ipv4_devconf = { 68struct ipv4_devconf ipv4_devconf = {
67 .accept_redirects = 1, 69 .accept_redirects = 1,
@@ -78,7 +80,15 @@ static struct ipv4_devconf ipv4_devconf_dflt = {
78 .accept_source_route = 1, 80 .accept_source_route = 1,
79}; 81};
80 82
81static void rtmsg_ifa(int event, struct in_ifaddr *); 83static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
84 [IFA_LOCAL] = { .type = NLA_U32 },
85 [IFA_ADDRESS] = { .type = NLA_U32 },
86 [IFA_BROADCAST] = { .type = NLA_U32 },
87 [IFA_ANYCAST] = { .type = NLA_U32 },
88 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
89};
90
91static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
82 92
83static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); 93static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
84static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 94static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
@@ -229,8 +239,8 @@ int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b)
229 return 0; 239 return 0;
230} 240}
231 241
232static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 242static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
233 int destroy) 243 int destroy, struct nlmsghdr *nlh, u32 pid)
234{ 244{
235 struct in_ifaddr *promote = NULL; 245 struct in_ifaddr *promote = NULL;
236 struct in_ifaddr *ifa, *ifa1 = *ifap; 246 struct in_ifaddr *ifa, *ifa1 = *ifap;
@@ -263,7 +273,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
263 if (!do_promote) { 273 if (!do_promote) {
264 *ifap1 = ifa->ifa_next; 274 *ifap1 = ifa->ifa_next;
265 275
266 rtmsg_ifa(RTM_DELADDR, ifa); 276 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
267 blocking_notifier_call_chain(&inetaddr_chain, 277 blocking_notifier_call_chain(&inetaddr_chain,
268 NETDEV_DOWN, ifa); 278 NETDEV_DOWN, ifa);
269 inet_free_ifa(ifa); 279 inet_free_ifa(ifa);
@@ -288,7 +298,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
288 is valid, it will try to restore deleted routes... Grr. 298 is valid, it will try to restore deleted routes... Grr.
289 So that, this order is correct. 299 So that, this order is correct.
290 */ 300 */
291 rtmsg_ifa(RTM_DELADDR, ifa1); 301 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
292 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); 302 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
293 303
294 if (promote) { 304 if (promote) {
@@ -300,7 +310,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
300 } 310 }
301 311
302 promote->ifa_flags &= ~IFA_F_SECONDARY; 312 promote->ifa_flags &= ~IFA_F_SECONDARY;
303 rtmsg_ifa(RTM_NEWADDR, promote); 313 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
304 blocking_notifier_call_chain(&inetaddr_chain, 314 blocking_notifier_call_chain(&inetaddr_chain,
305 NETDEV_UP, promote); 315 NETDEV_UP, promote);
306 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) { 316 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
@@ -319,7 +329,14 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
319 } 329 }
320} 330}
321 331
322static int inet_insert_ifa(struct in_ifaddr *ifa) 332static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
333 int destroy)
334{
335 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
336}
337
338static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
339 u32 pid)
323{ 340{
324 struct in_device *in_dev = ifa->ifa_dev; 341 struct in_device *in_dev = ifa->ifa_dev;
325 struct in_ifaddr *ifa1, **ifap, **last_primary; 342 struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -364,12 +381,17 @@ static int inet_insert_ifa(struct in_ifaddr *ifa)
364 /* Send message first, then call notifier. 381 /* Send message first, then call notifier.
365 Notifier will trigger FIB update, so that 382 Notifier will trigger FIB update, so that
366 listeners of netlink will know about new ifaddr */ 383 listeners of netlink will know about new ifaddr */
367 rtmsg_ifa(RTM_NEWADDR, ifa); 384 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
368 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); 385 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
369 386
370 return 0; 387 return 0;
371} 388}
372 389
390static int inet_insert_ifa(struct in_ifaddr *ifa)
391{
392 return __inet_insert_ifa(ifa, NULL, 0);
393}
394
373static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) 395static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
374{ 396{
375 struct in_device *in_dev = __in_dev_get_rtnl(dev); 397 struct in_device *in_dev = __in_dev_get_rtnl(dev);
@@ -421,87 +443,134 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix,
421 443
422static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 444static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
423{ 445{
424 struct rtattr **rta = arg; 446 struct nlattr *tb[IFA_MAX+1];
425 struct in_device *in_dev; 447 struct in_device *in_dev;
426 struct ifaddrmsg *ifm = NLMSG_DATA(nlh); 448 struct ifaddrmsg *ifm;
427 struct in_ifaddr *ifa, **ifap; 449 struct in_ifaddr *ifa, **ifap;
450 int err = -EINVAL;
428 451
429 ASSERT_RTNL(); 452 ASSERT_RTNL();
430 453
431 if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL) 454 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
432 goto out; 455 if (err < 0)
456 goto errout;
457
458 ifm = nlmsg_data(nlh);
459 in_dev = inetdev_by_index(ifm->ifa_index);
460 if (in_dev == NULL) {
461 err = -ENODEV;
462 goto errout;
463 }
464
433 __in_dev_put(in_dev); 465 __in_dev_put(in_dev);
434 466
435 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 467 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
436 ifap = &ifa->ifa_next) { 468 ifap = &ifa->ifa_next) {
437 if ((rta[IFA_LOCAL - 1] && 469 if (tb[IFA_LOCAL] &&
438 memcmp(RTA_DATA(rta[IFA_LOCAL - 1]), 470 ifa->ifa_local != nla_get_u32(tb[IFA_LOCAL]))
439 &ifa->ifa_local, 4)) || 471 continue;
440 (rta[IFA_LABEL - 1] && 472
441 rtattr_strcmp(rta[IFA_LABEL - 1], ifa->ifa_label)) || 473 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
442 (rta[IFA_ADDRESS - 1] && 474 continue;
443 (ifm->ifa_prefixlen != ifa->ifa_prefixlen || 475
444 !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS - 1]), 476 if (tb[IFA_ADDRESS] &&
445 ifa)))) 477 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
478 !inet_ifa_match(nla_get_u32(tb[IFA_ADDRESS]), ifa)))
446 continue; 479 continue;
447 inet_del_ifa(in_dev, ifap, 1); 480
481 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
448 return 0; 482 return 0;
449 } 483 }
450out: 484
451 return -EADDRNOTAVAIL; 485 err = -EADDRNOTAVAIL;
486errout:
487 return err;
452} 488}
453 489
454static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 490static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
455{ 491{
456 struct rtattr **rta = arg; 492 struct nlattr *tb[IFA_MAX+1];
493 struct in_ifaddr *ifa;
494 struct ifaddrmsg *ifm;
457 struct net_device *dev; 495 struct net_device *dev;
458 struct in_device *in_dev; 496 struct in_device *in_dev;
459 struct ifaddrmsg *ifm = NLMSG_DATA(nlh); 497 int err = -EINVAL;
460 struct in_ifaddr *ifa;
461 int rc = -EINVAL;
462 498
463 ASSERT_RTNL(); 499 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
500 if (err < 0)
501 goto errout;
464 502
465 if (ifm->ifa_prefixlen > 32 || !rta[IFA_LOCAL - 1]) 503 ifm = nlmsg_data(nlh);
466 goto out; 504 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
505 goto errout;
467 506
468 rc = -ENODEV; 507 dev = __dev_get_by_index(ifm->ifa_index);
469 if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL) 508 if (dev == NULL) {
470 goto out; 509 err = -ENODEV;
510 goto errout;
511 }
471 512
472 rc = -ENOBUFS; 513 in_dev = __in_dev_get_rtnl(dev);
473 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { 514 if (in_dev == NULL) {
474 in_dev = inetdev_init(dev); 515 in_dev = inetdev_init(dev);
475 if (!in_dev) 516 if (in_dev == NULL) {
476 goto out; 517 err = -ENOBUFS;
518 goto errout;
519 }
477 } 520 }
478 521
479 if ((ifa = inet_alloc_ifa()) == NULL) 522 ifa = inet_alloc_ifa();
480 goto out; 523 if (ifa == NULL) {
524 /*
525 * A potential indev allocation can be left alive, it stays
526 * assigned to its device and is destroy with it.
527 */
528 err = -ENOBUFS;
529 goto errout;
530 }
531
532 in_dev_hold(in_dev);
533
534 if (tb[IFA_ADDRESS] == NULL)
535 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
481 536
482 if (!rta[IFA_ADDRESS - 1])
483 rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1];
484 memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL - 1]), 4);
485 memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS - 1]), 4);
486 ifa->ifa_prefixlen = ifm->ifa_prefixlen; 537 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
487 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); 538 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
488 if (rta[IFA_BROADCAST - 1])
489 memcpy(&ifa->ifa_broadcast,
490 RTA_DATA(rta[IFA_BROADCAST - 1]), 4);
491 if (rta[IFA_ANYCAST - 1])
492 memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST - 1]), 4);
493 ifa->ifa_flags = ifm->ifa_flags; 539 ifa->ifa_flags = ifm->ifa_flags;
494 ifa->ifa_scope = ifm->ifa_scope; 540 ifa->ifa_scope = ifm->ifa_scope;
495 in_dev_hold(in_dev); 541 ifa->ifa_dev = in_dev;
496 ifa->ifa_dev = in_dev; 542
497 if (rta[IFA_LABEL - 1]) 543 ifa->ifa_local = nla_get_u32(tb[IFA_LOCAL]);
498 rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL - 1], IFNAMSIZ); 544 ifa->ifa_address = nla_get_u32(tb[IFA_ADDRESS]);
545
546 if (tb[IFA_BROADCAST])
547 ifa->ifa_broadcast = nla_get_u32(tb[IFA_BROADCAST]);
548
549 if (tb[IFA_ANYCAST])
550 ifa->ifa_anycast = nla_get_u32(tb[IFA_ANYCAST]);
551
552 if (tb[IFA_LABEL])
553 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
499 else 554 else
500 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 555 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
501 556
502 rc = inet_insert_ifa(ifa); 557 return ifa;
503out: 558
504 return rc; 559errout:
560 return ERR_PTR(err);
561}
562
563static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
564{
565 struct in_ifaddr *ifa;
566
567 ASSERT_RTNL();
568
569 ifa = rtm_to_ifaddr(nlh);
570 if (IS_ERR(ifa))
571 return PTR_ERR(ifa);
572
573 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
505} 574}
506 575
507/* 576/*
@@ -1056,32 +1125,37 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1056{ 1125{
1057 struct ifaddrmsg *ifm; 1126 struct ifaddrmsg *ifm;
1058 struct nlmsghdr *nlh; 1127 struct nlmsghdr *nlh;
1059 unsigned char *b = skb->tail;
1060 1128
1061 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); 1129 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1062 ifm = NLMSG_DATA(nlh); 1130 if (nlh == NULL)
1131 return -ENOBUFS;
1132
1133 ifm = nlmsg_data(nlh);
1063 ifm->ifa_family = AF_INET; 1134 ifm->ifa_family = AF_INET;
1064 ifm->ifa_prefixlen = ifa->ifa_prefixlen; 1135 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1065 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; 1136 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1066 ifm->ifa_scope = ifa->ifa_scope; 1137 ifm->ifa_scope = ifa->ifa_scope;
1067 ifm->ifa_index = ifa->ifa_dev->dev->ifindex; 1138 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1139
1068 if (ifa->ifa_address) 1140 if (ifa->ifa_address)
1069 RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address); 1141 NLA_PUT_U32(skb, IFA_ADDRESS, ifa->ifa_address);
1142
1070 if (ifa->ifa_local) 1143 if (ifa->ifa_local)
1071 RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local); 1144 NLA_PUT_U32(skb, IFA_LOCAL, ifa->ifa_local);
1145
1072 if (ifa->ifa_broadcast) 1146 if (ifa->ifa_broadcast)
1073 RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast); 1147 NLA_PUT_U32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1148
1074 if (ifa->ifa_anycast) 1149 if (ifa->ifa_anycast)
1075 RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast); 1150 NLA_PUT_U32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1151
1076 if (ifa->ifa_label[0]) 1152 if (ifa->ifa_label[0])
1077 RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label); 1153 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1078 nlh->nlmsg_len = skb->tail - b;
1079 return skb->len;
1080 1154
1081nlmsg_failure: 1155 return nlmsg_end(skb, nlh);
1082rtattr_failure: 1156
1083 skb_trim(skb, b - skb->data); 1157nla_put_failure:
1084 return -1; 1158 return nlmsg_cancel(skb, nlh);
1085} 1159}
1086 1160
1087static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) 1161static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1127,19 +1201,27 @@ done:
1127 return skb->len; 1201 return skb->len;
1128} 1202}
1129 1203
1130static void rtmsg_ifa(int event, struct in_ifaddr* ifa) 1204static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1205 u32 pid)
1131{ 1206{
1132 int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + 128); 1207 struct sk_buff *skb;
1133 struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); 1208 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1209 int err = -ENOBUFS;
1210
1211 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
1212 if (skb == NULL)
1213 goto errout;
1134 1214
1135 if (!skb) 1215 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1136 netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS); 1216 if (err < 0) {
1137 else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
1138 kfree_skb(skb); 1217 kfree_skb(skb);
1139 netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL); 1218 goto errout;
1140 } else {
1141 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL);
1142 } 1219 }
1220
1221 err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1222errout:
1223 if (err < 0)
1224 rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
1143} 1225}
1144 1226
1145static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { 1227static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
@@ -1151,9 +1233,7 @@ static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
1151 [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute, 1233 [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute,
1152 .dumpit = inet_dump_fib, }, 1234 .dumpit = inet_dump_fib, },
1153#ifdef CONFIG_IP_MULTIPLE_TABLES 1235#ifdef CONFIG_IP_MULTIPLE_TABLES
1154 [RTM_NEWRULE - RTM_BASE] = { .doit = inet_rtm_newrule, }, 1236 [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, },
1155 [RTM_DELRULE - RTM_BASE] = { .doit = inet_rtm_delrule, },
1156 [RTM_GETRULE - RTM_BASE] = { .dumpit = inet_dump_rules, },
1157#endif 1237#endif
1158}; 1238};
1159 1239
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b428489f6ccd..13b29360d102 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -95,8 +95,13 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
95 esph->seq_no = htonl(++x->replay.oseq); 95 esph->seq_no = htonl(++x->replay.oseq);
96 xfrm_aevent_doreplay(x); 96 xfrm_aevent_doreplay(x);
97 97
98 if (esp->conf.ivlen) 98 if (esp->conf.ivlen) {
99 if (unlikely(!esp->conf.ivinitted)) {
100 get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
101 esp->conf.ivinitted = 1;
102 }
99 crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); 103 crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
104 }
100 105
101 do { 106 do {
102 struct scatterlist *sg = &esp->sgbuf[0]; 107 struct scatterlist *sg = &esp->sgbuf[0];
@@ -248,7 +253,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
248 * as per draft-ietf-ipsec-udp-encaps-06, 253 * as per draft-ietf-ipsec-udp-encaps-06,
249 * section 3.1.2 254 * section 3.1.2
250 */ 255 */
251 if (!x->props.mode) 256 if (x->props.mode == XFRM_MODE_TRANSPORT)
252 skb->ip_summed = CHECKSUM_UNNECESSARY; 257 skb->ip_summed = CHECKSUM_UNNECESSARY;
253 } 258 }
254 259
@@ -267,7 +272,7 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
267 struct esp_data *esp = x->data; 272 struct esp_data *esp = x->data;
268 u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); 273 u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
269 274
270 if (x->props.mode) { 275 if (x->props.mode == XFRM_MODE_TUNNEL) {
271 mtu = ALIGN(mtu + 2, blksize); 276 mtu = ALIGN(mtu + 2, blksize);
272 } else { 277 } else {
273 /* The worst case. */ 278 /* The worst case. */
@@ -378,12 +383,12 @@ static int esp_init_state(struct xfrm_state *x)
378 esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); 383 esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
379 if (unlikely(esp->conf.ivec == NULL)) 384 if (unlikely(esp->conf.ivec == NULL))
380 goto error; 385 goto error;
381 get_random_bytes(esp->conf.ivec, esp->conf.ivlen); 386 esp->conf.ivinitted = 0;
382 } 387 }
383 if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) 388 if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len))
384 goto error; 389 goto error;
385 x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; 390 x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
386 if (x->props.mode) 391 if (x->props.mode == XFRM_MODE_TUNNEL)
387 x->props.header_len += sizeof(struct iphdr); 392 x->props.header_len += sizeof(struct iphdr);
388 if (x->encap) { 393 if (x->encap) {
389 struct xfrm_encap_tmpl *encap = x->encap; 394 struct xfrm_encap_tmpl *encap = x->encap;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ba2a70745a63..cfb527c060e4 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -32,10 +32,12 @@
32#include <linux/inet.h> 32#include <linux/inet.h>
33#include <linux/inetdevice.h> 33#include <linux/inetdevice.h>
34#include <linux/netdevice.h> 34#include <linux/netdevice.h>
35#include <linux/if_addr.h>
35#include <linux/if_arp.h> 36#include <linux/if_arp.h>
36#include <linux/skbuff.h> 37#include <linux/skbuff.h>
37#include <linux/netlink.h> 38#include <linux/netlink.h>
38#include <linux/init.h> 39#include <linux/init.h>
40#include <linux/list.h>
39 41
40#include <net/ip.h> 42#include <net/ip.h>
41#include <net/protocol.h> 43#include <net/protocol.h>
@@ -50,48 +52,67 @@
50 52
51#ifndef CONFIG_IP_MULTIPLE_TABLES 53#ifndef CONFIG_IP_MULTIPLE_TABLES
52 54
53#define RT_TABLE_MIN RT_TABLE_MAIN
54
55struct fib_table *ip_fib_local_table; 55struct fib_table *ip_fib_local_table;
56struct fib_table *ip_fib_main_table; 56struct fib_table *ip_fib_main_table;
57 57
58#else 58#define FIB_TABLE_HASHSZ 1
59static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
59 60
60#define RT_TABLE_MIN 1 61#else
61 62
62struct fib_table *fib_tables[RT_TABLE_MAX+1]; 63#define FIB_TABLE_HASHSZ 256
64static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
63 65
64struct fib_table *__fib_new_table(int id) 66struct fib_table *fib_new_table(u32 id)
65{ 67{
66 struct fib_table *tb; 68 struct fib_table *tb;
69 unsigned int h;
67 70
71 if (id == 0)
72 id = RT_TABLE_MAIN;
73 tb = fib_get_table(id);
74 if (tb)
75 return tb;
68 tb = fib_hash_init(id); 76 tb = fib_hash_init(id);
69 if (!tb) 77 if (!tb)
70 return NULL; 78 return NULL;
71 fib_tables[id] = tb; 79 h = id & (FIB_TABLE_HASHSZ - 1);
80 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
72 return tb; 81 return tb;
73} 82}
74 83
84struct fib_table *fib_get_table(u32 id)
85{
86 struct fib_table *tb;
87 struct hlist_node *node;
88 unsigned int h;
75 89
90 if (id == 0)
91 id = RT_TABLE_MAIN;
92 h = id & (FIB_TABLE_HASHSZ - 1);
93 rcu_read_lock();
94 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
95 if (tb->tb_id == id) {
96 rcu_read_unlock();
97 return tb;
98 }
99 }
100 rcu_read_unlock();
101 return NULL;
102}
76#endif /* CONFIG_IP_MULTIPLE_TABLES */ 103#endif /* CONFIG_IP_MULTIPLE_TABLES */
77 104
78
79static void fib_flush(void) 105static void fib_flush(void)
80{ 106{
81 int flushed = 0; 107 int flushed = 0;
82#ifdef CONFIG_IP_MULTIPLE_TABLES
83 struct fib_table *tb; 108 struct fib_table *tb;
84 int id; 109 struct hlist_node *node;
110 unsigned int h;
85 111
86 for (id = RT_TABLE_MAX; id>0; id--) { 112 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
87 if ((tb = fib_get_table(id))==NULL) 113 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
88 continue; 114 flushed += tb->tb_flush(tb);
89 flushed += tb->tb_flush(tb);
90 } 115 }
91#else /* CONFIG_IP_MULTIPLE_TABLES */
92 flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
93 flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
94#endif /* CONFIG_IP_MULTIPLE_TABLES */
95 116
96 if (flushed) 117 if (flushed)
97 rt_cache_flush(-1); 118 rt_cache_flush(-1);
@@ -232,42 +253,190 @@ e_inval:
232 253
233#ifndef CONFIG_IP_NOSIOCRT 254#ifndef CONFIG_IP_NOSIOCRT
234 255
256static inline u32 sk_extract_addr(struct sockaddr *addr)
257{
258 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
259}
260
261static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
262{
263 struct nlattr *nla;
264
265 nla = (struct nlattr *) ((char *) mx + len);
266 nla->nla_type = type;
267 nla->nla_len = nla_attr_size(4);
268 *(u32 *) nla_data(nla) = value;
269
270 return len + nla_total_size(4);
271}
272
273static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
274 struct fib_config *cfg)
275{
276 u32 addr;
277 int plen;
278
279 memset(cfg, 0, sizeof(*cfg));
280
281 if (rt->rt_dst.sa_family != AF_INET)
282 return -EAFNOSUPPORT;
283
284 /*
285 * Check mask for validity:
286 * a) it must be contiguous.
287 * b) destination must have all host bits clear.
288 * c) if application forgot to set correct family (AF_INET),
289 * reject request unless it is absolutely clear i.e.
290 * both family and mask are zero.
291 */
292 plen = 32;
293 addr = sk_extract_addr(&rt->rt_dst);
294 if (!(rt->rt_flags & RTF_HOST)) {
295 u32 mask = sk_extract_addr(&rt->rt_genmask);
296
297 if (rt->rt_genmask.sa_family != AF_INET) {
298 if (mask || rt->rt_genmask.sa_family)
299 return -EAFNOSUPPORT;
300 }
301
302 if (bad_mask(mask, addr))
303 return -EINVAL;
304
305 plen = inet_mask_len(mask);
306 }
307
308 cfg->fc_dst_len = plen;
309 cfg->fc_dst = addr;
310
311 if (cmd != SIOCDELRT) {
312 cfg->fc_nlflags = NLM_F_CREATE;
313 cfg->fc_protocol = RTPROT_BOOT;
314 }
315
316 if (rt->rt_metric)
317 cfg->fc_priority = rt->rt_metric - 1;
318
319 if (rt->rt_flags & RTF_REJECT) {
320 cfg->fc_scope = RT_SCOPE_HOST;
321 cfg->fc_type = RTN_UNREACHABLE;
322 return 0;
323 }
324
325 cfg->fc_scope = RT_SCOPE_NOWHERE;
326 cfg->fc_type = RTN_UNICAST;
327
328 if (rt->rt_dev) {
329 char *colon;
330 struct net_device *dev;
331 char devname[IFNAMSIZ];
332
333 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
334 return -EFAULT;
335
336 devname[IFNAMSIZ-1] = 0;
337 colon = strchr(devname, ':');
338 if (colon)
339 *colon = 0;
340 dev = __dev_get_by_name(devname);
341 if (!dev)
342 return -ENODEV;
343 cfg->fc_oif = dev->ifindex;
344 if (colon) {
345 struct in_ifaddr *ifa;
346 struct in_device *in_dev = __in_dev_get_rtnl(dev);
347 if (!in_dev)
348 return -ENODEV;
349 *colon = ':';
350 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
351 if (strcmp(ifa->ifa_label, devname) == 0)
352 break;
353 if (ifa == NULL)
354 return -ENODEV;
355 cfg->fc_prefsrc = ifa->ifa_local;
356 }
357 }
358
359 addr = sk_extract_addr(&rt->rt_gateway);
360 if (rt->rt_gateway.sa_family == AF_INET && addr) {
361 cfg->fc_gw = addr;
362 if (rt->rt_flags & RTF_GATEWAY &&
363 inet_addr_type(addr) == RTN_UNICAST)
364 cfg->fc_scope = RT_SCOPE_UNIVERSE;
365 }
366
367 if (cmd == SIOCDELRT)
368 return 0;
369
370 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
371 return -EINVAL;
372
373 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
374 cfg->fc_scope = RT_SCOPE_LINK;
375
376 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
377 struct nlattr *mx;
378 int len = 0;
379
380 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
381 if (mx == NULL)
382 return -ENOMEM;
383
384 if (rt->rt_flags & RTF_MTU)
385 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
386
387 if (rt->rt_flags & RTF_WINDOW)
388 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
389
390 if (rt->rt_flags & RTF_IRTT)
391 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
392
393 cfg->fc_mx = mx;
394 cfg->fc_mx_len = len;
395 }
396
397 return 0;
398}
399
235/* 400/*
236 * Handle IP routing ioctl calls. These are used to manipulate the routing tables 401 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
237 */ 402 */
238 403
239int ip_rt_ioctl(unsigned int cmd, void __user *arg) 404int ip_rt_ioctl(unsigned int cmd, void __user *arg)
240{ 405{
406 struct fib_config cfg;
407 struct rtentry rt;
241 int err; 408 int err;
242 struct kern_rta rta;
243 struct rtentry r;
244 struct {
245 struct nlmsghdr nlh;
246 struct rtmsg rtm;
247 } req;
248 409
249 switch (cmd) { 410 switch (cmd) {
250 case SIOCADDRT: /* Add a route */ 411 case SIOCADDRT: /* Add a route */
251 case SIOCDELRT: /* Delete a route */ 412 case SIOCDELRT: /* Delete a route */
252 if (!capable(CAP_NET_ADMIN)) 413 if (!capable(CAP_NET_ADMIN))
253 return -EPERM; 414 return -EPERM;
254 if (copy_from_user(&r, arg, sizeof(struct rtentry))) 415
416 if (copy_from_user(&rt, arg, sizeof(rt)))
255 return -EFAULT; 417 return -EFAULT;
418
256 rtnl_lock(); 419 rtnl_lock();
257 err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r); 420 err = rtentry_to_fib_config(cmd, &rt, &cfg);
258 if (err == 0) { 421 if (err == 0) {
422 struct fib_table *tb;
423
259 if (cmd == SIOCDELRT) { 424 if (cmd == SIOCDELRT) {
260 struct fib_table *tb = fib_get_table(req.rtm.rtm_table); 425 tb = fib_get_table(cfg.fc_table);
261 err = -ESRCH;
262 if (tb) 426 if (tb)
263 err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); 427 err = tb->tb_delete(tb, &cfg);
428 else
429 err = -ESRCH;
264 } else { 430 } else {
265 struct fib_table *tb = fib_new_table(req.rtm.rtm_table); 431 tb = fib_new_table(cfg.fc_table);
266 err = -ENOBUFS;
267 if (tb) 432 if (tb)
268 err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); 433 err = tb->tb_insert(tb, &cfg);
434 else
435 err = -ENOBUFS;
269 } 436 }
270 kfree(rta.rta_mx); 437
438 /* allocated by rtentry_to_fib_config() */
439 kfree(cfg.fc_mx);
271 } 440 }
272 rtnl_unlock(); 441 rtnl_unlock();
273 return err; 442 return err;
@@ -284,77 +453,169 @@ int ip_rt_ioctl(unsigned int cmd, void *arg)
284 453
285#endif 454#endif
286 455
287static int inet_check_attr(struct rtmsg *r, struct rtattr **rta) 456struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
457 [RTA_DST] = { .type = NLA_U32 },
458 [RTA_SRC] = { .type = NLA_U32 },
459 [RTA_IIF] = { .type = NLA_U32 },
460 [RTA_OIF] = { .type = NLA_U32 },
461 [RTA_GATEWAY] = { .type = NLA_U32 },
462 [RTA_PRIORITY] = { .type = NLA_U32 },
463 [RTA_PREFSRC] = { .type = NLA_U32 },
464 [RTA_METRICS] = { .type = NLA_NESTED },
465 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
466 [RTA_PROTOINFO] = { .type = NLA_U32 },
467 [RTA_FLOW] = { .type = NLA_U32 },
468 [RTA_MP_ALGO] = { .type = NLA_U32 },
469};
470
471static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
472 struct fib_config *cfg)
288{ 473{
289 int i; 474 struct nlattr *attr;
290 475 int err, remaining;
291 for (i=1; i<=RTA_MAX; i++, rta++) { 476 struct rtmsg *rtm;
292 struct rtattr *attr = *rta; 477
293 if (attr) { 478 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
294 if (RTA_PAYLOAD(attr) < 4) 479 if (err < 0)
295 return -EINVAL; 480 goto errout;
296 if (i != RTA_MULTIPATH && i != RTA_METRICS) 481
297 *rta = (struct rtattr*)RTA_DATA(attr); 482 memset(cfg, 0, sizeof(*cfg));
483
484 rtm = nlmsg_data(nlh);
485 cfg->fc_family = rtm->rtm_family;
486 cfg->fc_dst_len = rtm->rtm_dst_len;
487 cfg->fc_src_len = rtm->rtm_src_len;
488 cfg->fc_tos = rtm->rtm_tos;
489 cfg->fc_table = rtm->rtm_table;
490 cfg->fc_protocol = rtm->rtm_protocol;
491 cfg->fc_scope = rtm->rtm_scope;
492 cfg->fc_type = rtm->rtm_type;
493 cfg->fc_flags = rtm->rtm_flags;
494 cfg->fc_nlflags = nlh->nlmsg_flags;
495
496 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
497 cfg->fc_nlinfo.nlh = nlh;
498
499 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
500 switch (attr->nla_type) {
501 case RTA_DST:
502 cfg->fc_dst = nla_get_u32(attr);
503 break;
504 case RTA_SRC:
505 cfg->fc_src = nla_get_u32(attr);
506 break;
507 case RTA_OIF:
508 cfg->fc_oif = nla_get_u32(attr);
509 break;
510 case RTA_GATEWAY:
511 cfg->fc_gw = nla_get_u32(attr);
512 break;
513 case RTA_PRIORITY:
514 cfg->fc_priority = nla_get_u32(attr);
515 break;
516 case RTA_PREFSRC:
517 cfg->fc_prefsrc = nla_get_u32(attr);
518 break;
519 case RTA_METRICS:
520 cfg->fc_mx = nla_data(attr);
521 cfg->fc_mx_len = nla_len(attr);
522 break;
523 case RTA_MULTIPATH:
524 cfg->fc_mp = nla_data(attr);
525 cfg->fc_mp_len = nla_len(attr);
526 break;
527 case RTA_FLOW:
528 cfg->fc_flow = nla_get_u32(attr);
529 break;
530 case RTA_MP_ALGO:
531 cfg->fc_mp_alg = nla_get_u32(attr);
532 break;
533 case RTA_TABLE:
534 cfg->fc_table = nla_get_u32(attr);
535 break;
298 } 536 }
299 } 537 }
538
300 return 0; 539 return 0;
540errout:
541 return err;
301} 542}
302 543
303int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 544int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
304{ 545{
305 struct fib_table * tb; 546 struct fib_config cfg;
306 struct rtattr **rta = arg; 547 struct fib_table *tb;
307 struct rtmsg *r = NLMSG_DATA(nlh); 548 int err;
308 549
309 if (inet_check_attr(r, rta)) 550 err = rtm_to_fib_config(skb, nlh, &cfg);
310 return -EINVAL; 551 if (err < 0)
552 goto errout;
311 553
312 tb = fib_get_table(r->rtm_table); 554 tb = fib_get_table(cfg.fc_table);
313 if (tb) 555 if (tb == NULL) {
314 return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); 556 err = -ESRCH;
315 return -ESRCH; 557 goto errout;
558 }
559
560 err = tb->tb_delete(tb, &cfg);
561errout:
562 return err;
316} 563}
317 564
318int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 565int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
319{ 566{
320 struct fib_table * tb; 567 struct fib_config cfg;
321 struct rtattr **rta = arg; 568 struct fib_table *tb;
322 struct rtmsg *r = NLMSG_DATA(nlh); 569 int err;
323 570
324 if (inet_check_attr(r, rta)) 571 err = rtm_to_fib_config(skb, nlh, &cfg);
325 return -EINVAL; 572 if (err < 0)
573 goto errout;
326 574
327 tb = fib_new_table(r->rtm_table); 575 tb = fib_new_table(cfg.fc_table);
328 if (tb) 576 if (tb == NULL) {
329 return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); 577 err = -ENOBUFS;
330 return -ENOBUFS; 578 goto errout;
579 }
580
581 err = tb->tb_insert(tb, &cfg);
582errout:
583 return err;
331} 584}
332 585
333int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 586int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
334{ 587{
335 int t; 588 unsigned int h, s_h;
336 int s_t; 589 unsigned int e = 0, s_e;
337 struct fib_table *tb; 590 struct fib_table *tb;
591 struct hlist_node *node;
592 int dumped = 0;
338 593
339 if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && 594 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
340 ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) 595 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
341 return ip_rt_dump(skb, cb); 596 return ip_rt_dump(skb, cb);
342 597
343 s_t = cb->args[0]; 598 s_h = cb->args[0];
344 if (s_t == 0) 599 s_e = cb->args[1];
345 s_t = cb->args[0] = RT_TABLE_MIN; 600
346 601 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
347 for (t=s_t; t<=RT_TABLE_MAX; t++) { 602 e = 0;
348 if (t < s_t) continue; 603 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
349 if (t > s_t) 604 if (e < s_e)
350 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); 605 goto next;
351 if ((tb = fib_get_table(t))==NULL) 606 if (dumped)
352 continue; 607 memset(&cb->args[2], 0, sizeof(cb->args) -
353 if (tb->tb_dump(tb, skb, cb) < 0) 608 2 * sizeof(cb->args[0]));
354 break; 609 if (tb->tb_dump(tb, skb, cb) < 0)
610 goto out;
611 dumped = 1;
612next:
613 e++;
614 }
355 } 615 }
356 616out:
357 cb->args[0] = t; 617 cb->args[1] = e;
618 cb->args[0] = h;
358 619
359 return skb->len; 620 return skb->len;
360} 621}
@@ -366,17 +627,19 @@ int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
366 only when netlink is already locked. 627 only when netlink is already locked.
367 */ 628 */
368 629
369static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa) 630static void fib_magic(int cmd, int type, u32 dst, int dst_len,
631 struct in_ifaddr *ifa)
370{ 632{
371 struct fib_table * tb; 633 struct fib_table *tb;
372 struct { 634 struct fib_config cfg = {
373 struct nlmsghdr nlh; 635 .fc_protocol = RTPROT_KERNEL,
374 struct rtmsg rtm; 636 .fc_type = type,
375 } req; 637 .fc_dst = dst,
376 struct kern_rta rta; 638 .fc_dst_len = dst_len,
377 639 .fc_prefsrc = ifa->ifa_local,
378 memset(&req.rtm, 0, sizeof(req.rtm)); 640 .fc_oif = ifa->ifa_dev->dev->ifindex,
379 memset(&rta, 0, sizeof(rta)); 641 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
642 };
380 643
381 if (type == RTN_UNICAST) 644 if (type == RTN_UNICAST)
382 tb = fib_new_table(RT_TABLE_MAIN); 645 tb = fib_new_table(RT_TABLE_MAIN);
@@ -386,26 +649,17 @@ static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr
386 if (tb == NULL) 649 if (tb == NULL)
387 return; 650 return;
388 651
389 req.nlh.nlmsg_len = sizeof(req); 652 cfg.fc_table = tb->tb_id;
390 req.nlh.nlmsg_type = cmd;
391 req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
392 req.nlh.nlmsg_pid = 0;
393 req.nlh.nlmsg_seq = 0;
394 653
395 req.rtm.rtm_dst_len = dst_len; 654 if (type != RTN_LOCAL)
396 req.rtm.rtm_table = tb->tb_id; 655 cfg.fc_scope = RT_SCOPE_LINK;
397 req.rtm.rtm_protocol = RTPROT_KERNEL; 656 else
398 req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); 657 cfg.fc_scope = RT_SCOPE_HOST;
399 req.rtm.rtm_type = type;
400
401 rta.rta_dst = &dst;
402 rta.rta_prefsrc = &ifa->ifa_local;
403 rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
404 658
405 if (cmd == RTM_NEWROUTE) 659 if (cmd == RTM_NEWROUTE)
406 tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); 660 tb->tb_insert(tb, &cfg);
407 else 661 else
408 tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); 662 tb->tb_delete(tb, &cfg);
409} 663}
410 664
411void fib_add_ifaddr(struct in_ifaddr *ifa) 665void fib_add_ifaddr(struct in_ifaddr *ifa)
@@ -652,11 +906,17 @@ static struct notifier_block fib_netdev_notifier = {
652 906
653void __init ip_fib_init(void) 907void __init ip_fib_init(void)
654{ 908{
909 unsigned int i;
910
911 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
912 INIT_HLIST_HEAD(&fib_table_hash[i]);
655#ifndef CONFIG_IP_MULTIPLE_TABLES 913#ifndef CONFIG_IP_MULTIPLE_TABLES
656 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); 914 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
915 hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
657 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); 916 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
917 hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
658#else 918#else
659 fib_rules_init(); 919 fib4_rules_init();
660#endif 920#endif
661 921
662 register_netdevice_notifier(&fib_netdev_notifier); 922 register_netdevice_notifier(&fib_netdev_notifier);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 72c633b357cf..88133b383dc5 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -379,42 +379,39 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key)
379 return NULL; 379 return NULL;
380} 380}
381 381
382static int 382static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
383fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
384 struct nlmsghdr *n, struct netlink_skb_parms *req)
385{ 383{
386 struct fn_hash *table = (struct fn_hash *) tb->tb_data; 384 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
387 struct fib_node *new_f, *f; 385 struct fib_node *new_f, *f;
388 struct fib_alias *fa, *new_fa; 386 struct fib_alias *fa, *new_fa;
389 struct fn_zone *fz; 387 struct fn_zone *fz;
390 struct fib_info *fi; 388 struct fib_info *fi;
391 int z = r->rtm_dst_len; 389 u8 tos = cfg->fc_tos;
392 int type = r->rtm_type;
393 u8 tos = r->rtm_tos;
394 u32 key; 390 u32 key;
395 int err; 391 int err;
396 392
397 if (z > 32) 393 if (cfg->fc_dst_len > 32)
398 return -EINVAL; 394 return -EINVAL;
399 fz = table->fn_zones[z]; 395
400 if (!fz && !(fz = fn_new_zone(table, z))) 396 fz = table->fn_zones[cfg->fc_dst_len];
397 if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len)))
401 return -ENOBUFS; 398 return -ENOBUFS;
402 399
403 key = 0; 400 key = 0;
404 if (rta->rta_dst) { 401 if (cfg->fc_dst) {
405 u32 dst; 402 if (cfg->fc_dst & ~FZ_MASK(fz))
406 memcpy(&dst, rta->rta_dst, 4);
407 if (dst & ~FZ_MASK(fz))
408 return -EINVAL; 403 return -EINVAL;
409 key = fz_key(dst, fz); 404 key = fz_key(cfg->fc_dst, fz);
410 } 405 }
411 406
412 if ((fi = fib_create_info(r, rta, n, &err)) == NULL) 407 fi = fib_create_info(cfg);
413 return err; 408 if (IS_ERR(fi))
409 return PTR_ERR(fi);
414 410
415 if (fz->fz_nent > (fz->fz_divisor<<1) && 411 if (fz->fz_nent > (fz->fz_divisor<<1) &&
416 fz->fz_divisor < FZ_MAX_DIVISOR && 412 fz->fz_divisor < FZ_MAX_DIVISOR &&
417 (z==32 || (1<<z) > fz->fz_divisor)) 413 (cfg->fc_dst_len == 32 ||
414 (1 << cfg->fc_dst_len) > fz->fz_divisor))
418 fn_rehash_zone(fz); 415 fn_rehash_zone(fz);
419 416
420 f = fib_find_node(fz, key); 417 f = fib_find_node(fz, key);
@@ -440,18 +437,18 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
440 struct fib_alias *fa_orig; 437 struct fib_alias *fa_orig;
441 438
442 err = -EEXIST; 439 err = -EEXIST;
443 if (n->nlmsg_flags & NLM_F_EXCL) 440 if (cfg->fc_nlflags & NLM_F_EXCL)
444 goto out; 441 goto out;
445 442
446 if (n->nlmsg_flags & NLM_F_REPLACE) { 443 if (cfg->fc_nlflags & NLM_F_REPLACE) {
447 struct fib_info *fi_drop; 444 struct fib_info *fi_drop;
448 u8 state; 445 u8 state;
449 446
450 write_lock_bh(&fib_hash_lock); 447 write_lock_bh(&fib_hash_lock);
451 fi_drop = fa->fa_info; 448 fi_drop = fa->fa_info;
452 fa->fa_info = fi; 449 fa->fa_info = fi;
453 fa->fa_type = type; 450 fa->fa_type = cfg->fc_type;
454 fa->fa_scope = r->rtm_scope; 451 fa->fa_scope = cfg->fc_scope;
455 state = fa->fa_state; 452 state = fa->fa_state;
456 fa->fa_state &= ~FA_S_ACCESSED; 453 fa->fa_state &= ~FA_S_ACCESSED;
457 fib_hash_genid++; 454 fib_hash_genid++;
@@ -474,17 +471,17 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
474 break; 471 break;
475 if (fa->fa_info->fib_priority != fi->fib_priority) 472 if (fa->fa_info->fib_priority != fi->fib_priority)
476 break; 473 break;
477 if (fa->fa_type == type && 474 if (fa->fa_type == cfg->fc_type &&
478 fa->fa_scope == r->rtm_scope && 475 fa->fa_scope == cfg->fc_scope &&
479 fa->fa_info == fi) 476 fa->fa_info == fi)
480 goto out; 477 goto out;
481 } 478 }
482 if (!(n->nlmsg_flags & NLM_F_APPEND)) 479 if (!(cfg->fc_nlflags & NLM_F_APPEND))
483 fa = fa_orig; 480 fa = fa_orig;
484 } 481 }
485 482
486 err = -ENOENT; 483 err = -ENOENT;
487 if (!(n->nlmsg_flags&NLM_F_CREATE)) 484 if (!(cfg->fc_nlflags & NLM_F_CREATE))
488 goto out; 485 goto out;
489 486
490 err = -ENOBUFS; 487 err = -ENOBUFS;
@@ -506,8 +503,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
506 503
507 new_fa->fa_info = fi; 504 new_fa->fa_info = fi;
508 new_fa->fa_tos = tos; 505 new_fa->fa_tos = tos;
509 new_fa->fa_type = type; 506 new_fa->fa_type = cfg->fc_type;
510 new_fa->fa_scope = r->rtm_scope; 507 new_fa->fa_scope = cfg->fc_scope;
511 new_fa->fa_state = 0; 508 new_fa->fa_state = 0;
512 509
513 /* 510 /*
@@ -526,7 +523,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
526 fz->fz_nent++; 523 fz->fz_nent++;
527 rt_cache_flush(-1); 524 rt_cache_flush(-1);
528 525
529 rtmsg_fib(RTM_NEWROUTE, key, new_fa, z, tb->tb_id, n, req); 526 rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
527 &cfg->fc_nlinfo);
530 return 0; 528 return 0;
531 529
532out_free_new_fa: 530out_free_new_fa:
@@ -537,30 +535,25 @@ out:
537} 535}
538 536
539 537
540static int 538static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
541fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
542 struct nlmsghdr *n, struct netlink_skb_parms *req)
543{ 539{
544 struct fn_hash *table = (struct fn_hash*)tb->tb_data; 540 struct fn_hash *table = (struct fn_hash*)tb->tb_data;
545 struct fib_node *f; 541 struct fib_node *f;
546 struct fib_alias *fa, *fa_to_delete; 542 struct fib_alias *fa, *fa_to_delete;
547 int z = r->rtm_dst_len;
548 struct fn_zone *fz; 543 struct fn_zone *fz;
549 u32 key; 544 u32 key;
550 u8 tos = r->rtm_tos;
551 545
552 if (z > 32) 546 if (cfg->fc_dst_len > 32)
553 return -EINVAL; 547 return -EINVAL;
554 if ((fz = table->fn_zones[z]) == NULL) 548
549 if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL)
555 return -ESRCH; 550 return -ESRCH;
556 551
557 key = 0; 552 key = 0;
558 if (rta->rta_dst) { 553 if (cfg->fc_dst) {
559 u32 dst; 554 if (cfg->fc_dst & ~FZ_MASK(fz))
560 memcpy(&dst, rta->rta_dst, 4);
561 if (dst & ~FZ_MASK(fz))
562 return -EINVAL; 555 return -EINVAL;
563 key = fz_key(dst, fz); 556 key = fz_key(cfg->fc_dst, fz);
564 } 557 }
565 558
566 f = fib_find_node(fz, key); 559 f = fib_find_node(fz, key);
@@ -568,7 +561,7 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
568 if (!f) 561 if (!f)
569 fa = NULL; 562 fa = NULL;
570 else 563 else
571 fa = fib_find_alias(&f->fn_alias, tos, 0); 564 fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0);
572 if (!fa) 565 if (!fa)
573 return -ESRCH; 566 return -ESRCH;
574 567
@@ -577,16 +570,16 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
577 list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { 570 list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
578 struct fib_info *fi = fa->fa_info; 571 struct fib_info *fi = fa->fa_info;
579 572
580 if (fa->fa_tos != tos) 573 if (fa->fa_tos != cfg->fc_tos)
581 break; 574 break;
582 575
583 if ((!r->rtm_type || 576 if ((!cfg->fc_type ||
584 fa->fa_type == r->rtm_type) && 577 fa->fa_type == cfg->fc_type) &&
585 (r->rtm_scope == RT_SCOPE_NOWHERE || 578 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
586 fa->fa_scope == r->rtm_scope) && 579 fa->fa_scope == cfg->fc_scope) &&
587 (!r->rtm_protocol || 580 (!cfg->fc_protocol ||
588 fi->fib_protocol == r->rtm_protocol) && 581 fi->fib_protocol == cfg->fc_protocol) &&
589 fib_nh_match(r, n, rta, fi) == 0) { 582 fib_nh_match(cfg, fi) == 0) {
590 fa_to_delete = fa; 583 fa_to_delete = fa;
591 break; 584 break;
592 } 585 }
@@ -596,7 +589,8 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
596 int kill_fn; 589 int kill_fn;
597 590
598 fa = fa_to_delete; 591 fa = fa_to_delete;
599 rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req); 592 rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len,
593 tb->tb_id, &cfg->fc_nlinfo);
600 594
601 kill_fn = 0; 595 kill_fn = 0;
602 write_lock_bh(&fib_hash_lock); 596 write_lock_bh(&fib_hash_lock);
@@ -684,7 +678,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
684 struct fib_node *f; 678 struct fib_node *f;
685 int i, s_i; 679 int i, s_i;
686 680
687 s_i = cb->args[3]; 681 s_i = cb->args[4];
688 i = 0; 682 i = 0;
689 hlist_for_each_entry(f, node, head, fn_hash) { 683 hlist_for_each_entry(f, node, head, fn_hash) {
690 struct fib_alias *fa; 684 struct fib_alias *fa;
@@ -699,19 +693,19 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
699 tb->tb_id, 693 tb->tb_id,
700 fa->fa_type, 694 fa->fa_type,
701 fa->fa_scope, 695 fa->fa_scope,
702 &f->fn_key, 696 f->fn_key,
703 fz->fz_order, 697 fz->fz_order,
704 fa->fa_tos, 698 fa->fa_tos,
705 fa->fa_info, 699 fa->fa_info,
706 NLM_F_MULTI) < 0) { 700 NLM_F_MULTI) < 0) {
707 cb->args[3] = i; 701 cb->args[4] = i;
708 return -1; 702 return -1;
709 } 703 }
710 next: 704 next:
711 i++; 705 i++;
712 } 706 }
713 } 707 }
714 cb->args[3] = i; 708 cb->args[4] = i;
715 return skb->len; 709 return skb->len;
716} 710}
717 711
@@ -722,21 +716,21 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
722{ 716{
723 int h, s_h; 717 int h, s_h;
724 718
725 s_h = cb->args[2]; 719 s_h = cb->args[3];
726 for (h=0; h < fz->fz_divisor; h++) { 720 for (h=0; h < fz->fz_divisor; h++) {
727 if (h < s_h) continue; 721 if (h < s_h) continue;
728 if (h > s_h) 722 if (h > s_h)
729 memset(&cb->args[3], 0, 723 memset(&cb->args[4], 0,
730 sizeof(cb->args) - 3*sizeof(cb->args[0])); 724 sizeof(cb->args) - 4*sizeof(cb->args[0]));
731 if (fz->fz_hash == NULL || 725 if (fz->fz_hash == NULL ||
732 hlist_empty(&fz->fz_hash[h])) 726 hlist_empty(&fz->fz_hash[h]))
733 continue; 727 continue;
734 if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) { 728 if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) {
735 cb->args[2] = h; 729 cb->args[3] = h;
736 return -1; 730 return -1;
737 } 731 }
738 } 732 }
739 cb->args[2] = h; 733 cb->args[3] = h;
740 return skb->len; 734 return skb->len;
741} 735}
742 736
@@ -746,28 +740,28 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
746 struct fn_zone *fz; 740 struct fn_zone *fz;
747 struct fn_hash *table = (struct fn_hash*)tb->tb_data; 741 struct fn_hash *table = (struct fn_hash*)tb->tb_data;
748 742
749 s_m = cb->args[1]; 743 s_m = cb->args[2];
750 read_lock(&fib_hash_lock); 744 read_lock(&fib_hash_lock);
751 for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) { 745 for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
752 if (m < s_m) continue; 746 if (m < s_m) continue;
753 if (m > s_m) 747 if (m > s_m)
754 memset(&cb->args[2], 0, 748 memset(&cb->args[3], 0,
755 sizeof(cb->args) - 2*sizeof(cb->args[0])); 749 sizeof(cb->args) - 3*sizeof(cb->args[0]));
756 if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { 750 if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
757 cb->args[1] = m; 751 cb->args[2] = m;
758 read_unlock(&fib_hash_lock); 752 read_unlock(&fib_hash_lock);
759 return -1; 753 return -1;
760 } 754 }
761 } 755 }
762 read_unlock(&fib_hash_lock); 756 read_unlock(&fib_hash_lock);
763 cb->args[1] = m; 757 cb->args[2] = m;
764 return skb->len; 758 return skb->len;
765} 759}
766 760
767#ifdef CONFIG_IP_MULTIPLE_TABLES 761#ifdef CONFIG_IP_MULTIPLE_TABLES
768struct fib_table * fib_hash_init(int id) 762struct fib_table * fib_hash_init(u32 id)
769#else 763#else
770struct fib_table * __init fib_hash_init(int id) 764struct fib_table * __init fib_hash_init(u32 id)
771#endif 765#endif
772{ 766{
773 struct fib_table *tb; 767 struct fib_table *tb;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index ef6609ea0eb7..fd6f7769f8ab 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -23,19 +23,14 @@ extern int fib_semantic_match(struct list_head *head,
23 struct fib_result *res, __u32 zone, __u32 mask, 23 struct fib_result *res, __u32 zone, __u32 mask,
24 int prefixlen); 24 int prefixlen);
25extern void fib_release_info(struct fib_info *); 25extern void fib_release_info(struct fib_info *);
26extern struct fib_info *fib_create_info(const struct rtmsg *r, 26extern struct fib_info *fib_create_info(struct fib_config *cfg);
27 struct kern_rta *rta, 27extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
28 const struct nlmsghdr *,
29 int *err);
30extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *,
31 struct kern_rta *rta, struct fib_info *fi);
32extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 28extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
33 u8 tb_id, u8 type, u8 scope, void *dst, 29 u32 tb_id, u8 type, u8 scope, u32 dst,
34 int dst_len, u8 tos, struct fib_info *fi, 30 int dst_len, u8 tos, struct fib_info *fi,
35 unsigned int); 31 unsigned int);
36extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, 32extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
37 int z, int tb_id, 33 int dst_len, u32 tb_id, struct nl_info *info);
38 struct nlmsghdr *n, struct netlink_skb_parms *req);
39extern struct fib_alias *fib_find_alias(struct list_head *fah, 34extern struct fib_alias *fib_find_alias(struct list_head *fah,
40 u8 tos, u32 prio); 35 u8 tos, u32 prio);
41extern int fib_detect_death(struct fib_info *fi, int order, 36extern int fib_detect_death(struct fib_info *fi, int order,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 79b04718bdfd..52b2adae4f22 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -5,9 +5,8 @@
5 * 5 *
6 * IPv4 Forwarding Information Base: policy rules. 6 * IPv4 Forwarding Information Base: policy rules.
7 * 7 *
8 * Version: $Id: fib_rules.c,v 1.17 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 * Thomas Graf <tgraf@suug.ch>
11 * 10 *
12 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License 12 * modify it under the terms of the GNU General Public License
@@ -19,463 +18,350 @@
19 * Marc Boucher : routing by fwmark 18 * Marc Boucher : routing by fwmark
20 */ 19 */
21 20
22#include <asm/uaccess.h>
23#include <asm/system.h>
24#include <linux/bitops.h>
25#include <linux/types.h> 21#include <linux/types.h>
26#include <linux/kernel.h> 22#include <linux/kernel.h>
27#include <linux/sched.h>
28#include <linux/mm.h>
29#include <linux/string.h>
30#include <linux/socket.h>
31#include <linux/sockios.h>
32#include <linux/errno.h>
33#include <linux/in.h>
34#include <linux/inet.h>
35#include <linux/inetdevice.h>
36#include <linux/netdevice.h> 23#include <linux/netdevice.h>
37#include <linux/if_arp.h>
38#include <linux/proc_fs.h>
39#include <linux/skbuff.h>
40#include <linux/netlink.h> 24#include <linux/netlink.h>
25#include <linux/inetdevice.h>
41#include <linux/init.h> 26#include <linux/init.h>
42#include <linux/list.h> 27#include <linux/list.h>
43#include <linux/rcupdate.h> 28#include <linux/rcupdate.h>
44
45#include <net/ip.h> 29#include <net/ip.h>
46#include <net/protocol.h>
47#include <net/route.h> 30#include <net/route.h>
48#include <net/tcp.h> 31#include <net/tcp.h>
49#include <net/sock.h>
50#include <net/ip_fib.h> 32#include <net/ip_fib.h>
33#include <net/fib_rules.h>
51 34
52#define FRprintk(a...) 35static struct fib_rules_ops fib4_rules_ops;
53 36
54struct fib_rule 37struct fib4_rule
55{ 38{
56 struct hlist_node hlist; 39 struct fib_rule common;
57 atomic_t r_clntref; 40 u8 dst_len;
58 u32 r_preference; 41 u8 src_len;
59 unsigned char r_table; 42 u8 tos;
60 unsigned char r_action; 43 u32 src;
61 unsigned char r_dst_len; 44 u32 srcmask;
62 unsigned char r_src_len; 45 u32 dst;
63 u32 r_src; 46 u32 dstmask;
64 u32 r_srcmask;
65 u32 r_dst;
66 u32 r_dstmask;
67 u32 r_srcmap;
68 u8 r_flags;
69 u8 r_tos;
70#ifdef CONFIG_IP_ROUTE_FWMARK 47#ifdef CONFIG_IP_ROUTE_FWMARK
71 u32 r_fwmark; 48 u32 fwmark;
49 u32 fwmask;
72#endif 50#endif
73 int r_ifindex;
74#ifdef CONFIG_NET_CLS_ROUTE 51#ifdef CONFIG_NET_CLS_ROUTE
75 __u32 r_tclassid; 52 u32 tclassid;
76#endif 53#endif
77 char r_ifname[IFNAMSIZ];
78 int r_dead;
79 struct rcu_head rcu;
80}; 54};
81 55
82static struct fib_rule default_rule = { 56static struct fib4_rule default_rule = {
83 .r_clntref = ATOMIC_INIT(2), 57 .common = {
84 .r_preference = 0x7FFF, 58 .refcnt = ATOMIC_INIT(2),
85 .r_table = RT_TABLE_DEFAULT, 59 .pref = 0x7FFF,
86 .r_action = RTN_UNICAST, 60 .table = RT_TABLE_DEFAULT,
61 .action = FR_ACT_TO_TBL,
62 },
87}; 63};
88 64
89static struct fib_rule main_rule = { 65static struct fib4_rule main_rule = {
90 .r_clntref = ATOMIC_INIT(2), 66 .common = {
91 .r_preference = 0x7FFE, 67 .refcnt = ATOMIC_INIT(2),
92 .r_table = RT_TABLE_MAIN, 68 .pref = 0x7FFE,
93 .r_action = RTN_UNICAST, 69 .table = RT_TABLE_MAIN,
70 .action = FR_ACT_TO_TBL,
71 },
94}; 72};
95 73
96static struct fib_rule local_rule = { 74static struct fib4_rule local_rule = {
97 .r_clntref = ATOMIC_INIT(2), 75 .common = {
98 .r_table = RT_TABLE_LOCAL, 76 .refcnt = ATOMIC_INIT(2),
99 .r_action = RTN_UNICAST, 77 .table = RT_TABLE_LOCAL,
78 .action = FR_ACT_TO_TBL,
79 .flags = FIB_RULE_PERMANENT,
80 },
100}; 81};
101 82
102static struct hlist_head fib_rules; 83static LIST_HEAD(fib4_rules);
103 84
104/* writer func called from netlink -- rtnl_sem hold*/ 85#ifdef CONFIG_NET_CLS_ROUTE
105 86u32 fib_rules_tclass(struct fib_result *res)
106static void rtmsg_rule(int, struct fib_rule *);
107
108int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
109{ 87{
110 struct rtattr **rta = arg; 88 return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
111 struct rtmsg *rtm = NLMSG_DATA(nlh);
112 struct fib_rule *r;
113 struct hlist_node *node;
114 int err = -ESRCH;
115
116 hlist_for_each_entry(r, node, &fib_rules, hlist) {
117 if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) &&
118 rtm->rtm_src_len == r->r_src_len &&
119 rtm->rtm_dst_len == r->r_dst_len &&
120 (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) &&
121 rtm->rtm_tos == r->r_tos &&
122#ifdef CONFIG_IP_ROUTE_FWMARK
123 (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) &&
124#endif
125 (!rtm->rtm_type || rtm->rtm_type == r->r_action) &&
126 (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) &&
127 (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) &&
128 (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
129 err = -EPERM;
130 if (r == &local_rule)
131 break;
132
133 hlist_del_rcu(&r->hlist);
134 r->r_dead = 1;
135 rtmsg_rule(RTM_DELRULE, r);
136 fib_rule_put(r);
137 err = 0;
138 break;
139 }
140 }
141 return err;
142} 89}
90#endif
143 91
144/* Allocate new unique table id */ 92int fib_lookup(struct flowi *flp, struct fib_result *res)
145
146static struct fib_table *fib_empty_table(void)
147{ 93{
148 int id; 94 struct fib_lookup_arg arg = {
95 .result = res,
96 };
97 int err;
149 98
150 for (id = 1; id <= RT_TABLE_MAX; id++) 99 err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg);
151 if (fib_tables[id] == NULL) 100 res->r = arg.rule;
152 return __fib_new_table(id);
153 return NULL;
154}
155 101
156static inline void fib_rule_put_rcu(struct rcu_head *head) 102 return err;
157{
158 struct fib_rule *r = container_of(head, struct fib_rule, rcu);
159 kfree(r);
160} 103}
161 104
162void fib_rule_put(struct fib_rule *r) 105static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
106 int flags, struct fib_lookup_arg *arg)
163{ 107{
164 if (atomic_dec_and_test(&r->r_clntref)) { 108 int err = -EAGAIN;
165 if (r->r_dead) 109 struct fib_table *tbl;
166 call_rcu(&r->rcu, fib_rule_put_rcu); 110
167 else 111 switch (rule->action) {
168 printk("Freeing alive rule %p\n", r); 112 case FR_ACT_TO_TBL:
113 break;
114
115 case FR_ACT_UNREACHABLE:
116 err = -ENETUNREACH;
117 goto errout;
118
119 case FR_ACT_PROHIBIT:
120 err = -EACCES;
121 goto errout;
122
123 case FR_ACT_BLACKHOLE:
124 default:
125 err = -EINVAL;
126 goto errout;
169 } 127 }
128
129 if ((tbl = fib_get_table(rule->table)) == NULL)
130 goto errout;
131
132 err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result);
133 if (err > 0)
134 err = -EAGAIN;
135errout:
136 return err;
170} 137}
171 138
172/* writer func called from netlink -- rtnl_sem hold*/
173 139
174int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 140void fib_select_default(const struct flowi *flp, struct fib_result *res)
175{ 141{
176 struct rtattr **rta = arg; 142 if (res->r && res->r->action == FR_ACT_TO_TBL &&
177 struct rtmsg *rtm = NLMSG_DATA(nlh); 143 FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
178 struct fib_rule *r, *new_r, *last = NULL; 144 struct fib_table *tb;
179 struct hlist_node *node = NULL; 145 if ((tb = fib_get_table(res->r->table)) != NULL)
180 unsigned char table_id; 146 tb->tb_select_default(tb, flp, res);
181
182 if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 ||
183 (rtm->rtm_tos & ~IPTOS_TOS_MASK))
184 return -EINVAL;
185
186 if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ)
187 return -EINVAL;
188
189 table_id = rtm->rtm_table;
190 if (table_id == RT_TABLE_UNSPEC) {
191 struct fib_table *table;
192 if (rtm->rtm_type == RTN_UNICAST) {
193 if ((table = fib_empty_table()) == NULL)
194 return -ENOBUFS;
195 table_id = table->tb_id;
196 }
197 } 147 }
148}
198 149
199 new_r = kzalloc(sizeof(*new_r), GFP_KERNEL); 150static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
200 if (!new_r) 151{
201 return -ENOMEM; 152 struct fib4_rule *r = (struct fib4_rule *) rule;
202 153 u32 daddr = fl->fl4_dst;
203 if (rta[RTA_SRC-1]) 154 u32 saddr = fl->fl4_src;
204 memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4);
205 if (rta[RTA_DST-1])
206 memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4);
207 if (rta[RTA_GATEWAY-1])
208 memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4);
209 new_r->r_src_len = rtm->rtm_src_len;
210 new_r->r_dst_len = rtm->rtm_dst_len;
211 new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len);
212 new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len);
213 new_r->r_tos = rtm->rtm_tos;
214#ifdef CONFIG_IP_ROUTE_FWMARK
215 if (rta[RTA_PROTOINFO-1])
216 memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4);
217#endif
218 new_r->r_action = rtm->rtm_type;
219 new_r->r_flags = rtm->rtm_flags;
220 if (rta[RTA_PRIORITY-1])
221 memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
222 new_r->r_table = table_id;
223 if (rta[RTA_IIF-1]) {
224 struct net_device *dev;
225 rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ);
226 new_r->r_ifindex = -1;
227 dev = __dev_get_by_name(new_r->r_ifname);
228 if (dev)
229 new_r->r_ifindex = dev->ifindex;
230 }
231#ifdef CONFIG_NET_CLS_ROUTE
232 if (rta[RTA_FLOW-1])
233 memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4);
234#endif
235 r = container_of(fib_rules.first, struct fib_rule, hlist);
236 155
237 if (!new_r->r_preference) { 156 if (((saddr ^ r->src) & r->srcmask) ||
238 if (r && r->hlist.next != NULL) { 157 ((daddr ^ r->dst) & r->dstmask))
239 r = container_of(r->hlist.next, struct fib_rule, hlist); 158 return 0;
240 if (r->r_preference)
241 new_r->r_preference = r->r_preference - 1;
242 }
243 }
244 159
245 hlist_for_each_entry(r, node, &fib_rules, hlist) { 160 if (r->tos && (r->tos != fl->fl4_tos))
246 if (r->r_preference > new_r->r_preference) 161 return 0;
247 break;
248 last = r;
249 }
250 atomic_inc(&new_r->r_clntref);
251 162
252 if (last) 163#ifdef CONFIG_IP_ROUTE_FWMARK
253 hlist_add_after_rcu(&last->hlist, &new_r->hlist); 164 if ((r->fwmark ^ fl->fl4_fwmark) & r->fwmask)
254 else 165 return 0;
255 hlist_add_before_rcu(&new_r->hlist, &r->hlist); 166#endif
256 167
257 rtmsg_rule(RTM_NEWRULE, new_r); 168 return 1;
258 return 0;
259} 169}
260 170
261#ifdef CONFIG_NET_CLS_ROUTE 171static struct fib_table *fib_empty_table(void)
262u32 fib_rules_tclass(struct fib_result *res)
263{ 172{
264 if (res->r) 173 u32 id;
265 return res->r->r_tclassid; 174
266 return 0; 175 for (id = 1; id <= RT_TABLE_MAX; id++)
176 if (fib_get_table(id) == NULL)
177 return fib_new_table(id);
178 return NULL;
267} 179}
268#endif
269 180
270/* callers should hold rtnl semaphore */ 181static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = {
182 [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
183 [FRA_PRIORITY] = { .type = NLA_U32 },
184 [FRA_SRC] = { .type = NLA_U32 },
185 [FRA_DST] = { .type = NLA_U32 },
186 [FRA_FWMARK] = { .type = NLA_U32 },
187 [FRA_FWMASK] = { .type = NLA_U32 },
188 [FRA_FLOW] = { .type = NLA_U32 },
189 [FRA_TABLE] = { .type = NLA_U32 },
190};
271 191
272static void fib_rules_detach(struct net_device *dev) 192static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
193 struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
194 struct nlattr **tb)
273{ 195{
274 struct hlist_node *node; 196 int err = -EINVAL;
275 struct fib_rule *r; 197 struct fib4_rule *rule4 = (struct fib4_rule *) rule;
198
199 if (frh->src_len > 32 || frh->dst_len > 32 ||
200 (frh->tos & ~IPTOS_TOS_MASK))
201 goto errout;
202
203 if (rule->table == RT_TABLE_UNSPEC) {
204 if (rule->action == FR_ACT_TO_TBL) {
205 struct fib_table *table;
276 206
277 hlist_for_each_entry(r, node, &fib_rules, hlist) { 207 table = fib_empty_table();
278 if (r->r_ifindex == dev->ifindex) 208 if (table == NULL) {
279 r->r_ifindex = -1; 209 err = -ENOBUFS;
210 goto errout;
211 }
280 212
213 rule->table = table->tb_id;
214 }
281 } 215 }
282}
283 216
284/* callers should hold rtnl semaphore */ 217 if (tb[FRA_SRC])
218 rule4->src = nla_get_u32(tb[FRA_SRC]);
285 219
286static void fib_rules_attach(struct net_device *dev) 220 if (tb[FRA_DST])
287{ 221 rule4->dst = nla_get_u32(tb[FRA_DST]);
288 struct hlist_node *node;
289 struct fib_rule *r;
290 222
291 hlist_for_each_entry(r, node, &fib_rules, hlist) { 223#ifdef CONFIG_IP_ROUTE_FWMARK
292 if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) 224 if (tb[FRA_FWMARK]) {
293 r->r_ifindex = dev->ifindex; 225 rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]);
226 if (rule4->fwmark)
227 /* compatibility: if the mark value is non-zero all bits
228 * are compared unless a mask is explicitly specified.
229 */
230 rule4->fwmask = 0xFFFFFFFF;
294 } 231 }
232
233 if (tb[FRA_FWMASK])
234 rule4->fwmask = nla_get_u32(tb[FRA_FWMASK]);
235#endif
236
237#ifdef CONFIG_NET_CLS_ROUTE
238 if (tb[FRA_FLOW])
239 rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
240#endif
241
242 rule4->src_len = frh->src_len;
243 rule4->srcmask = inet_make_mask(rule4->src_len);
244 rule4->dst_len = frh->dst_len;
245 rule4->dstmask = inet_make_mask(rule4->dst_len);
246 rule4->tos = frh->tos;
247
248 err = 0;
249errout:
250 return err;
295} 251}
296 252
297int fib_lookup(const struct flowi *flp, struct fib_result *res) 253static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
254 struct nlattr **tb)
298{ 255{
299 int err; 256 struct fib4_rule *rule4 = (struct fib4_rule *) rule;
300 struct fib_rule *r, *policy;
301 struct fib_table *tb;
302 struct hlist_node *node;
303 257
304 u32 daddr = flp->fl4_dst; 258 if (frh->src_len && (rule4->src_len != frh->src_len))
305 u32 saddr = flp->fl4_src; 259 return 0;
306 260
307FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ", 261 if (frh->dst_len && (rule4->dst_len != frh->dst_len))
308 NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src)); 262 return 0;
309 263
310 rcu_read_lock(); 264 if (frh->tos && (rule4->tos != frh->tos))
265 return 0;
311 266
312 hlist_for_each_entry_rcu(r, node, &fib_rules, hlist) {
313 if (((saddr^r->r_src) & r->r_srcmask) ||
314 ((daddr^r->r_dst) & r->r_dstmask) ||
315 (r->r_tos && r->r_tos != flp->fl4_tos) ||
316#ifdef CONFIG_IP_ROUTE_FWMARK 267#ifdef CONFIG_IP_ROUTE_FWMARK
317 (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) || 268 if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK])))
269 return 0;
270
271 if (tb[FRA_FWMASK] && (rule4->fwmask != nla_get_u32(tb[FRA_FWMASK])))
272 return 0;
318#endif 273#endif
319 (r->r_ifindex && r->r_ifindex != flp->iif))
320 continue;
321
322FRprintk("tb %d r %d ", r->r_table, r->r_action);
323 switch (r->r_action) {
324 case RTN_UNICAST:
325 policy = r;
326 break;
327 case RTN_UNREACHABLE:
328 rcu_read_unlock();
329 return -ENETUNREACH;
330 default:
331 case RTN_BLACKHOLE:
332 rcu_read_unlock();
333 return -EINVAL;
334 case RTN_PROHIBIT:
335 rcu_read_unlock();
336 return -EACCES;
337 }
338 274
339 if ((tb = fib_get_table(r->r_table)) == NULL) 275#ifdef CONFIG_NET_CLS_ROUTE
340 continue; 276 if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
341 err = tb->tb_lookup(tb, flp, res); 277 return 0;
342 if (err == 0) { 278#endif
343 res->r = policy;
344 if (policy)
345 atomic_inc(&policy->r_clntref);
346 rcu_read_unlock();
347 return 0;
348 }
349 if (err < 0 && err != -EAGAIN) {
350 rcu_read_unlock();
351 return err;
352 }
353 }
354FRprintk("FAILURE\n");
355 rcu_read_unlock();
356 return -ENETUNREACH;
357}
358 279
359void fib_select_default(const struct flowi *flp, struct fib_result *res) 280 if (tb[FRA_SRC] && (rule4->src != nla_get_u32(tb[FRA_SRC])))
360{ 281 return 0;
361 if (res->r && res->r->r_action == RTN_UNICAST &&
362 FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
363 struct fib_table *tb;
364 if ((tb = fib_get_table(res->r->r_table)) != NULL)
365 tb->tb_select_default(tb, flp, res);
366 }
367}
368 282
369static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) 283 if (tb[FRA_DST] && (rule4->dst != nla_get_u32(tb[FRA_DST])))
370{ 284 return 0;
371 struct net_device *dev = ptr;
372 285
373 if (event == NETDEV_UNREGISTER) 286 return 1;
374 fib_rules_detach(dev);
375 else if (event == NETDEV_REGISTER)
376 fib_rules_attach(dev);
377 return NOTIFY_DONE;
378} 287}
379 288
289static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
290 struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
291{
292 struct fib4_rule *rule4 = (struct fib4_rule *) rule;
380 293
381static struct notifier_block fib_rules_notifier = { 294 frh->family = AF_INET;
382 .notifier_call =fib_rules_event, 295 frh->dst_len = rule4->dst_len;
383}; 296 frh->src_len = rule4->src_len;
297 frh->tos = rule4->tos;
384 298
385static __inline__ int inet_fill_rule(struct sk_buff *skb,
386 struct fib_rule *r,
387 u32 pid, u32 seq, int event,
388 unsigned int flags)
389{
390 struct rtmsg *rtm;
391 struct nlmsghdr *nlh;
392 unsigned char *b = skb->tail;
393
394 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
395 rtm = NLMSG_DATA(nlh);
396 rtm->rtm_family = AF_INET;
397 rtm->rtm_dst_len = r->r_dst_len;
398 rtm->rtm_src_len = r->r_src_len;
399 rtm->rtm_tos = r->r_tos;
400#ifdef CONFIG_IP_ROUTE_FWMARK 299#ifdef CONFIG_IP_ROUTE_FWMARK
401 if (r->r_fwmark) 300 if (rule4->fwmark)
402 RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark); 301 NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark);
302
303 if (rule4->fwmask || rule4->fwmark)
304 NLA_PUT_U32(skb, FRA_FWMASK, rule4->fwmask);
403#endif 305#endif
404 rtm->rtm_table = r->r_table; 306
405 rtm->rtm_protocol = 0; 307 if (rule4->dst_len)
406 rtm->rtm_scope = 0; 308 NLA_PUT_U32(skb, FRA_DST, rule4->dst);
407 rtm->rtm_type = r->r_action; 309
408 rtm->rtm_flags = r->r_flags; 310 if (rule4->src_len)
409 311 NLA_PUT_U32(skb, FRA_SRC, rule4->src);
410 if (r->r_dst_len) 312
411 RTA_PUT(skb, RTA_DST, 4, &r->r_dst);
412 if (r->r_src_len)
413 RTA_PUT(skb, RTA_SRC, 4, &r->r_src);
414 if (r->r_ifname[0])
415 RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname);
416 if (r->r_preference)
417 RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference);
418 if (r->r_srcmap)
419 RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap);
420#ifdef CONFIG_NET_CLS_ROUTE 313#ifdef CONFIG_NET_CLS_ROUTE
421 if (r->r_tclassid) 314 if (rule4->tclassid)
422 RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid); 315 NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
423#endif 316#endif
424 nlh->nlmsg_len = skb->tail - b; 317 return 0;
425 return skb->len;
426 318
427nlmsg_failure: 319nla_put_failure:
428rtattr_failure: 320 return -ENOBUFS;
429 skb_trim(skb, b - skb->data);
430 return -1;
431} 321}
432 322
433/* callers should hold rtnl semaphore */ 323int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
434
435static void rtmsg_rule(int event, struct fib_rule *r)
436{ 324{
437 int size = NLMSG_SPACE(sizeof(struct rtmsg) + 128); 325 return fib_rules_dump(skb, cb, AF_INET);
438 struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
439
440 if (!skb)
441 netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, ENOBUFS);
442 else if (inet_fill_rule(skb, r, 0, 0, event, 0) < 0) {
443 kfree_skb(skb);
444 netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, EINVAL);
445 } else {
446 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_RULE, GFP_KERNEL);
447 }
448} 326}
449 327
450int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) 328static u32 fib4_rule_default_pref(void)
451{ 329{
452 int idx = 0; 330 struct list_head *pos;
453 int s_idx = cb->args[0]; 331 struct fib_rule *rule;
454 struct fib_rule *r; 332
455 struct hlist_node *node; 333 if (!list_empty(&fib4_rules)) {
456 334 pos = fib4_rules.next;
457 rcu_read_lock(); 335 if (pos->next != &fib4_rules) {
458 hlist_for_each_entry(r, node, &fib_rules, hlist) { 336 rule = list_entry(pos->next, struct fib_rule, list);
459 if (idx < s_idx) 337 if (rule->pref)
460 goto next; 338 return rule->pref - 1;
461 if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid, 339 }
462 cb->nlh->nlmsg_seq,
463 RTM_NEWRULE, NLM_F_MULTI) < 0)
464 break;
465next:
466 idx++;
467 } 340 }
468 rcu_read_unlock();
469 cb->args[0] = idx;
470 341
471 return skb->len; 342 return 0;
472} 343}
473 344
474void __init fib_rules_init(void) 345static struct fib_rules_ops fib4_rules_ops = {
346 .family = AF_INET,
347 .rule_size = sizeof(struct fib4_rule),
348 .action = fib4_rule_action,
349 .match = fib4_rule_match,
350 .configure = fib4_rule_configure,
351 .compare = fib4_rule_compare,
352 .fill = fib4_rule_fill,
353 .default_pref = fib4_rule_default_pref,
354 .nlgroup = RTNLGRP_IPV4_RULE,
355 .policy = fib4_rule_policy,
356 .rules_list = &fib4_rules,
357 .owner = THIS_MODULE,
358};
359
360void __init fib4_rules_init(void)
475{ 361{
476 INIT_HLIST_HEAD(&fib_rules); 362 list_add_tail(&local_rule.common.list, &fib4_rules);
477 hlist_add_head(&local_rule.hlist, &fib_rules); 363 list_add_tail(&main_rule.common.list, &fib4_rules);
478 hlist_add_after(&local_rule.hlist, &main_rule.hlist); 364 list_add_tail(&default_rule.common.list, &fib4_rules);
479 hlist_add_after(&main_rule.hlist, &default_rule.hlist); 365
480 register_netdevice_notifier(&fib_rules_notifier); 366 fib_rules_register(&fib4_rules_ops);
481} 367}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 51738000f3dc..2ead09543f68 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -33,7 +33,6 @@
33#include <linux/if_arp.h> 33#include <linux/if_arp.h>
34#include <linux/proc_fs.h> 34#include <linux/proc_fs.h>
35#include <linux/skbuff.h> 35#include <linux/skbuff.h>
36#include <linux/netlink.h>
37#include <linux/init.h> 36#include <linux/init.h>
38 37
39#include <net/arp.h> 38#include <net/arp.h>
@@ -44,12 +43,14 @@
44#include <net/sock.h> 43#include <net/sock.h>
45#include <net/ip_fib.h> 44#include <net/ip_fib.h>
46#include <net/ip_mp_alg.h> 45#include <net/ip_mp_alg.h>
46#include <net/netlink.h>
47#include <net/nexthop.h>
47 48
48#include "fib_lookup.h" 49#include "fib_lookup.h"
49 50
50#define FSprintk(a...) 51#define FSprintk(a...)
51 52
52static DEFINE_RWLOCK(fib_info_lock); 53static DEFINE_SPINLOCK(fib_info_lock);
53static struct hlist_head *fib_info_hash; 54static struct hlist_head *fib_info_hash;
54static struct hlist_head *fib_info_laddrhash; 55static struct hlist_head *fib_info_laddrhash;
55static unsigned int fib_hash_size; 56static unsigned int fib_hash_size;
@@ -159,7 +160,7 @@ void free_fib_info(struct fib_info *fi)
159 160
160void fib_release_info(struct fib_info *fi) 161void fib_release_info(struct fib_info *fi)
161{ 162{
162 write_lock_bh(&fib_info_lock); 163 spin_lock_bh(&fib_info_lock);
163 if (fi && --fi->fib_treeref == 0) { 164 if (fi && --fi->fib_treeref == 0) {
164 hlist_del(&fi->fib_hash); 165 hlist_del(&fi->fib_hash);
165 if (fi->fib_prefsrc) 166 if (fi->fib_prefsrc)
@@ -172,7 +173,7 @@ void fib_release_info(struct fib_info *fi)
172 fi->fib_dead = 1; 173 fi->fib_dead = 1;
173 fib_info_put(fi); 174 fib_info_put(fi);
174 } 175 }
175 write_unlock_bh(&fib_info_lock); 176 spin_unlock_bh(&fib_info_lock);
176} 177}
177 178
178static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 179static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
@@ -254,7 +255,7 @@ int ip_fib_check_default(u32 gw, struct net_device *dev)
254 struct fib_nh *nh; 255 struct fib_nh *nh;
255 unsigned int hash; 256 unsigned int hash;
256 257
257 read_lock(&fib_info_lock); 258 spin_lock(&fib_info_lock);
258 259
259 hash = fib_devindex_hashfn(dev->ifindex); 260 hash = fib_devindex_hashfn(dev->ifindex);
260 head = &fib_info_devhash[hash]; 261 head = &fib_info_devhash[hash];
@@ -262,41 +263,41 @@ int ip_fib_check_default(u32 gw, struct net_device *dev)
262 if (nh->nh_dev == dev && 263 if (nh->nh_dev == dev &&
263 nh->nh_gw == gw && 264 nh->nh_gw == gw &&
264 !(nh->nh_flags&RTNH_F_DEAD)) { 265 !(nh->nh_flags&RTNH_F_DEAD)) {
265 read_unlock(&fib_info_lock); 266 spin_unlock(&fib_info_lock);
266 return 0; 267 return 0;
267 } 268 }
268 } 269 }
269 270
270 read_unlock(&fib_info_lock); 271 spin_unlock(&fib_info_lock);
271 272
272 return -1; 273 return -1;
273} 274}
274 275
275void rtmsg_fib(int event, u32 key, struct fib_alias *fa, 276void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
276 int z, int tb_id, 277 int dst_len, u32 tb_id, struct nl_info *info)
277 struct nlmsghdr *n, struct netlink_skb_parms *req)
278{ 278{
279 struct sk_buff *skb; 279 struct sk_buff *skb;
280 u32 pid = req ? req->pid : n->nlmsg_pid; 280 int payload = sizeof(struct rtmsg) + 256;
281 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); 281 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
282 282 int err = -ENOBUFS;
283 skb = alloc_skb(size, GFP_KERNEL); 283
284 if (!skb) 284 skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
285 return; 285 if (skb == NULL)
286 286 goto errout;
287 if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, 287
288 fa->fa_type, fa->fa_scope, &key, z, 288 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
289 fa->fa_tos, 289 fa->fa_type, fa->fa_scope, key, dst_len,
290 fa->fa_info, 0) < 0) { 290 fa->fa_tos, fa->fa_info, 0);
291 if (err < 0) {
291 kfree_skb(skb); 292 kfree_skb(skb);
292 return; 293 goto errout;
293 } 294 }
294 NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE; 295
295 if (n->nlmsg_flags&NLM_F_ECHO) 296 err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
296 atomic_inc(&skb->users); 297 info->nlh, GFP_KERNEL);
297 netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL); 298errout:
298 if (n->nlmsg_flags&NLM_F_ECHO) 299 if (err < 0)
299 netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); 300 rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
300} 301}
301 302
302/* Return the first fib alias matching TOS with 303/* Return the first fib alias matching TOS with
@@ -342,102 +343,100 @@ int fib_detect_death(struct fib_info *fi, int order,
342 343
343#ifdef CONFIG_IP_ROUTE_MULTIPATH 344#ifdef CONFIG_IP_ROUTE_MULTIPATH
344 345
345static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type) 346static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
346{
347 while (RTA_OK(attr,attrlen)) {
348 if (attr->rta_type == type)
349 return *(u32*)RTA_DATA(attr);
350 attr = RTA_NEXT(attr, attrlen);
351 }
352 return 0;
353}
354
355static int
356fib_count_nexthops(struct rtattr *rta)
357{ 347{
358 int nhs = 0; 348 int nhs = 0;
359 struct rtnexthop *nhp = RTA_DATA(rta);
360 int nhlen = RTA_PAYLOAD(rta);
361 349
362 while (nhlen >= (int)sizeof(struct rtnexthop)) { 350 while (rtnh_ok(rtnh, remaining)) {
363 if ((nhlen -= nhp->rtnh_len) < 0)
364 return 0;
365 nhs++; 351 nhs++;
366 nhp = RTNH_NEXT(nhp); 352 rtnh = rtnh_next(rtnh, &remaining);
367 }; 353 }
368 return nhs; 354
355 /* leftover implies invalid nexthop configuration, discard it */
356 return remaining > 0 ? 0 : nhs;
369} 357}
370 358
371static int 359static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
372fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) 360 int remaining, struct fib_config *cfg)
373{ 361{
374 struct rtnexthop *nhp = RTA_DATA(rta);
375 int nhlen = RTA_PAYLOAD(rta);
376
377 change_nexthops(fi) { 362 change_nexthops(fi) {
378 int attrlen = nhlen - sizeof(struct rtnexthop); 363 int attrlen;
379 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) 364
365 if (!rtnh_ok(rtnh, remaining))
380 return -EINVAL; 366 return -EINVAL;
381 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; 367
382 nh->nh_oif = nhp->rtnh_ifindex; 368 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
383 nh->nh_weight = nhp->rtnh_hops + 1; 369 nh->nh_oif = rtnh->rtnh_ifindex;
384 if (attrlen) { 370 nh->nh_weight = rtnh->rtnh_hops + 1;
385 nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); 371
372 attrlen = rtnh_attrlen(rtnh);
373 if (attrlen > 0) {
374 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
375
376 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
377 nh->nh_gw = nla ? nla_get_u32(nla) : 0;
386#ifdef CONFIG_NET_CLS_ROUTE 378#ifdef CONFIG_NET_CLS_ROUTE
387 nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); 379 nla = nla_find(attrs, attrlen, RTA_FLOW);
380 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
388#endif 381#endif
389 } 382 }
390 nhp = RTNH_NEXT(nhp); 383
384 rtnh = rtnh_next(rtnh, &remaining);
391 } endfor_nexthops(fi); 385 } endfor_nexthops(fi);
386
392 return 0; 387 return 0;
393} 388}
394 389
395#endif 390#endif
396 391
397int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, 392int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
398 struct fib_info *fi)
399{ 393{
400#ifdef CONFIG_IP_ROUTE_MULTIPATH 394#ifdef CONFIG_IP_ROUTE_MULTIPATH
401 struct rtnexthop *nhp; 395 struct rtnexthop *rtnh;
402 int nhlen; 396 int remaining;
403#endif 397#endif
404 398
405 if (rta->rta_priority && 399 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
406 *rta->rta_priority != fi->fib_priority)
407 return 1; 400 return 1;
408 401
409 if (rta->rta_oif || rta->rta_gw) { 402 if (cfg->fc_oif || cfg->fc_gw) {
410 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && 403 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
411 (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0)) 404 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
412 return 0; 405 return 0;
413 return 1; 406 return 1;
414 } 407 }
415 408
416#ifdef CONFIG_IP_ROUTE_MULTIPATH 409#ifdef CONFIG_IP_ROUTE_MULTIPATH
417 if (rta->rta_mp == NULL) 410 if (cfg->fc_mp == NULL)
418 return 0; 411 return 0;
419 nhp = RTA_DATA(rta->rta_mp); 412
420 nhlen = RTA_PAYLOAD(rta->rta_mp); 413 rtnh = cfg->fc_mp;
414 remaining = cfg->fc_mp_len;
421 415
422 for_nexthops(fi) { 416 for_nexthops(fi) {
423 int attrlen = nhlen - sizeof(struct rtnexthop); 417 int attrlen;
424 u32 gw;
425 418
426 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) 419 if (!rtnh_ok(rtnh, remaining))
427 return -EINVAL; 420 return -EINVAL;
428 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) 421
422 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
429 return 1; 423 return 1;
430 if (attrlen) { 424
431 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); 425 attrlen = rtnh_attrlen(rtnh);
432 if (gw && gw != nh->nh_gw) 426 if (attrlen < 0) {
427 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
428
429 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
430 if (nla && nla_get_u32(nla) != nh->nh_gw)
433 return 1; 431 return 1;
434#ifdef CONFIG_NET_CLS_ROUTE 432#ifdef CONFIG_NET_CLS_ROUTE
435 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); 433 nla = nla_find(attrs, attrlen, RTA_FLOW);
436 if (gw && gw != nh->nh_tclassid) 434 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
437 return 1; 435 return 1;
438#endif 436#endif
439 } 437 }
440 nhp = RTNH_NEXT(nhp); 438
439 rtnh = rtnh_next(rtnh, &remaining);
441 } endfor_nexthops(fi); 440 } endfor_nexthops(fi);
442#endif 441#endif
443 return 0; 442 return 0;
@@ -488,7 +487,8 @@ int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
488 |-> {local prefix} (terminal node) 487 |-> {local prefix} (terminal node)
489 */ 488 */
490 489
491static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh) 490static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
491 struct fib_nh *nh)
492{ 492{
493 int err; 493 int err;
494 494
@@ -502,7 +502,7 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n
502 if (nh->nh_flags&RTNH_F_ONLINK) { 502 if (nh->nh_flags&RTNH_F_ONLINK) {
503 struct net_device *dev; 503 struct net_device *dev;
504 504
505 if (r->rtm_scope >= RT_SCOPE_LINK) 505 if (cfg->fc_scope >= RT_SCOPE_LINK)
506 return -EINVAL; 506 return -EINVAL;
507 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) 507 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
508 return -EINVAL; 508 return -EINVAL;
@@ -516,10 +516,15 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n
516 return 0; 516 return 0;
517 } 517 }
518 { 518 {
519 struct flowi fl = { .nl_u = { .ip4_u = 519 struct flowi fl = {
520 { .daddr = nh->nh_gw, 520 .nl_u = {
521 .scope = r->rtm_scope + 1 } }, 521 .ip4_u = {
522 .oif = nh->nh_oif }; 522 .daddr = nh->nh_gw,
523 .scope = cfg->fc_scope + 1,
524 },
525 },
526 .oif = nh->nh_oif,
527 };
523 528
524 /* It is not necessary, but requires a bit of thinking */ 529 /* It is not necessary, but requires a bit of thinking */
525 if (fl.fl4_scope < RT_SCOPE_LINK) 530 if (fl.fl4_scope < RT_SCOPE_LINK)
@@ -598,7 +603,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
598 unsigned int old_size = fib_hash_size; 603 unsigned int old_size = fib_hash_size;
599 unsigned int i, bytes; 604 unsigned int i, bytes;
600 605
601 write_lock_bh(&fib_info_lock); 606 spin_lock_bh(&fib_info_lock);
602 old_info_hash = fib_info_hash; 607 old_info_hash = fib_info_hash;
603 old_laddrhash = fib_info_laddrhash; 608 old_laddrhash = fib_info_laddrhash;
604 fib_hash_size = new_size; 609 fib_hash_size = new_size;
@@ -639,46 +644,35 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
639 } 644 }
640 fib_info_laddrhash = new_laddrhash; 645 fib_info_laddrhash = new_laddrhash;
641 646
642 write_unlock_bh(&fib_info_lock); 647 spin_unlock_bh(&fib_info_lock);
643 648
644 bytes = old_size * sizeof(struct hlist_head *); 649 bytes = old_size * sizeof(struct hlist_head *);
645 fib_hash_free(old_info_hash, bytes); 650 fib_hash_free(old_info_hash, bytes);
646 fib_hash_free(old_laddrhash, bytes); 651 fib_hash_free(old_laddrhash, bytes);
647} 652}
648 653
649struct fib_info * 654struct fib_info *fib_create_info(struct fib_config *cfg)
650fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
651 const struct nlmsghdr *nlh, int *errp)
652{ 655{
653 int err; 656 int err;
654 struct fib_info *fi = NULL; 657 struct fib_info *fi = NULL;
655 struct fib_info *ofi; 658 struct fib_info *ofi;
656#ifdef CONFIG_IP_ROUTE_MULTIPATH
657 int nhs = 1; 659 int nhs = 1;
658#else
659 const int nhs = 1;
660#endif
661#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
662 u32 mp_alg = IP_MP_ALG_NONE;
663#endif
664 660
665 /* Fast check to catch the most weird cases */ 661 /* Fast check to catch the most weird cases */
666 if (fib_props[r->rtm_type].scope > r->rtm_scope) 662 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
667 goto err_inval; 663 goto err_inval;
668 664
669#ifdef CONFIG_IP_ROUTE_MULTIPATH 665#ifdef CONFIG_IP_ROUTE_MULTIPATH
670 if (rta->rta_mp) { 666 if (cfg->fc_mp) {
671 nhs = fib_count_nexthops(rta->rta_mp); 667 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
672 if (nhs == 0) 668 if (nhs == 0)
673 goto err_inval; 669 goto err_inval;
674 } 670 }
675#endif 671#endif
676#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 672#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
677 if (rta->rta_mp_alg) { 673 if (cfg->fc_mp_alg) {
678 mp_alg = *rta->rta_mp_alg; 674 if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
679 675 cfg->fc_mp_alg > IP_MP_ALG_MAX)
680 if (mp_alg < IP_MP_ALG_NONE ||
681 mp_alg > IP_MP_ALG_MAX)
682 goto err_inval; 676 goto err_inval;
683 } 677 }
684#endif 678#endif
@@ -714,43 +708,42 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
714 goto failure; 708 goto failure;
715 fib_info_cnt++; 709 fib_info_cnt++;
716 710
717 fi->fib_protocol = r->rtm_protocol; 711 fi->fib_protocol = cfg->fc_protocol;
712 fi->fib_flags = cfg->fc_flags;
713 fi->fib_priority = cfg->fc_priority;
714 fi->fib_prefsrc = cfg->fc_prefsrc;
718 715
719 fi->fib_nhs = nhs; 716 fi->fib_nhs = nhs;
720 change_nexthops(fi) { 717 change_nexthops(fi) {
721 nh->nh_parent = fi; 718 nh->nh_parent = fi;
722 } endfor_nexthops(fi) 719 } endfor_nexthops(fi)
723 720
724 fi->fib_flags = r->rtm_flags; 721 if (cfg->fc_mx) {
725 if (rta->rta_priority) 722 struct nlattr *nla;
726 fi->fib_priority = *rta->rta_priority; 723 int remaining;
727 if (rta->rta_mx) { 724
728 int attrlen = RTA_PAYLOAD(rta->rta_mx); 725 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
729 struct rtattr *attr = RTA_DATA(rta->rta_mx); 726 int type = nla->nla_type;
730 727
731 while (RTA_OK(attr, attrlen)) { 728 if (type) {
732 unsigned flavor = attr->rta_type; 729 if (type > RTAX_MAX)
733 if (flavor) {
734 if (flavor > RTAX_MAX)
735 goto err_inval; 730 goto err_inval;
736 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr); 731 fi->fib_metrics[type - 1] = nla_get_u32(nla);
737 } 732 }
738 attr = RTA_NEXT(attr, attrlen);
739 } 733 }
740 } 734 }
741 if (rta->rta_prefsrc)
742 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
743 735
744 if (rta->rta_mp) { 736 if (cfg->fc_mp) {
745#ifdef CONFIG_IP_ROUTE_MULTIPATH 737#ifdef CONFIG_IP_ROUTE_MULTIPATH
746 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0) 738 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
739 if (err != 0)
747 goto failure; 740 goto failure;
748 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) 741 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
749 goto err_inval; 742 goto err_inval;
750 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4)) 743 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
751 goto err_inval; 744 goto err_inval;
752#ifdef CONFIG_NET_CLS_ROUTE 745#ifdef CONFIG_NET_CLS_ROUTE
753 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4)) 746 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
754 goto err_inval; 747 goto err_inval;
755#endif 748#endif
756#else 749#else
@@ -758,34 +751,32 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
758#endif 751#endif
759 } else { 752 } else {
760 struct fib_nh *nh = fi->fib_nh; 753 struct fib_nh *nh = fi->fib_nh;
761 if (rta->rta_oif) 754
762 nh->nh_oif = *rta->rta_oif; 755 nh->nh_oif = cfg->fc_oif;
763 if (rta->rta_gw) 756 nh->nh_gw = cfg->fc_gw;
764 memcpy(&nh->nh_gw, rta->rta_gw, 4); 757 nh->nh_flags = cfg->fc_flags;
765#ifdef CONFIG_NET_CLS_ROUTE 758#ifdef CONFIG_NET_CLS_ROUTE
766 if (rta->rta_flow) 759 nh->nh_tclassid = cfg->fc_flow;
767 memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
768#endif 760#endif
769 nh->nh_flags = r->rtm_flags;
770#ifdef CONFIG_IP_ROUTE_MULTIPATH 761#ifdef CONFIG_IP_ROUTE_MULTIPATH
771 nh->nh_weight = 1; 762 nh->nh_weight = 1;
772#endif 763#endif
773 } 764 }
774 765
775#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 766#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
776 fi->fib_mp_alg = mp_alg; 767 fi->fib_mp_alg = cfg->fc_mp_alg;
777#endif 768#endif
778 769
779 if (fib_props[r->rtm_type].error) { 770 if (fib_props[cfg->fc_type].error) {
780 if (rta->rta_gw || rta->rta_oif || rta->rta_mp) 771 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
781 goto err_inval; 772 goto err_inval;
782 goto link_it; 773 goto link_it;
783 } 774 }
784 775
785 if (r->rtm_scope > RT_SCOPE_HOST) 776 if (cfg->fc_scope > RT_SCOPE_HOST)
786 goto err_inval; 777 goto err_inval;
787 778
788 if (r->rtm_scope == RT_SCOPE_HOST) { 779 if (cfg->fc_scope == RT_SCOPE_HOST) {
789 struct fib_nh *nh = fi->fib_nh; 780 struct fib_nh *nh = fi->fib_nh;
790 781
791 /* Local address is added. */ 782 /* Local address is added. */
@@ -798,14 +789,14 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
798 goto failure; 789 goto failure;
799 } else { 790 } else {
800 change_nexthops(fi) { 791 change_nexthops(fi) {
801 if ((err = fib_check_nh(r, fi, nh)) != 0) 792 if ((err = fib_check_nh(cfg, fi, nh)) != 0)
802 goto failure; 793 goto failure;
803 } endfor_nexthops(fi) 794 } endfor_nexthops(fi)
804 } 795 }
805 796
806 if (fi->fib_prefsrc) { 797 if (fi->fib_prefsrc) {
807 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || 798 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
808 memcmp(&fi->fib_prefsrc, rta->rta_dst, 4)) 799 fi->fib_prefsrc != cfg->fc_dst)
809 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) 800 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
810 goto err_inval; 801 goto err_inval;
811 } 802 }
@@ -820,7 +811,7 @@ link_it:
820 811
821 fi->fib_treeref++; 812 fi->fib_treeref++;
822 atomic_inc(&fi->fib_clntref); 813 atomic_inc(&fi->fib_clntref);
823 write_lock_bh(&fib_info_lock); 814 spin_lock_bh(&fib_info_lock);
824 hlist_add_head(&fi->fib_hash, 815 hlist_add_head(&fi->fib_hash,
825 &fib_info_hash[fib_info_hashfn(fi)]); 816 &fib_info_hash[fib_info_hashfn(fi)]);
826 if (fi->fib_prefsrc) { 817 if (fi->fib_prefsrc) {
@@ -839,19 +830,19 @@ link_it:
839 head = &fib_info_devhash[hash]; 830 head = &fib_info_devhash[hash];
840 hlist_add_head(&nh->nh_hash, head); 831 hlist_add_head(&nh->nh_hash, head);
841 } endfor_nexthops(fi) 832 } endfor_nexthops(fi)
842 write_unlock_bh(&fib_info_lock); 833 spin_unlock_bh(&fib_info_lock);
843 return fi; 834 return fi;
844 835
845err_inval: 836err_inval:
846 err = -EINVAL; 837 err = -EINVAL;
847 838
848failure: 839failure:
849 *errp = err;
850 if (fi) { 840 if (fi) {
851 fi->fib_dead = 1; 841 fi->fib_dead = 1;
852 free_fib_info(fi); 842 free_fib_info(fi);
853 } 843 }
854 return NULL; 844
845 return ERR_PTR(err);
855} 846}
856 847
857/* Note! fib_semantic_match intentionally uses RCU list functions. */ 848/* Note! fib_semantic_match intentionally uses RCU list functions. */
@@ -937,224 +928,89 @@ u32 __fib_res_prefsrc(struct fib_result *res)
937 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); 928 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
938} 929}
939 930
940int 931int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
941fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 932 u32 tb_id, u8 type, u8 scope, u32 dst, int dst_len, u8 tos,
942 u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, 933 struct fib_info *fi, unsigned int flags)
943 struct fib_info *fi, unsigned int flags)
944{ 934{
935 struct nlmsghdr *nlh;
945 struct rtmsg *rtm; 936 struct rtmsg *rtm;
946 struct nlmsghdr *nlh;
947 unsigned char *b = skb->tail;
948 937
949 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags); 938 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
950 rtm = NLMSG_DATA(nlh); 939 if (nlh == NULL)
940 return -ENOBUFS;
941
942 rtm = nlmsg_data(nlh);
951 rtm->rtm_family = AF_INET; 943 rtm->rtm_family = AF_INET;
952 rtm->rtm_dst_len = dst_len; 944 rtm->rtm_dst_len = dst_len;
953 rtm->rtm_src_len = 0; 945 rtm->rtm_src_len = 0;
954 rtm->rtm_tos = tos; 946 rtm->rtm_tos = tos;
955 rtm->rtm_table = tb_id; 947 rtm->rtm_table = tb_id;
948 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
956 rtm->rtm_type = type; 949 rtm->rtm_type = type;
957 rtm->rtm_flags = fi->fib_flags; 950 rtm->rtm_flags = fi->fib_flags;
958 rtm->rtm_scope = scope; 951 rtm->rtm_scope = scope;
959 if (rtm->rtm_dst_len)
960 RTA_PUT(skb, RTA_DST, 4, dst);
961 rtm->rtm_protocol = fi->fib_protocol; 952 rtm->rtm_protocol = fi->fib_protocol;
953
954 if (rtm->rtm_dst_len)
955 NLA_PUT_U32(skb, RTA_DST, dst);
956
962 if (fi->fib_priority) 957 if (fi->fib_priority)
963 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority); 958 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
959
964 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) 960 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
965 goto rtattr_failure; 961 goto nla_put_failure;
962
966 if (fi->fib_prefsrc) 963 if (fi->fib_prefsrc)
967 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc); 964 NLA_PUT_U32(skb, RTA_PREFSRC, fi->fib_prefsrc);
965
968 if (fi->fib_nhs == 1) { 966 if (fi->fib_nhs == 1) {
969 if (fi->fib_nh->nh_gw) 967 if (fi->fib_nh->nh_gw)
970 RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw); 968 NLA_PUT_U32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
969
971 if (fi->fib_nh->nh_oif) 970 if (fi->fib_nh->nh_oif)
972 RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif); 971 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
973#ifdef CONFIG_NET_CLS_ROUTE 972#ifdef CONFIG_NET_CLS_ROUTE
974 if (fi->fib_nh[0].nh_tclassid) 973 if (fi->fib_nh[0].nh_tclassid)
975 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid); 974 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
976#endif 975#endif
977 } 976 }
978#ifdef CONFIG_IP_ROUTE_MULTIPATH 977#ifdef CONFIG_IP_ROUTE_MULTIPATH
979 if (fi->fib_nhs > 1) { 978 if (fi->fib_nhs > 1) {
980 struct rtnexthop *nhp; 979 struct rtnexthop *rtnh;
981 struct rtattr *mp_head; 980 struct nlattr *mp;
982 if (skb_tailroom(skb) <= RTA_SPACE(0)) 981
983 goto rtattr_failure; 982 mp = nla_nest_start(skb, RTA_MULTIPATH);
984 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0)); 983 if (mp == NULL)
984 goto nla_put_failure;
985 985
986 for_nexthops(fi) { 986 for_nexthops(fi) {
987 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) 987 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
988 goto rtattr_failure; 988 if (rtnh == NULL)
989 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 989 goto nla_put_failure;
990 nhp->rtnh_flags = nh->nh_flags & 0xFF; 990
991 nhp->rtnh_hops = nh->nh_weight-1; 991 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
992 nhp->rtnh_ifindex = nh->nh_oif; 992 rtnh->rtnh_hops = nh->nh_weight - 1;
993 rtnh->rtnh_ifindex = nh->nh_oif;
994
993 if (nh->nh_gw) 995 if (nh->nh_gw)
994 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw); 996 NLA_PUT_U32(skb, RTA_GATEWAY, nh->nh_gw);
995#ifdef CONFIG_NET_CLS_ROUTE 997#ifdef CONFIG_NET_CLS_ROUTE
996 if (nh->nh_tclassid) 998 if (nh->nh_tclassid)
997 RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid); 999 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
998#endif 1000#endif
999 nhp->rtnh_len = skb->tail - (unsigned char*)nhp; 1001 /* length of rtnetlink header + attributes */
1002 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1000 } endfor_nexthops(fi); 1003 } endfor_nexthops(fi);
1001 mp_head->rta_type = RTA_MULTIPATH;
1002 mp_head->rta_len = skb->tail - (u8*)mp_head;
1003 }
1004#endif
1005 nlh->nlmsg_len = skb->tail - b;
1006 return skb->len;
1007
1008nlmsg_failure:
1009rtattr_failure:
1010 skb_trim(skb, b - skb->data);
1011 return -1;
1012}
1013
1014#ifndef CONFIG_IP_NOSIOCRT
1015
1016int
1017fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
1018 struct kern_rta *rta, struct rtentry *r)
1019{
1020 int plen;
1021 u32 *ptr;
1022
1023 memset(rtm, 0, sizeof(*rtm));
1024 memset(rta, 0, sizeof(*rta));
1025
1026 if (r->rt_dst.sa_family != AF_INET)
1027 return -EAFNOSUPPORT;
1028
1029 /* Check mask for validity:
1030 a) it must be contiguous.
1031 b) destination must have all host bits clear.
1032 c) if application forgot to set correct family (AF_INET),
1033 reject request unless it is absolutely clear i.e.
1034 both family and mask are zero.
1035 */
1036 plen = 32;
1037 ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
1038 if (!(r->rt_flags&RTF_HOST)) {
1039 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
1040 if (r->rt_genmask.sa_family != AF_INET) {
1041 if (mask || r->rt_genmask.sa_family)
1042 return -EAFNOSUPPORT;
1043 }
1044 if (bad_mask(mask, *ptr))
1045 return -EINVAL;
1046 plen = inet_mask_len(mask);
1047 }
1048
1049 nl->nlmsg_flags = NLM_F_REQUEST;
1050 nl->nlmsg_pid = 0;
1051 nl->nlmsg_seq = 0;
1052 nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
1053 if (cmd == SIOCDELRT) {
1054 nl->nlmsg_type = RTM_DELROUTE;
1055 nl->nlmsg_flags = 0;
1056 } else {
1057 nl->nlmsg_type = RTM_NEWROUTE;
1058 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
1059 rtm->rtm_protocol = RTPROT_BOOT;
1060 }
1061
1062 rtm->rtm_dst_len = plen;
1063 rta->rta_dst = ptr;
1064
1065 if (r->rt_metric) {
1066 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
1067 rta->rta_priority = (u32*)&r->rt_pad3;
1068 }
1069 if (r->rt_flags&RTF_REJECT) {
1070 rtm->rtm_scope = RT_SCOPE_HOST;
1071 rtm->rtm_type = RTN_UNREACHABLE;
1072 return 0;
1073 }
1074 rtm->rtm_scope = RT_SCOPE_NOWHERE;
1075 rtm->rtm_type = RTN_UNICAST;
1076
1077 if (r->rt_dev) {
1078 char *colon;
1079 struct net_device *dev;
1080 char devname[IFNAMSIZ];
1081
1082 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
1083 return -EFAULT;
1084 devname[IFNAMSIZ-1] = 0;
1085 colon = strchr(devname, ':');
1086 if (colon)
1087 *colon = 0;
1088 dev = __dev_get_by_name(devname);
1089 if (!dev)
1090 return -ENODEV;
1091 rta->rta_oif = &dev->ifindex;
1092 if (colon) {
1093 struct in_ifaddr *ifa;
1094 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1095 if (!in_dev)
1096 return -ENODEV;
1097 *colon = ':';
1098 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
1099 if (strcmp(ifa->ifa_label, devname) == 0)
1100 break;
1101 if (ifa == NULL)
1102 return -ENODEV;
1103 rta->rta_prefsrc = &ifa->ifa_local;
1104 }
1105 }
1106 1004
1107 ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr; 1005 nla_nest_end(skb, mp);
1108 if (r->rt_gateway.sa_family == AF_INET && *ptr) {
1109 rta->rta_gw = ptr;
1110 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
1111 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1112 } 1006 }
1007#endif
1008 return nlmsg_end(skb, nlh);
1113 1009
1114 if (cmd == SIOCDELRT) 1010nla_put_failure:
1115 return 0; 1011 return nlmsg_cancel(skb, nlh);
1116
1117 if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
1118 return -EINVAL;
1119
1120 if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
1121 rtm->rtm_scope = RT_SCOPE_LINK;
1122
1123 if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
1124 struct rtattr *rec;
1125 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
1126 if (mx == NULL)
1127 return -ENOMEM;
1128 rta->rta_mx = mx;
1129 mx->rta_type = RTA_METRICS;
1130 mx->rta_len = RTA_LENGTH(0);
1131 if (r->rt_flags&RTF_MTU) {
1132 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1133 rec->rta_type = RTAX_ADVMSS;
1134 rec->rta_len = RTA_LENGTH(4);
1135 mx->rta_len += RTA_LENGTH(4);
1136 *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
1137 }
1138 if (r->rt_flags&RTF_WINDOW) {
1139 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1140 rec->rta_type = RTAX_WINDOW;
1141 rec->rta_len = RTA_LENGTH(4);
1142 mx->rta_len += RTA_LENGTH(4);
1143 *(u32*)RTA_DATA(rec) = r->rt_window;
1144 }
1145 if (r->rt_flags&RTF_IRTT) {
1146 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1147 rec->rta_type = RTAX_RTT;
1148 rec->rta_len = RTA_LENGTH(4);
1149 mx->rta_len += RTA_LENGTH(4);
1150 *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
1151 }
1152 }
1153 return 0;
1154} 1012}
1155 1013
1156#endif
1157
1158/* 1014/*
1159 Update FIB if: 1015 Update FIB if:
1160 - local address disappeared -> we must delete all the entries 1016 - local address disappeared -> we must delete all the entries
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 01801c0f885d..9c3ff6ba6e21 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1124,17 +1124,14 @@ err:
1124 return fa_head; 1124 return fa_head;
1125} 1125}
1126 1126
1127static int 1127static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
1128fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1129 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1130{ 1128{
1131 struct trie *t = (struct trie *) tb->tb_data; 1129 struct trie *t = (struct trie *) tb->tb_data;
1132 struct fib_alias *fa, *new_fa; 1130 struct fib_alias *fa, *new_fa;
1133 struct list_head *fa_head = NULL; 1131 struct list_head *fa_head = NULL;
1134 struct fib_info *fi; 1132 struct fib_info *fi;
1135 int plen = r->rtm_dst_len; 1133 int plen = cfg->fc_dst_len;
1136 int type = r->rtm_type; 1134 u8 tos = cfg->fc_tos;
1137 u8 tos = r->rtm_tos;
1138 u32 key, mask; 1135 u32 key, mask;
1139 int err; 1136 int err;
1140 struct leaf *l; 1137 struct leaf *l;
@@ -1142,13 +1139,9 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1142 if (plen > 32) 1139 if (plen > 32)
1143 return -EINVAL; 1140 return -EINVAL;
1144 1141
1145 key = 0; 1142 key = ntohl(cfg->fc_dst);
1146 if (rta->rta_dst)
1147 memcpy(&key, rta->rta_dst, 4);
1148
1149 key = ntohl(key);
1150 1143
1151 pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); 1144 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
1152 1145
1153 mask = ntohl(inet_make_mask(plen)); 1146 mask = ntohl(inet_make_mask(plen));
1154 1147
@@ -1157,10 +1150,11 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1157 1150
1158 key = key & mask; 1151 key = key & mask;
1159 1152
1160 fi = fib_create_info(r, rta, nlhdr, &err); 1153 fi = fib_create_info(cfg);
1161 1154 if (IS_ERR(fi)) {
1162 if (!fi) 1155 err = PTR_ERR(fi);
1163 goto err; 1156 goto err;
1157 }
1164 1158
1165 l = fib_find_node(t, key); 1159 l = fib_find_node(t, key);
1166 fa = NULL; 1160 fa = NULL;
@@ -1185,10 +1179,10 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1185 struct fib_alias *fa_orig; 1179 struct fib_alias *fa_orig;
1186 1180
1187 err = -EEXIST; 1181 err = -EEXIST;
1188 if (nlhdr->nlmsg_flags & NLM_F_EXCL) 1182 if (cfg->fc_nlflags & NLM_F_EXCL)
1189 goto out; 1183 goto out;
1190 1184
1191 if (nlhdr->nlmsg_flags & NLM_F_REPLACE) { 1185 if (cfg->fc_nlflags & NLM_F_REPLACE) {
1192 struct fib_info *fi_drop; 1186 struct fib_info *fi_drop;
1193 u8 state; 1187 u8 state;
1194 1188
@@ -1200,8 +1194,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1200 fi_drop = fa->fa_info; 1194 fi_drop = fa->fa_info;
1201 new_fa->fa_tos = fa->fa_tos; 1195 new_fa->fa_tos = fa->fa_tos;
1202 new_fa->fa_info = fi; 1196 new_fa->fa_info = fi;
1203 new_fa->fa_type = type; 1197 new_fa->fa_type = cfg->fc_type;
1204 new_fa->fa_scope = r->rtm_scope; 1198 new_fa->fa_scope = cfg->fc_scope;
1205 state = fa->fa_state; 1199 state = fa->fa_state;
1206 new_fa->fa_state &= ~FA_S_ACCESSED; 1200 new_fa->fa_state &= ~FA_S_ACCESSED;
1207 1201
@@ -1224,17 +1218,17 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1224 break; 1218 break;
1225 if (fa->fa_info->fib_priority != fi->fib_priority) 1219 if (fa->fa_info->fib_priority != fi->fib_priority)
1226 break; 1220 break;
1227 if (fa->fa_type == type && 1221 if (fa->fa_type == cfg->fc_type &&
1228 fa->fa_scope == r->rtm_scope && 1222 fa->fa_scope == cfg->fc_scope &&
1229 fa->fa_info == fi) { 1223 fa->fa_info == fi) {
1230 goto out; 1224 goto out;
1231 } 1225 }
1232 } 1226 }
1233 if (!(nlhdr->nlmsg_flags & NLM_F_APPEND)) 1227 if (!(cfg->fc_nlflags & NLM_F_APPEND))
1234 fa = fa_orig; 1228 fa = fa_orig;
1235 } 1229 }
1236 err = -ENOENT; 1230 err = -ENOENT;
1237 if (!(nlhdr->nlmsg_flags & NLM_F_CREATE)) 1231 if (!(cfg->fc_nlflags & NLM_F_CREATE))
1238 goto out; 1232 goto out;
1239 1233
1240 err = -ENOBUFS; 1234 err = -ENOBUFS;
@@ -1244,8 +1238,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1244 1238
1245 new_fa->fa_info = fi; 1239 new_fa->fa_info = fi;
1246 new_fa->fa_tos = tos; 1240 new_fa->fa_tos = tos;
1247 new_fa->fa_type = type; 1241 new_fa->fa_type = cfg->fc_type;
1248 new_fa->fa_scope = r->rtm_scope; 1242 new_fa->fa_scope = cfg->fc_scope;
1249 new_fa->fa_state = 0; 1243 new_fa->fa_state = 0;
1250 /* 1244 /*
1251 * Insert new entry to the list. 1245 * Insert new entry to the list.
@@ -1262,7 +1256,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1262 (fa ? &fa->fa_list : fa_head)); 1256 (fa ? &fa->fa_list : fa_head));
1263 1257
1264 rt_cache_flush(-1); 1258 rt_cache_flush(-1);
1265 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req); 1259 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
1260 &cfg->fc_nlinfo);
1266succeeded: 1261succeeded:
1267 return 0; 1262 return 0;
1268 1263
@@ -1548,28 +1543,21 @@ static int trie_leaf_remove(struct trie *t, t_key key)
1548 return 1; 1543 return 1;
1549} 1544}
1550 1545
1551static int 1546static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
1552fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1553 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1554{ 1547{
1555 struct trie *t = (struct trie *) tb->tb_data; 1548 struct trie *t = (struct trie *) tb->tb_data;
1556 u32 key, mask; 1549 u32 key, mask;
1557 int plen = r->rtm_dst_len; 1550 int plen = cfg->fc_dst_len;
1558 u8 tos = r->rtm_tos; 1551 u8 tos = cfg->fc_tos;
1559 struct fib_alias *fa, *fa_to_delete; 1552 struct fib_alias *fa, *fa_to_delete;
1560 struct list_head *fa_head; 1553 struct list_head *fa_head;
1561 struct leaf *l; 1554 struct leaf *l;
1562 struct leaf_info *li; 1555 struct leaf_info *li;
1563 1556
1564
1565 if (plen > 32) 1557 if (plen > 32)
1566 return -EINVAL; 1558 return -EINVAL;
1567 1559
1568 key = 0; 1560 key = ntohl(cfg->fc_dst);
1569 if (rta->rta_dst)
1570 memcpy(&key, rta->rta_dst, 4);
1571
1572 key = ntohl(key);
1573 mask = ntohl(inet_make_mask(plen)); 1561 mask = ntohl(inet_make_mask(plen));
1574 1562
1575 if (key & ~mask) 1563 if (key & ~mask)
@@ -1598,13 +1586,12 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1598 if (fa->fa_tos != tos) 1586 if (fa->fa_tos != tos)
1599 break; 1587 break;
1600 1588
1601 if ((!r->rtm_type || 1589 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1602 fa->fa_type == r->rtm_type) && 1590 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1603 (r->rtm_scope == RT_SCOPE_NOWHERE || 1591 fa->fa_scope == cfg->fc_scope) &&
1604 fa->fa_scope == r->rtm_scope) && 1592 (!cfg->fc_protocol ||
1605 (!r->rtm_protocol || 1593 fi->fib_protocol == cfg->fc_protocol) &&
1606 fi->fib_protocol == r->rtm_protocol) && 1594 fib_nh_match(cfg, fi) == 0) {
1607 fib_nh_match(r, nlhdr, rta, fi) == 0) {
1608 fa_to_delete = fa; 1595 fa_to_delete = fa;
1609 break; 1596 break;
1610 } 1597 }
@@ -1614,7 +1601,8 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1614 return -ESRCH; 1601 return -ESRCH;
1615 1602
1616 fa = fa_to_delete; 1603 fa = fa_to_delete;
1617 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); 1604 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
1605 &cfg->fc_nlinfo);
1618 1606
1619 l = fib_find_node(t, key); 1607 l = fib_find_node(t, key);
1620 li = find_leaf_info(l, plen); 1608 li = find_leaf_info(l, plen);
@@ -1848,7 +1836,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
1848 1836
1849 u32 xkey = htonl(key); 1837 u32 xkey = htonl(key);
1850 1838
1851 s_i = cb->args[3]; 1839 s_i = cb->args[4];
1852 i = 0; 1840 i = 0;
1853 1841
1854 /* rcu_read_lock is hold by caller */ 1842 /* rcu_read_lock is hold by caller */
@@ -1866,16 +1854,16 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
1866 tb->tb_id, 1854 tb->tb_id,
1867 fa->fa_type, 1855 fa->fa_type,
1868 fa->fa_scope, 1856 fa->fa_scope,
1869 &xkey, 1857 xkey,
1870 plen, 1858 plen,
1871 fa->fa_tos, 1859 fa->fa_tos,
1872 fa->fa_info, 0) < 0) { 1860 fa->fa_info, 0) < 0) {
1873 cb->args[3] = i; 1861 cb->args[4] = i;
1874 return -1; 1862 return -1;
1875 } 1863 }
1876 i++; 1864 i++;
1877 } 1865 }
1878 cb->args[3] = i; 1866 cb->args[4] = i;
1879 return skb->len; 1867 return skb->len;
1880} 1868}
1881 1869
@@ -1886,14 +1874,14 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str
1886 struct list_head *fa_head; 1874 struct list_head *fa_head;
1887 struct leaf *l = NULL; 1875 struct leaf *l = NULL;
1888 1876
1889 s_h = cb->args[2]; 1877 s_h = cb->args[3];
1890 1878
1891 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { 1879 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
1892 if (h < s_h) 1880 if (h < s_h)
1893 continue; 1881 continue;
1894 if (h > s_h) 1882 if (h > s_h)
1895 memset(&cb->args[3], 0, 1883 memset(&cb->args[4], 0,
1896 sizeof(cb->args) - 3*sizeof(cb->args[0])); 1884 sizeof(cb->args) - 4*sizeof(cb->args[0]));
1897 1885
1898 fa_head = get_fa_head(l, plen); 1886 fa_head = get_fa_head(l, plen);
1899 1887
@@ -1904,11 +1892,11 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str
1904 continue; 1892 continue;
1905 1893
1906 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { 1894 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
1907 cb->args[2] = h; 1895 cb->args[3] = h;
1908 return -1; 1896 return -1;
1909 } 1897 }
1910 } 1898 }
1911 cb->args[2] = h; 1899 cb->args[3] = h;
1912 return skb->len; 1900 return skb->len;
1913} 1901}
1914 1902
@@ -1917,23 +1905,23 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
1917 int m, s_m; 1905 int m, s_m;
1918 struct trie *t = (struct trie *) tb->tb_data; 1906 struct trie *t = (struct trie *) tb->tb_data;
1919 1907
1920 s_m = cb->args[1]; 1908 s_m = cb->args[2];
1921 1909
1922 rcu_read_lock(); 1910 rcu_read_lock();
1923 for (m = 0; m <= 32; m++) { 1911 for (m = 0; m <= 32; m++) {
1924 if (m < s_m) 1912 if (m < s_m)
1925 continue; 1913 continue;
1926 if (m > s_m) 1914 if (m > s_m)
1927 memset(&cb->args[2], 0, 1915 memset(&cb->args[3], 0,
1928 sizeof(cb->args) - 2*sizeof(cb->args[0])); 1916 sizeof(cb->args) - 3*sizeof(cb->args[0]));
1929 1917
1930 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { 1918 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
1931 cb->args[1] = m; 1919 cb->args[2] = m;
1932 goto out; 1920 goto out;
1933 } 1921 }
1934 } 1922 }
1935 rcu_read_unlock(); 1923 rcu_read_unlock();
1936 cb->args[1] = m; 1924 cb->args[2] = m;
1937 return skb->len; 1925 return skb->len;
1938out: 1926out:
1939 rcu_read_unlock(); 1927 rcu_read_unlock();
@@ -1943,9 +1931,9 @@ out:
1943/* Fix more generic FIB names for init later */ 1931/* Fix more generic FIB names for init later */
1944 1932
1945#ifdef CONFIG_IP_MULTIPLE_TABLES 1933#ifdef CONFIG_IP_MULTIPLE_TABLES
1946struct fib_table * fib_hash_init(int id) 1934struct fib_table * fib_hash_init(u32 id)
1947#else 1935#else
1948struct fib_table * __init fib_hash_init(int id) 1936struct fib_table * __init fib_hash_init(u32 id)
1949#endif 1937#endif
1950{ 1938{
1951 struct fib_table *tb; 1939 struct fib_table *tb;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4c86ac3d882d..c2ad07e48ab4 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -187,11 +187,11 @@ struct icmp_err icmp_err_convert[] = {
187}; 187};
188 188
189/* Control parameters for ECHO replies. */ 189/* Control parameters for ECHO replies. */
190int sysctl_icmp_echo_ignore_all; 190int sysctl_icmp_echo_ignore_all __read_mostly;
191int sysctl_icmp_echo_ignore_broadcasts = 1; 191int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1;
192 192
193/* Control parameter - ignore bogus broadcast responses? */ 193/* Control parameter - ignore bogus broadcast responses? */
194int sysctl_icmp_ignore_bogus_error_responses = 1; 194int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1;
195 195
196/* 196/*
197 * Configurable global rate limit. 197 * Configurable global rate limit.
@@ -205,9 +205,9 @@ int sysctl_icmp_ignore_bogus_error_responses = 1;
205 * time exceeded (11), parameter problem (12) 205 * time exceeded (11), parameter problem (12)
206 */ 206 */
207 207
208int sysctl_icmp_ratelimit = 1 * HZ; 208int sysctl_icmp_ratelimit __read_mostly = 1 * HZ;
209int sysctl_icmp_ratemask = 0x1818; 209int sysctl_icmp_ratemask __read_mostly = 0x1818;
210int sysctl_icmp_errors_use_inbound_ifaddr; 210int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly;
211 211
212/* 212/*
213 * ICMP control array. This specifies what to do with each ICMP. 213 * ICMP control array. This specifies what to do with each ICMP.
@@ -406,6 +406,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
406 .saddr = rt->rt_spec_dst, 406 .saddr = rt->rt_spec_dst,
407 .tos = RT_TOS(skb->nh.iph->tos) } }, 407 .tos = RT_TOS(skb->nh.iph->tos) } },
408 .proto = IPPROTO_ICMP }; 408 .proto = IPPROTO_ICMP };
409 security_skb_classify_flow(skb, &fl);
409 if (ip_route_output_key(&rt, &fl)) 410 if (ip_route_output_key(&rt, &fl))
410 goto out_unlock; 411 goto out_unlock;
411 } 412 }
@@ -560,6 +561,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
560 } 561 }
561 } 562 }
562 }; 563 };
564 security_skb_classify_flow(skb_in, &fl);
563 if (ip_route_output_key(&rt, &fl)) 565 if (ip_route_output_key(&rt, &fl))
564 goto out_unlock; 566 goto out_unlock;
565 } 567 }
@@ -928,7 +930,7 @@ int icmp_rcv(struct sk_buff *skb)
928 ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); 930 ICMP_INC_STATS_BH(ICMP_MIB_INMSGS);
929 931
930 switch (skb->ip_summed) { 932 switch (skb->ip_summed) {
931 case CHECKSUM_HW: 933 case CHECKSUM_COMPLETE:
932 if (!(u16)csum_fold(skb->csum)) 934 if (!(u16)csum_fold(skb->csum))
933 break; 935 break;
934 /* fall through */ 936 /* fall through */
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 8e8117c19e4d..58be8227b0cb 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -931,7 +931,7 @@ int igmp_rcv(struct sk_buff *skb)
931 goto drop; 931 goto drop;
932 932
933 switch (skb->ip_summed) { 933 switch (skb->ip_summed) {
934 case CHECKSUM_HW: 934 case CHECKSUM_COMPLETE:
935 if (!(u16)csum_fold(skb->csum)) 935 if (!(u16)csum_fold(skb->csum))
936 break; 936 break;
937 /* fall through */ 937 /* fall through */
@@ -1397,8 +1397,8 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
1397/* 1397/*
1398 * Join a socket to a group 1398 * Join a socket to a group
1399 */ 1399 */
1400int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS; 1400int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS;
1401int sysctl_igmp_max_msf = IP_MAX_MSF; 1401int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
1402 1402
1403 1403
1404static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, 1404static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index e50a1bfd7ccc..07204391d083 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -327,6 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
327 { .sport = inet_sk(sk)->sport, 327 { .sport = inet_sk(sk)->sport,
328 .dport = ireq->rmt_port } } }; 328 .dport = ireq->rmt_port } } };
329 329
330 security_req_classify_flow(req, &fl);
330 if (ip_route_output_flow(&rt, &fl, sk, 0)) { 331 if (ip_route_output_flow(&rt, &fl, sk, 0)) {
331 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 332 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
332 return NULL; 333 return NULL;
@@ -509,6 +510,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
509 510
510 /* Deinitialize accept_queue to trap illegal accesses. */ 511 /* Deinitialize accept_queue to trap illegal accesses. */
511 memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); 512 memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
513
514 security_inet_csk_clone(newsk, req);
512 } 515 }
513 return newsk; 516 return newsk;
514} 517}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 95fac5532994..fb296c9a7f3f 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -124,8 +124,10 @@ EXPORT_SYMBOL(inet_listen_wlock);
124 * remote address for the connection. So always assume those are both 124 * remote address for the connection. So always assume those are both
125 * wildcarded during the search since they can never be otherwise. 125 * wildcarded during the search since they can never be otherwise.
126 */ 126 */
127struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, 127static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
128 const unsigned short hnum, const int dif) 128 const u32 daddr,
129 const unsigned short hnum,
130 const int dif)
129{ 131{
130 struct sock *result = NULL, *sk; 132 struct sock *result = NULL, *sk;
131 const struct hlist_node *node; 133 const struct hlist_node *node;
@@ -159,6 +161,33 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad
159 return result; 161 return result;
160} 162}
161 163
164/* Optimize the common listener case. */
165struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
166 const u32 daddr, const unsigned short hnum,
167 const int dif)
168{
169 struct sock *sk = NULL;
170 const struct hlist_head *head;
171
172 read_lock(&hashinfo->lhash_lock);
173 head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
174 if (!hlist_empty(head)) {
175 const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
176
177 if (inet->num == hnum && !sk->sk_node.next &&
178 (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
179 (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
180 !sk->sk_bound_dev_if)
181 goto sherry_cache;
182 sk = inet_lookup_listener_slow(head, daddr, hnum, dif);
183 }
184 if (sk) {
185sherry_cache:
186 sock_hold(sk);
187 }
188 read_unlock(&hashinfo->lhash_lock);
189 return sk;
190}
162EXPORT_SYMBOL_GPL(__inet_lookup_listener); 191EXPORT_SYMBOL_GPL(__inet_lookup_listener);
163 192
164/* called with local bh disabled */ 193/* called with local bh disabled */
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 03ff62ebcfeb..a675602ef295 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -126,12 +126,9 @@ void __init inet_initpeers(void)
126 126
127 peer_cachep = kmem_cache_create("inet_peer_cache", 127 peer_cachep = kmem_cache_create("inet_peer_cache",
128 sizeof(struct inet_peer), 128 sizeof(struct inet_peer),
129 0, SLAB_HWCACHE_ALIGN, 129 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
130 NULL, NULL); 130 NULL, NULL);
131 131
132 if (!peer_cachep)
133 panic("cannot create inet_peer_cache");
134
135 /* All the timers, started at system startup tend 132 /* All the timers, started at system startup tend
136 to synchronize. Perturb it a bit. 133 to synchronize. Perturb it a bit.
137 */ 134 */
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b84b53a47526..165d72859ddf 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -54,15 +54,15 @@
54 * even the most extreme cases without allowing an attacker to measurably 54 * even the most extreme cases without allowing an attacker to measurably
55 * harm machine performance. 55 * harm machine performance.
56 */ 56 */
57int sysctl_ipfrag_high_thresh = 256*1024; 57int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
58int sysctl_ipfrag_low_thresh = 192*1024; 58int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
59 59
60int sysctl_ipfrag_max_dist = 64; 60int sysctl_ipfrag_max_dist __read_mostly = 64;
61 61
62/* Important NOTE! Fragment queue must be destroyed before MSL expires. 62/* Important NOTE! Fragment queue must be destroyed before MSL expires.
63 * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. 63 * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
64 */ 64 */
65int sysctl_ipfrag_time = IP_FRAG_TIME; 65int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
66 66
67struct ipfrag_skb_cb 67struct ipfrag_skb_cb
68{ 68{
@@ -130,7 +130,7 @@ static unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot)
130} 130}
131 131
132static struct timer_list ipfrag_secret_timer; 132static struct timer_list ipfrag_secret_timer;
133int sysctl_ipfrag_secret_interval = 10 * 60 * HZ; 133int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
134 134
135static void ipfrag_secret_rebuild(unsigned long dummy) 135static void ipfrag_secret_rebuild(unsigned long dummy)
136{ 136{
@@ -665,7 +665,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
665 head->len += fp->len; 665 head->len += fp->len;
666 if (head->ip_summed != fp->ip_summed) 666 if (head->ip_summed != fp->ip_summed)
667 head->ip_summed = CHECKSUM_NONE; 667 head->ip_summed = CHECKSUM_NONE;
668 else if (head->ip_summed == CHECKSUM_HW) 668 else if (head->ip_summed == CHECKSUM_COMPLETE)
669 head->csum = csum_add(head->csum, fp->csum); 669 head->csum = csum_add(head->csum, fp->csum);
670 head->truesize += fp->truesize; 670 head->truesize += fp->truesize;
671 atomic_sub(fp->truesize, &ip_frag_mem); 671 atomic_sub(fp->truesize, &ip_frag_mem);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0f9b3a31997b..f5fba051df3d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -393,7 +393,8 @@ out:
393 int code = skb->h.icmph->code; 393 int code = skb->h.icmph->code;
394 int rel_type = 0; 394 int rel_type = 0;
395 int rel_code = 0; 395 int rel_code = 0;
396 int rel_info = 0; 396 __be32 rel_info = 0;
397 __u32 n = 0;
397 u16 flags; 398 u16 flags;
398 int grehlen = (iph->ihl<<2) + 4; 399 int grehlen = (iph->ihl<<2) + 4;
399 struct sk_buff *skb2; 400 struct sk_buff *skb2;
@@ -422,14 +423,16 @@ out:
422 default: 423 default:
423 return; 424 return;
424 case ICMP_PARAMETERPROB: 425 case ICMP_PARAMETERPROB:
425 if (skb->h.icmph->un.gateway < (iph->ihl<<2)) 426 n = ntohl(skb->h.icmph->un.gateway) >> 24;
427 if (n < (iph->ihl<<2))
426 return; 428 return;
427 429
428 /* So... This guy found something strange INSIDE encapsulated 430 /* So... This guy found something strange INSIDE encapsulated
429 packet. Well, he is fool, but what can we do ? 431 packet. Well, he is fool, but what can we do ?
430 */ 432 */
431 rel_type = ICMP_PARAMETERPROB; 433 rel_type = ICMP_PARAMETERPROB;
432 rel_info = skb->h.icmph->un.gateway - grehlen; 434 n -= grehlen;
435 rel_info = htonl(n << 24);
433 break; 436 break;
434 437
435 case ICMP_DEST_UNREACH: 438 case ICMP_DEST_UNREACH:
@@ -440,13 +443,14 @@ out:
440 return; 443 return;
441 case ICMP_FRAG_NEEDED: 444 case ICMP_FRAG_NEEDED:
442 /* And it is the only really necessary thing :-) */ 445 /* And it is the only really necessary thing :-) */
443 rel_info = ntohs(skb->h.icmph->un.frag.mtu); 446 n = ntohs(skb->h.icmph->un.frag.mtu);
444 if (rel_info < grehlen+68) 447 if (n < grehlen+68)
445 return; 448 return;
446 rel_info -= grehlen; 449 n -= grehlen;
447 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ 450 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
448 if (rel_info > ntohs(eiph->tot_len)) 451 if (n > ntohs(eiph->tot_len))
449 return; 452 return;
453 rel_info = htonl(n);
450 break; 454 break;
451 default: 455 default:
452 /* All others are translated to HOST_UNREACH. 456 /* All others are translated to HOST_UNREACH.
@@ -508,12 +512,11 @@ out:
508 512
509 /* change mtu on this route */ 513 /* change mtu on this route */
510 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { 514 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
511 if (rel_info > dst_mtu(skb2->dst)) { 515 if (n > dst_mtu(skb2->dst)) {
512 kfree_skb(skb2); 516 kfree_skb(skb2);
513 return; 517 return;
514 } 518 }
515 skb2->dst->ops->update_pmtu(skb2->dst, rel_info); 519 skb2->dst->ops->update_pmtu(skb2->dst, n);
516 rel_info = htonl(rel_info);
517 } else if (type == ICMP_TIME_EXCEEDED) { 520 } else if (type == ICMP_TIME_EXCEEDED) {
518 struct ip_tunnel *t = netdev_priv(skb2->dev); 521 struct ip_tunnel *t = netdev_priv(skb2->dev);
519 if (t->parms.iph.ttl) { 522 if (t->parms.iph.ttl) {
@@ -576,7 +579,7 @@ static int ipgre_rcv(struct sk_buff *skb)
576 579
577 if (flags&GRE_CSUM) { 580 if (flags&GRE_CSUM) {
578 switch (skb->ip_summed) { 581 switch (skb->ip_summed) {
579 case CHECKSUM_HW: 582 case CHECKSUM_COMPLETE:
580 csum = (u16)csum_fold(skb->csum); 583 csum = (u16)csum_fold(skb->csum);
581 if (!csum) 584 if (!csum)
582 break; 585 break;
@@ -584,7 +587,7 @@ static int ipgre_rcv(struct sk_buff *skb)
584 case CHECKSUM_NONE: 587 case CHECKSUM_NONE:
585 skb->csum = 0; 588 skb->csum = 0;
586 csum = __skb_checksum_complete(skb); 589 csum = __skb_checksum_complete(skb);
587 skb->ip_summed = CHECKSUM_HW; 590 skb->ip_summed = CHECKSUM_COMPLETE;
588 } 591 }
589 offset += 4; 592 offset += 4;
590 } 593 }
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 406056edc02b..e7437c091326 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -24,6 +24,7 @@
24#include <net/ip.h> 24#include <net/ip.h>
25#include <net/icmp.h> 25#include <net/icmp.h>
26#include <net/route.h> 26#include <net/route.h>
27#include <net/cipso_ipv4.h>
27 28
28/* 29/*
29 * Write options to IP header, record destination address to 30 * Write options to IP header, record destination address to
@@ -194,6 +195,13 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
194 dopt->is_strictroute = sopt->is_strictroute; 195 dopt->is_strictroute = sopt->is_strictroute;
195 } 196 }
196 } 197 }
198 if (sopt->cipso) {
199 optlen = sptr[sopt->cipso+1];
200 dopt->cipso = dopt->optlen+sizeof(struct iphdr);
201 memcpy(dptr, sptr+sopt->cipso, optlen);
202 dptr += optlen;
203 dopt->optlen += optlen;
204 }
197 while (dopt->optlen & 3) { 205 while (dopt->optlen & 3) {
198 *dptr++ = IPOPT_END; 206 *dptr++ = IPOPT_END;
199 dopt->optlen++; 207 dopt->optlen++;
@@ -434,6 +442,17 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
434 if (optptr[2] == 0 && optptr[3] == 0) 442 if (optptr[2] == 0 && optptr[3] == 0)
435 opt->router_alert = optptr - iph; 443 opt->router_alert = optptr - iph;
436 break; 444 break;
445 case IPOPT_CIPSO:
446 if (opt->cipso) {
447 pp_ptr = optptr;
448 goto error;
449 }
450 opt->cipso = optptr - iph;
451 if (cipso_v4_validate(&optptr)) {
452 pp_ptr = optptr;
453 goto error;
454 }
455 break;
437 case IPOPT_SEC: 456 case IPOPT_SEC:
438 case IPOPT_SID: 457 case IPOPT_SID:
439 default: 458 default:
@@ -506,7 +525,6 @@ static int ip_options_get_finish(struct ip_options **optp,
506 opt->__data[optlen++] = IPOPT_END; 525 opt->__data[optlen++] = IPOPT_END;
507 opt->optlen = optlen; 526 opt->optlen = optlen;
508 opt->is_data = 1; 527 opt->is_data = 1;
509 opt->is_setbyuser = 1;
510 if (optlen && ip_options_compile(opt, NULL)) { 528 if (optlen && ip_options_compile(opt, NULL)) {
511 kfree(opt); 529 kfree(opt);
512 return -EINVAL; 530 return -EINVAL;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a2ede167e045..97aee76fb746 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -83,7 +83,7 @@
83#include <linux/netlink.h> 83#include <linux/netlink.h>
84#include <linux/tcp.h> 84#include <linux/tcp.h>
85 85
86int sysctl_ip_default_ttl = IPDEFTTL; 86int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
87 87
88/* Generate a checksum for an outgoing IP datagram. */ 88/* Generate a checksum for an outgoing IP datagram. */
89__inline__ void ip_send_check(struct iphdr *iph) 89__inline__ void ip_send_check(struct iphdr *iph)
@@ -328,6 +328,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
328 * keep trying until route appears or the connection times 328 * keep trying until route appears or the connection times
329 * itself out. 329 * itself out.
330 */ 330 */
331 security_sk_classify_flow(sk, &fl);
331 if (ip_route_output_flow(&rt, &fl, sk, 0)) 332 if (ip_route_output_flow(&rt, &fl, sk, 0))
332 goto no_route; 333 goto no_route;
333 } 334 }
@@ -425,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
425 int ptr; 426 int ptr;
426 struct net_device *dev; 427 struct net_device *dev;
427 struct sk_buff *skb2; 428 struct sk_buff *skb2;
428 unsigned int mtu, hlen, left, len, ll_rs; 429 unsigned int mtu, hlen, left, len, ll_rs, pad;
429 int offset; 430 int offset;
430 __be16 not_last_frag; 431 __be16 not_last_frag;
431 struct rtable *rt = (struct rtable*)skb->dst; 432 struct rtable *rt = (struct rtable*)skb->dst;
@@ -555,14 +556,13 @@ slow_path:
555 left = skb->len - hlen; /* Space per frame */ 556 left = skb->len - hlen; /* Space per frame */
556 ptr = raw + hlen; /* Where to start from */ 557 ptr = raw + hlen; /* Where to start from */
557 558
558#ifdef CONFIG_BRIDGE_NETFILTER
559 /* for bridged IP traffic encapsulated inside f.e. a vlan header, 559 /* for bridged IP traffic encapsulated inside f.e. a vlan header,
560 * we need to make room for the encapsulating header */ 560 * we need to make room for the encapsulating header
561 ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb)); 561 */
562 mtu -= nf_bridge_pad(skb); 562 pad = nf_bridge_pad(skb);
563#else 563 ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
564 ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev); 564 mtu -= pad;
565#endif 565
566 /* 566 /*
567 * Fragment the datagram. 567 * Fragment the datagram.
568 */ 568 */
@@ -679,7 +679,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
679{ 679{
680 struct iovec *iov = from; 680 struct iovec *iov = from;
681 681
682 if (skb->ip_summed == CHECKSUM_HW) { 682 if (skb->ip_summed == CHECKSUM_PARTIAL) {
683 if (memcpy_fromiovecend(to, iov, offset, len) < 0) 683 if (memcpy_fromiovecend(to, iov, offset, len) < 0)
684 return -EFAULT; 684 return -EFAULT;
685 } else { 685 } else {
@@ -735,7 +735,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
735 /* initialize protocol header pointer */ 735 /* initialize protocol header pointer */
736 skb->h.raw = skb->data + fragheaderlen; 736 skb->h.raw = skb->data + fragheaderlen;
737 737
738 skb->ip_summed = CHECKSUM_HW; 738 skb->ip_summed = CHECKSUM_PARTIAL;
739 skb->csum = 0; 739 skb->csum = 0;
740 sk->sk_sndmsg_off = 0; 740 sk->sk_sndmsg_off = 0;
741 } 741 }
@@ -843,7 +843,7 @@ int ip_append_data(struct sock *sk,
843 length + fragheaderlen <= mtu && 843 length + fragheaderlen <= mtu &&
844 rt->u.dst.dev->features & NETIF_F_ALL_CSUM && 844 rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
845 !exthdrlen) 845 !exthdrlen)
846 csummode = CHECKSUM_HW; 846 csummode = CHECKSUM_PARTIAL;
847 847
848 inet->cork.length += length; 848 inet->cork.length += length;
849 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && 849 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
@@ -1366,6 +1366,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1366 { .sport = skb->h.th->dest, 1366 { .sport = skb->h.th->dest,
1367 .dport = skb->h.th->source } }, 1367 .dport = skb->h.th->source } },
1368 .proto = sk->sk_protocol }; 1368 .proto = sk->sk_protocol };
1369 security_skb_classify_flow(skb, &fl);
1369 if (ip_route_output_key(&rt, &fl)) 1370 if (ip_route_output_key(&rt, &fl))
1370 return; 1371 return;
1371 } 1372 }
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 5bb9c9f03fb6..17342430a843 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -176,7 +176,7 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
176 return 0; 176 return 0;
177 177
178out_ok: 178out_ok:
179 if (x->props.mode) 179 if (x->props.mode == XFRM_MODE_TUNNEL)
180 ip_send_check(iph); 180 ip_send_check(iph);
181 return 0; 181 return 0;
182} 182}
@@ -216,7 +216,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
216 t->id.daddr.a4 = x->id.daddr.a4; 216 t->id.daddr.a4 = x->id.daddr.a4;
217 memcpy(&t->sel, &x->sel, sizeof(t->sel)); 217 memcpy(&t->sel, &x->sel, sizeof(t->sel));
218 t->props.family = AF_INET; 218 t->props.family = AF_INET;
219 t->props.mode = 1; 219 t->props.mode = XFRM_MODE_TUNNEL;
220 t->props.saddr.a4 = x->props.saddr.a4; 220 t->props.saddr.a4 = x->props.saddr.a4;
221 t->props.flags = x->props.flags; 221 t->props.flags = x->props.flags;
222 222
@@ -416,7 +416,7 @@ static int ipcomp_init_state(struct xfrm_state *x)
416 goto out; 416 goto out;
417 417
418 x->props.header_len = 0; 418 x->props.header_len = 0;
419 if (x->props.mode) 419 if (x->props.mode == XFRM_MODE_TUNNEL)
420 x->props.header_len += sizeof(struct iphdr); 420 x->props.header_len += sizeof(struct iphdr);
421 421
422 mutex_lock(&ipcomp_resource_mutex); 422 mutex_lock(&ipcomp_resource_mutex);
@@ -428,7 +428,7 @@ static int ipcomp_init_state(struct xfrm_state *x)
428 goto error; 428 goto error;
429 mutex_unlock(&ipcomp_resource_mutex); 429 mutex_unlock(&ipcomp_resource_mutex);
430 430
431 if (x->props.mode) { 431 if (x->props.mode == XFRM_MODE_TUNNEL) {
432 err = ipcomp_tunnel_attach(x); 432 err = ipcomp_tunnel_attach(x);
433 if (err) 433 if (err)
434 goto error_tunnel; 434 goto error_tunnel;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index cb8a92f18ef6..1fbb38415b19 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -31,7 +31,6 @@
31 * -- Josef Siemes <jsiemes@web.de>, Aug 2002 31 * -- Josef Siemes <jsiemes@web.de>, Aug 2002
32 */ 32 */
33 33
34#include <linux/config.h>
35#include <linux/types.h> 34#include <linux/types.h>
36#include <linux/string.h> 35#include <linux/string.h>
37#include <linux/kernel.h> 36#include <linux/kernel.h>
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 76ab50b0d6ef..0c4556529228 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -341,7 +341,8 @@ out:
341 int code = skb->h.icmph->code; 341 int code = skb->h.icmph->code;
342 int rel_type = 0; 342 int rel_type = 0;
343 int rel_code = 0; 343 int rel_code = 0;
344 int rel_info = 0; 344 __be32 rel_info = 0;
345 __u32 n = 0;
345 struct sk_buff *skb2; 346 struct sk_buff *skb2;
346 struct flowi fl; 347 struct flowi fl;
347 struct rtable *rt; 348 struct rtable *rt;
@@ -354,14 +355,15 @@ out:
354 default: 355 default:
355 return 0; 356 return 0;
356 case ICMP_PARAMETERPROB: 357 case ICMP_PARAMETERPROB:
357 if (skb->h.icmph->un.gateway < hlen) 358 n = ntohl(skb->h.icmph->un.gateway) >> 24;
359 if (n < hlen)
358 return 0; 360 return 0;
359 361
360 /* So... This guy found something strange INSIDE encapsulated 362 /* So... This guy found something strange INSIDE encapsulated
361 packet. Well, he is fool, but what can we do ? 363 packet. Well, he is fool, but what can we do ?
362 */ 364 */
363 rel_type = ICMP_PARAMETERPROB; 365 rel_type = ICMP_PARAMETERPROB;
364 rel_info = skb->h.icmph->un.gateway - hlen; 366 rel_info = htonl((n - hlen) << 24);
365 break; 367 break;
366 368
367 case ICMP_DEST_UNREACH: 369 case ICMP_DEST_UNREACH:
@@ -372,13 +374,14 @@ out:
372 return 0; 374 return 0;
373 case ICMP_FRAG_NEEDED: 375 case ICMP_FRAG_NEEDED:
374 /* And it is the only really necessary thing :-) */ 376 /* And it is the only really necessary thing :-) */
375 rel_info = ntohs(skb->h.icmph->un.frag.mtu); 377 n = ntohs(skb->h.icmph->un.frag.mtu);
376 if (rel_info < hlen+68) 378 if (n < hlen+68)
377 return 0; 379 return 0;
378 rel_info -= hlen; 380 n -= hlen;
379 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ 381 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
380 if (rel_info > ntohs(eiph->tot_len)) 382 if (n > ntohs(eiph->tot_len))
381 return 0; 383 return 0;
384 rel_info = htonl(n);
382 break; 385 break;
383 default: 386 default:
384 /* All others are translated to HOST_UNREACH. 387 /* All others are translated to HOST_UNREACH.
@@ -440,12 +443,11 @@ out:
440 443
441 /* change mtu on this route */ 444 /* change mtu on this route */
442 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { 445 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
443 if (rel_info > dst_mtu(skb2->dst)) { 446 if (n > dst_mtu(skb2->dst)) {
444 kfree_skb(skb2); 447 kfree_skb(skb2);
445 return 0; 448 return 0;
446 } 449 }
447 skb2->dst->ops->update_pmtu(skb2->dst, rel_info); 450 skb2->dst->ops->update_pmtu(skb2->dst, n);
448 rel_info = htonl(rel_info);
449 } else if (type == ICMP_TIME_EXCEEDED) { 451 } else if (type == ICMP_TIME_EXCEEDED) {
450 struct ip_tunnel *t = netdev_priv(skb2->dev); 452 struct ip_tunnel *t = netdev_priv(skb2->dev);
451 if (t->parms.iph.ttl) { 453 if (t->parms.iph.ttl) {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 85893eef6b16..ba49588da242 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -312,7 +312,8 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
312 e = NLMSG_DATA(nlh); 312 e = NLMSG_DATA(nlh);
313 e->error = -ETIMEDOUT; 313 e->error = -ETIMEDOUT;
314 memset(&e->msg, 0, sizeof(e->msg)); 314 memset(&e->msg, 0, sizeof(e->msg));
315 netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); 315
316 rtnl_unicast(skb, NETLINK_CB(skb).pid);
316 } else 317 } else
317 kfree_skb(skb); 318 kfree_skb(skb);
318 } 319 }
@@ -512,7 +513,6 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
512 513
513 while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { 514 while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
514 if (skb->nh.iph->version == 0) { 515 if (skb->nh.iph->version == 0) {
515 int err;
516 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 516 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
517 517
518 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { 518 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
@@ -525,7 +525,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
525 e->error = -EMSGSIZE; 525 e->error = -EMSGSIZE;
526 memset(&e->msg, 0, sizeof(e->msg)); 526 memset(&e->msg, 0, sizeof(e->msg));
527 } 527 }
528 err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); 528
529 rtnl_unicast(skb, NETLINK_CB(skb).pid);
529 } else 530 } else
530 ip_mr_forward(skb, c, 0); 531 ip_mr_forward(skb, c, 0);
531 } 532 }
@@ -1899,11 +1900,8 @@ void __init ip_mr_init(void)
1899{ 1900{
1900 mrt_cachep = kmem_cache_create("ip_mrt_cache", 1901 mrt_cachep = kmem_cache_create("ip_mrt_cache",
1901 sizeof(struct mfc_cache), 1902 sizeof(struct mfc_cache),
1902 0, SLAB_HWCACHE_ALIGN, 1903 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1903 NULL, NULL); 1904 NULL, NULL);
1904 if (!mrt_cachep)
1905 panic("cannot allocate ip_mrt_cache");
1906
1907 init_timer(&ipmr_expire_timer); 1905 init_timer(&ipmr_expire_timer);
1908 ipmr_expire_timer.function=ipmr_expire_process; 1906 ipmr_expire_timer.function=ipmr_expire_process;
1909 register_netdevice_notifier(&ip_mr_notifier); 1907 register_netdevice_notifier(&ip_mr_notifier);
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index bc28b1160a3a..820e8318d10d 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -151,7 +151,7 @@ tcp_snat_handler(struct sk_buff **pskb,
151 /* Only port and addr are changed, do fast csum update */ 151 /* Only port and addr are changed, do fast csum update */
152 tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, 152 tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
153 cp->dport, cp->vport); 153 cp->dport, cp->vport);
154 if ((*pskb)->ip_summed == CHECKSUM_HW) 154 if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
155 (*pskb)->ip_summed = CHECKSUM_NONE; 155 (*pskb)->ip_summed = CHECKSUM_NONE;
156 } else { 156 } else {
157 /* full checksum calculation */ 157 /* full checksum calculation */
@@ -204,7 +204,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
204 /* Only port and addr are changed, do fast csum update */ 204 /* Only port and addr are changed, do fast csum update */
205 tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, 205 tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
206 cp->vport, cp->dport); 206 cp->vport, cp->dport);
207 if ((*pskb)->ip_summed == CHECKSUM_HW) 207 if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
208 (*pskb)->ip_summed = CHECKSUM_NONE; 208 (*pskb)->ip_summed = CHECKSUM_NONE;
209 } else { 209 } else {
210 /* full checksum calculation */ 210 /* full checksum calculation */
@@ -229,7 +229,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
229 switch (skb->ip_summed) { 229 switch (skb->ip_summed) {
230 case CHECKSUM_NONE: 230 case CHECKSUM_NONE:
231 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); 231 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
232 case CHECKSUM_HW: 232 case CHECKSUM_COMPLETE:
233 if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, 233 if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
234 skb->len - tcphoff, 234 skb->len - tcphoff,
235 skb->nh.iph->protocol, skb->csum)) { 235 skb->nh.iph->protocol, skb->csum)) {
@@ -239,7 +239,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
239 } 239 }
240 break; 240 break;
241 default: 241 default:
242 /* CHECKSUM_UNNECESSARY */ 242 /* No need to checksum. */
243 break; 243 break;
244 } 244 }
245 245
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 89d9175d8f28..90c8166c0ec1 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -161,7 +161,7 @@ udp_snat_handler(struct sk_buff **pskb,
161 /* Only port and addr are changed, do fast csum update */ 161 /* Only port and addr are changed, do fast csum update */
162 udp_fast_csum_update(udph, cp->daddr, cp->vaddr, 162 udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
163 cp->dport, cp->vport); 163 cp->dport, cp->vport);
164 if ((*pskb)->ip_summed == CHECKSUM_HW) 164 if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
165 (*pskb)->ip_summed = CHECKSUM_NONE; 165 (*pskb)->ip_summed = CHECKSUM_NONE;
166 } else { 166 } else {
167 /* full checksum calculation */ 167 /* full checksum calculation */
@@ -216,7 +216,7 @@ udp_dnat_handler(struct sk_buff **pskb,
216 /* Only port and addr are changed, do fast csum update */ 216 /* Only port and addr are changed, do fast csum update */
217 udp_fast_csum_update(udph, cp->vaddr, cp->daddr, 217 udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
218 cp->vport, cp->dport); 218 cp->vport, cp->dport);
219 if ((*pskb)->ip_summed == CHECKSUM_HW) 219 if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
220 (*pskb)->ip_summed = CHECKSUM_NONE; 220 (*pskb)->ip_summed = CHECKSUM_NONE;
221 } else { 221 } else {
222 /* full checksum calculation */ 222 /* full checksum calculation */
@@ -250,7 +250,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
250 case CHECKSUM_NONE: 250 case CHECKSUM_NONE:
251 skb->csum = skb_checksum(skb, udphoff, 251 skb->csum = skb_checksum(skb, udphoff,
252 skb->len - udphoff, 0); 252 skb->len - udphoff, 0);
253 case CHECKSUM_HW: 253 case CHECKSUM_COMPLETE:
254 if (csum_tcpudp_magic(skb->nh.iph->saddr, 254 if (csum_tcpudp_magic(skb->nh.iph->saddr,
255 skb->nh.iph->daddr, 255 skb->nh.iph->daddr,
256 skb->len - udphoff, 256 skb->len - udphoff,
@@ -262,7 +262,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
262 } 262 }
263 break; 263 break;
264 default: 264 default:
265 /* CHECKSUM_UNNECESSARY */ 265 /* No need to checksum. */
266 break; 266 break;
267 } 267 }
268 } 268 }
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 6a9e34b794bc..f88347de21a9 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -168,7 +168,7 @@ unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
168 unsigned int csum = 0; 168 unsigned int csum = 0;
169 169
170 switch (skb->ip_summed) { 170 switch (skb->ip_summed) {
171 case CHECKSUM_HW: 171 case CHECKSUM_COMPLETE:
172 if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN) 172 if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN)
173 break; 173 break;
174 if ((protocol == 0 && !(u16)csum_fold(skb->csum)) || 174 if ((protocol == 0 && !(u16)csum_fold(skb->csum)) ||
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index ef0b5aac5838..a55b8ff70ded 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -278,17 +278,6 @@ config IP_NF_MATCH_ECN
278 278
279 To compile it as a module, choose M here. If unsure, say N. 279 To compile it as a module, choose M here. If unsure, say N.
280 280
281config IP_NF_MATCH_DSCP
282 tristate "DSCP match support"
283 depends on IP_NF_IPTABLES
284 help
285 This option adds a `DSCP' match, which allows you to match against
286 the IPv4 header DSCP field (DSCP codepoint).
287
288 The DSCP codepoint can have any value between 0x0 and 0x4f.
289
290 To compile it as a module, choose M here. If unsure, say N.
291
292config IP_NF_MATCH_AH 281config IP_NF_MATCH_AH
293 tristate "AH match support" 282 tristate "AH match support"
294 depends on IP_NF_IPTABLES 283 depends on IP_NF_IPTABLES
@@ -568,17 +557,6 @@ config IP_NF_TARGET_ECN
568 557
569 To compile it as a module, choose M here. If unsure, say N. 558 To compile it as a module, choose M here. If unsure, say N.
570 559
571config IP_NF_TARGET_DSCP
572 tristate "DSCP target support"
573 depends on IP_NF_MANGLE
574 help
575 This option adds a `DSCP' match, which allows you to match against
576 the IPv4 header DSCP field (DSCP codepoint).
577
578 The DSCP codepoint can have any value between 0x0 and 0x4f.
579
580 To compile it as a module, choose M here. If unsure, say N.
581
582config IP_NF_TARGET_TTL 560config IP_NF_TARGET_TTL
583 tristate 'TTL target support' 561 tristate 'TTL target support'
584 depends on IP_NF_MANGLE 562 depends on IP_NF_MANGLE
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3ded4a3af59c..09aaed1a8063 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -59,7 +59,6 @@ obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
59obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o 59obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
60obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o 60obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
61obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o 61obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
62obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o
63obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o 62obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
64obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o 63obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
65obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o 64obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
@@ -68,7 +67,6 @@ obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
68obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o 67obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
69obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o 68obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
70obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o 69obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
71obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o
72obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o 70obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
73obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o 71obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
74obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o 72obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 8d1d7a6e72a5..85f0d73ebfb4 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -56,8 +56,6 @@ do { \
56#define ARP_NF_ASSERT(x) 56#define ARP_NF_ASSERT(x)
57#endif 57#endif
58 58
59#include <linux/netfilter_ipv4/listhelp.h>
60
61static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, 59static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
62 char *hdr_addr, int len) 60 char *hdr_addr, int len)
63{ 61{
@@ -208,8 +206,7 @@ static unsigned int arpt_error(struct sk_buff **pskb,
208 const struct net_device *out, 206 const struct net_device *out,
209 unsigned int hooknum, 207 unsigned int hooknum,
210 const struct xt_target *target, 208 const struct xt_target *target,
211 const void *targinfo, 209 const void *targinfo)
212 void *userinfo)
213{ 210{
214 if (net_ratelimit()) 211 if (net_ratelimit())
215 printk("arp_tables: error: '%s'\n", (char *)targinfo); 212 printk("arp_tables: error: '%s'\n", (char *)targinfo);
@@ -226,8 +223,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
226 unsigned int hook, 223 unsigned int hook,
227 const struct net_device *in, 224 const struct net_device *in,
228 const struct net_device *out, 225 const struct net_device *out,
229 struct arpt_table *table, 226 struct arpt_table *table)
230 void *userdata)
231{ 227{
232 static const char nulldevname[IFNAMSIZ]; 228 static const char nulldevname[IFNAMSIZ];
233 unsigned int verdict = NF_DROP; 229 unsigned int verdict = NF_DROP;
@@ -302,8 +298,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
302 in, out, 298 in, out,
303 hook, 299 hook,
304 t->u.kernel.target, 300 t->u.kernel.target,
305 t->data, 301 t->data);
306 userdata);
307 302
308 /* Target might have changed stuff. */ 303 /* Target might have changed stuff. */
309 arp = (*pskb)->nh.arph; 304 arp = (*pskb)->nh.arph;
@@ -490,12 +485,10 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
490 if (t->u.kernel.target == &arpt_standard_target) { 485 if (t->u.kernel.target == &arpt_standard_target) {
491 if (!standard_check(t, size)) { 486 if (!standard_check(t, size)) {
492 ret = -EINVAL; 487 ret = -EINVAL;
493 goto out; 488 goto err;
494 } 489 }
495 } else if (t->u.kernel.target->checkentry 490 } else if (t->u.kernel.target->checkentry
496 && !t->u.kernel.target->checkentry(name, e, target, t->data, 491 && !t->u.kernel.target->checkentry(name, e, target, t->data,
497 t->u.target_size
498 - sizeof(*t),
499 e->comefrom)) { 492 e->comefrom)) {
500 duprintf("arp_tables: check failed for `%s'.\n", 493 duprintf("arp_tables: check failed for `%s'.\n",
501 t->u.kernel.target->name); 494 t->u.kernel.target->name);
@@ -562,8 +555,7 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
562 555
563 t = arpt_get_target(e); 556 t = arpt_get_target(e);
564 if (t->u.kernel.target->destroy) 557 if (t->u.kernel.target->destroy)
565 t->u.kernel.target->destroy(t->u.kernel.target, t->data, 558 t->u.kernel.target->destroy(t->u.kernel.target, t->data);
566 t->u.target_size - sizeof(*t));
567 module_put(t->u.kernel.target->me); 559 module_put(t->u.kernel.target->me);
568 return 0; 560 return 0;
569} 561}
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index a58325c1ceb9..d12b1df252a1 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -11,7 +11,7 @@ static unsigned int
11target(struct sk_buff **pskb, 11target(struct sk_buff **pskb,
12 const struct net_device *in, const struct net_device *out, 12 const struct net_device *in, const struct net_device *out,
13 unsigned int hooknum, const struct xt_target *target, 13 unsigned int hooknum, const struct xt_target *target,
14 const void *targinfo, void *userinfo) 14 const void *targinfo)
15{ 15{
16 const struct arpt_mangle *mangle = targinfo; 16 const struct arpt_mangle *mangle = targinfo;
17 struct arphdr *arp; 17 struct arphdr *arp;
@@ -67,7 +67,7 @@ target(struct sk_buff **pskb,
67 67
68static int 68static int
69checkentry(const char *tablename, const void *e, const struct xt_target *target, 69checkentry(const char *tablename, const void *e, const struct xt_target *target,
70 void *targinfo, unsigned int targinfosize, unsigned int hook_mask) 70 void *targinfo, unsigned int hook_mask)
71{ 71{
72 const struct arpt_mangle *mangle = targinfo; 72 const struct arpt_mangle *mangle = targinfo;
73 73
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index d7c472faa53b..7edea2a1696c 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -155,7 +155,7 @@ static unsigned int arpt_hook(unsigned int hook,
155 const struct net_device *out, 155 const struct net_device *out,
156 int (*okfn)(struct sk_buff *)) 156 int (*okfn)(struct sk_buff *))
157{ 157{
158 return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL); 158 return arpt_do_table(pskb, hook, in, out, &packet_filter);
159} 159}
160 160
161static struct nf_hook_ops arpt_ops[] = { 161static struct nf_hook_ops arpt_ops[] = {
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index aa459177c3f8..c432b3163609 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -47,7 +47,6 @@
47#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 47#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
48#include <linux/netfilter_ipv4/ip_conntrack_helper.h> 48#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
49#include <linux/netfilter_ipv4/ip_conntrack_core.h> 49#include <linux/netfilter_ipv4/ip_conntrack_core.h>
50#include <linux/netfilter_ipv4/listhelp.h>
51 50
52#define IP_CONNTRACK_VERSION "2.4" 51#define IP_CONNTRACK_VERSION "2.4"
53 52
@@ -64,17 +63,17 @@ atomic_t ip_conntrack_count = ATOMIC_INIT(0);
64 63
65void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; 64void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
66LIST_HEAD(ip_conntrack_expect_list); 65LIST_HEAD(ip_conntrack_expect_list);
67struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; 66struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly;
68static LIST_HEAD(helpers); 67static LIST_HEAD(helpers);
69unsigned int ip_conntrack_htable_size = 0; 68unsigned int ip_conntrack_htable_size __read_mostly = 0;
70int ip_conntrack_max; 69int ip_conntrack_max __read_mostly;
71struct list_head *ip_conntrack_hash; 70struct list_head *ip_conntrack_hash __read_mostly;
72static kmem_cache_t *ip_conntrack_cachep __read_mostly; 71static kmem_cache_t *ip_conntrack_cachep __read_mostly;
73static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; 72static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
74struct ip_conntrack ip_conntrack_untracked; 73struct ip_conntrack ip_conntrack_untracked;
75unsigned int ip_ct_log_invalid; 74unsigned int ip_ct_log_invalid __read_mostly;
76static LIST_HEAD(unconfirmed); 75static LIST_HEAD(unconfirmed);
77static int ip_conntrack_vmalloc; 76static int ip_conntrack_vmalloc __read_mostly;
78 77
79static unsigned int ip_conntrack_next_id; 78static unsigned int ip_conntrack_next_id;
80static unsigned int ip_conntrack_expect_next_id; 79static unsigned int ip_conntrack_expect_next_id;
@@ -294,15 +293,10 @@ void ip_ct_remove_expectations(struct ip_conntrack *ct)
294static void 293static void
295clean_from_lists(struct ip_conntrack *ct) 294clean_from_lists(struct ip_conntrack *ct)
296{ 295{
297 unsigned int ho, hr;
298
299 DEBUGP("clean_from_lists(%p)\n", ct); 296 DEBUGP("clean_from_lists(%p)\n", ct);
300 ASSERT_WRITE_LOCK(&ip_conntrack_lock); 297 ASSERT_WRITE_LOCK(&ip_conntrack_lock);
301 298 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
302 ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 299 list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
303 hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
304 LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
305 LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
306 300
307 /* Destroy all pending expectations */ 301 /* Destroy all pending expectations */
308 ip_ct_remove_expectations(ct); 302 ip_ct_remove_expectations(ct);
@@ -313,6 +307,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
313{ 307{
314 struct ip_conntrack *ct = (struct ip_conntrack *)nfct; 308 struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
315 struct ip_conntrack_protocol *proto; 309 struct ip_conntrack_protocol *proto;
310 struct ip_conntrack_helper *helper;
316 311
317 DEBUGP("destroy_conntrack(%p)\n", ct); 312 DEBUGP("destroy_conntrack(%p)\n", ct);
318 IP_NF_ASSERT(atomic_read(&nfct->use) == 0); 313 IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
@@ -321,6 +316,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
321 ip_conntrack_event(IPCT_DESTROY, ct); 316 ip_conntrack_event(IPCT_DESTROY, ct);
322 set_bit(IPS_DYING_BIT, &ct->status); 317 set_bit(IPS_DYING_BIT, &ct->status);
323 318
319 helper = ct->helper;
320 if (helper && helper->destroy)
321 helper->destroy(ct);
322
324 /* To make sure we don't get any weird locking issues here: 323 /* To make sure we don't get any weird locking issues here:
325 * destroy_conntrack() MUST NOT be called with a write lock 324 * destroy_conntrack() MUST NOT be called with a write lock
326 * to ip_conntrack_lock!!! -HW */ 325 * to ip_conntrack_lock!!! -HW */
@@ -367,16 +366,6 @@ static void death_by_timeout(unsigned long ul_conntrack)
367 ip_conntrack_put(ct); 366 ip_conntrack_put(ct);
368} 367}
369 368
370static inline int
371conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
372 const struct ip_conntrack_tuple *tuple,
373 const struct ip_conntrack *ignored_conntrack)
374{
375 ASSERT_READ_LOCK(&ip_conntrack_lock);
376 return tuplehash_to_ctrack(i) != ignored_conntrack
377 && ip_ct_tuple_equal(tuple, &i->tuple);
378}
379
380struct ip_conntrack_tuple_hash * 369struct ip_conntrack_tuple_hash *
381__ip_conntrack_find(const struct ip_conntrack_tuple *tuple, 370__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
382 const struct ip_conntrack *ignored_conntrack) 371 const struct ip_conntrack *ignored_conntrack)
@@ -386,7 +375,8 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
386 375
387 ASSERT_READ_LOCK(&ip_conntrack_lock); 376 ASSERT_READ_LOCK(&ip_conntrack_lock);
388 list_for_each_entry(h, &ip_conntrack_hash[hash], list) { 377 list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
389 if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { 378 if (tuplehash_to_ctrack(h) != ignored_conntrack &&
379 ip_ct_tuple_equal(tuple, &h->tuple)) {
390 CONNTRACK_STAT_INC(found); 380 CONNTRACK_STAT_INC(found);
391 return h; 381 return h;
392 } 382 }
@@ -417,10 +407,10 @@ static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
417 unsigned int repl_hash) 407 unsigned int repl_hash)
418{ 408{
419 ct->id = ++ip_conntrack_next_id; 409 ct->id = ++ip_conntrack_next_id;
420 list_prepend(&ip_conntrack_hash[hash], 410 list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
421 &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); 411 &ip_conntrack_hash[hash]);
422 list_prepend(&ip_conntrack_hash[repl_hash], 412 list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
423 &ct->tuplehash[IP_CT_DIR_REPLY].list); 413 &ip_conntrack_hash[repl_hash]);
424} 414}
425 415
426void ip_conntrack_hash_insert(struct ip_conntrack *ct) 416void ip_conntrack_hash_insert(struct ip_conntrack *ct)
@@ -440,6 +430,7 @@ int
440__ip_conntrack_confirm(struct sk_buff **pskb) 430__ip_conntrack_confirm(struct sk_buff **pskb)
441{ 431{
442 unsigned int hash, repl_hash; 432 unsigned int hash, repl_hash;
433 struct ip_conntrack_tuple_hash *h;
443 struct ip_conntrack *ct; 434 struct ip_conntrack *ct;
444 enum ip_conntrack_info ctinfo; 435 enum ip_conntrack_info ctinfo;
445 436
@@ -470,43 +461,43 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
470 /* See if there's one in the list already, including reverse: 461 /* See if there's one in the list already, including reverse:
471 NAT could have grabbed it without realizing, since we're 462 NAT could have grabbed it without realizing, since we're
472 not in the hash. If there is, we lost race. */ 463 not in the hash. If there is, we lost race. */
473 if (!LIST_FIND(&ip_conntrack_hash[hash], 464 list_for_each_entry(h, &ip_conntrack_hash[hash], list)
474 conntrack_tuple_cmp, 465 if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
475 struct ip_conntrack_tuple_hash *, 466 &h->tuple))
476 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) 467 goto out;
477 && !LIST_FIND(&ip_conntrack_hash[repl_hash], 468 list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list)
478 conntrack_tuple_cmp, 469 if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
479 struct ip_conntrack_tuple_hash *, 470 &h->tuple))
480 &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { 471 goto out;
481 /* Remove from unconfirmed list */
482 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
483 472
484 __ip_conntrack_hash_insert(ct, hash, repl_hash); 473 /* Remove from unconfirmed list */
485 /* Timer relative to confirmation time, not original 474 list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
486 setting time, otherwise we'd get timer wrap in 475
487 weird delay cases. */ 476 __ip_conntrack_hash_insert(ct, hash, repl_hash);
488 ct->timeout.expires += jiffies; 477 /* Timer relative to confirmation time, not original
489 add_timer(&ct->timeout); 478 setting time, otherwise we'd get timer wrap in
490 atomic_inc(&ct->ct_general.use); 479 weird delay cases. */
491 set_bit(IPS_CONFIRMED_BIT, &ct->status); 480 ct->timeout.expires += jiffies;
492 CONNTRACK_STAT_INC(insert); 481 add_timer(&ct->timeout);
493 write_unlock_bh(&ip_conntrack_lock); 482 atomic_inc(&ct->ct_general.use);
494 if (ct->helper) 483 set_bit(IPS_CONFIRMED_BIT, &ct->status);
495 ip_conntrack_event_cache(IPCT_HELPER, *pskb); 484 CONNTRACK_STAT_INC(insert);
485 write_unlock_bh(&ip_conntrack_lock);
486 if (ct->helper)
487 ip_conntrack_event_cache(IPCT_HELPER, *pskb);
496#ifdef CONFIG_IP_NF_NAT_NEEDED 488#ifdef CONFIG_IP_NF_NAT_NEEDED
497 if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || 489 if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
498 test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) 490 test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
499 ip_conntrack_event_cache(IPCT_NATINFO, *pskb); 491 ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
500#endif 492#endif
501 ip_conntrack_event_cache(master_ct(ct) ? 493 ip_conntrack_event_cache(master_ct(ct) ?
502 IPCT_RELATED : IPCT_NEW, *pskb); 494 IPCT_RELATED : IPCT_NEW, *pskb);
503 495
504 return NF_ACCEPT; 496 return NF_ACCEPT;
505 }
506 497
498out:
507 CONNTRACK_STAT_INC(insert_failed); 499 CONNTRACK_STAT_INC(insert_failed);
508 write_unlock_bh(&ip_conntrack_lock); 500 write_unlock_bh(&ip_conntrack_lock);
509
510 return NF_DROP; 501 return NF_DROP;
511} 502}
512 503
@@ -527,23 +518,21 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
527 518
528/* There's a small race here where we may free a just-assured 519/* There's a small race here where we may free a just-assured
529 connection. Too bad: we're in trouble anyway. */ 520 connection. Too bad: we're in trouble anyway. */
530static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
531{
532 return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
533}
534
535static int early_drop(struct list_head *chain) 521static int early_drop(struct list_head *chain)
536{ 522{
537 /* Traverse backwards: gives us oldest, which is roughly LRU */ 523 /* Traverse backwards: gives us oldest, which is roughly LRU */
538 struct ip_conntrack_tuple_hash *h; 524 struct ip_conntrack_tuple_hash *h;
539 struct ip_conntrack *ct = NULL; 525 struct ip_conntrack *ct = NULL, *tmp;
540 int dropped = 0; 526 int dropped = 0;
541 527
542 read_lock_bh(&ip_conntrack_lock); 528 read_lock_bh(&ip_conntrack_lock);
543 h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *); 529 list_for_each_entry_reverse(h, chain, list) {
544 if (h) { 530 tmp = tuplehash_to_ctrack(h);
545 ct = tuplehash_to_ctrack(h); 531 if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
546 atomic_inc(&ct->ct_general.use); 532 ct = tmp;
533 atomic_inc(&ct->ct_general.use);
534 break;
535 }
547 } 536 }
548 read_unlock_bh(&ip_conntrack_lock); 537 read_unlock_bh(&ip_conntrack_lock);
549 538
@@ -559,18 +548,16 @@ static int early_drop(struct list_head *chain)
559 return dropped; 548 return dropped;
560} 549}
561 550
562static inline int helper_cmp(const struct ip_conntrack_helper *i,
563 const struct ip_conntrack_tuple *rtuple)
564{
565 return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
566}
567
568static struct ip_conntrack_helper * 551static struct ip_conntrack_helper *
569__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple) 552__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
570{ 553{
571 return LIST_FIND(&helpers, helper_cmp, 554 struct ip_conntrack_helper *h;
572 struct ip_conntrack_helper *, 555
573 tuple); 556 list_for_each_entry(h, &helpers, list) {
557 if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
558 return h;
559 }
560 return NULL;
574} 561}
575 562
576struct ip_conntrack_helper * 563struct ip_conntrack_helper *
@@ -640,11 +627,15 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
640 ip_conntrack_hash_rnd_initted = 1; 627 ip_conntrack_hash_rnd_initted = 1;
641 } 628 }
642 629
630 /* We don't want any race condition at early drop stage */
631 atomic_inc(&ip_conntrack_count);
632
643 if (ip_conntrack_max 633 if (ip_conntrack_max
644 && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { 634 && atomic_read(&ip_conntrack_count) > ip_conntrack_max) {
645 unsigned int hash = hash_conntrack(orig); 635 unsigned int hash = hash_conntrack(orig);
646 /* Try dropping from this hash chain. */ 636 /* Try dropping from this hash chain. */
647 if (!early_drop(&ip_conntrack_hash[hash])) { 637 if (!early_drop(&ip_conntrack_hash[hash])) {
638 atomic_dec(&ip_conntrack_count);
648 if (net_ratelimit()) 639 if (net_ratelimit())
649 printk(KERN_WARNING 640 printk(KERN_WARNING
650 "ip_conntrack: table full, dropping" 641 "ip_conntrack: table full, dropping"
@@ -656,6 +647,7 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
656 conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); 647 conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
657 if (!conntrack) { 648 if (!conntrack) {
658 DEBUGP("Can't allocate conntrack.\n"); 649 DEBUGP("Can't allocate conntrack.\n");
650 atomic_dec(&ip_conntrack_count);
659 return ERR_PTR(-ENOMEM); 651 return ERR_PTR(-ENOMEM);
660 } 652 }
661 653
@@ -669,8 +661,6 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
669 conntrack->timeout.data = (unsigned long)conntrack; 661 conntrack->timeout.data = (unsigned long)conntrack;
670 conntrack->timeout.function = death_by_timeout; 662 conntrack->timeout.function = death_by_timeout;
671 663
672 atomic_inc(&ip_conntrack_count);
673
674 return conntrack; 664 return conntrack;
675} 665}
676 666
@@ -1062,7 +1052,7 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
1062{ 1052{
1063 BUG_ON(me->timeout == 0); 1053 BUG_ON(me->timeout == 0);
1064 write_lock_bh(&ip_conntrack_lock); 1054 write_lock_bh(&ip_conntrack_lock);
1065 list_prepend(&helpers, me); 1055 list_add(&me->list, &helpers);
1066 write_unlock_bh(&ip_conntrack_lock); 1056 write_unlock_bh(&ip_conntrack_lock);
1067 1057
1068 return 0; 1058 return 0;
@@ -1081,24 +1071,24 @@ __ip_conntrack_helper_find_byname(const char *name)
1081 return NULL; 1071 return NULL;
1082} 1072}
1083 1073
1084static inline int unhelp(struct ip_conntrack_tuple_hash *i, 1074static inline void unhelp(struct ip_conntrack_tuple_hash *i,
1085 const struct ip_conntrack_helper *me) 1075 const struct ip_conntrack_helper *me)
1086{ 1076{
1087 if (tuplehash_to_ctrack(i)->helper == me) { 1077 if (tuplehash_to_ctrack(i)->helper == me) {
1088 ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); 1078 ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
1089 tuplehash_to_ctrack(i)->helper = NULL; 1079 tuplehash_to_ctrack(i)->helper = NULL;
1090 } 1080 }
1091 return 0;
1092} 1081}
1093 1082
1094void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) 1083void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
1095{ 1084{
1096 unsigned int i; 1085 unsigned int i;
1086 struct ip_conntrack_tuple_hash *h;
1097 struct ip_conntrack_expect *exp, *tmp; 1087 struct ip_conntrack_expect *exp, *tmp;
1098 1088
1099 /* Need write lock here, to delete helper. */ 1089 /* Need write lock here, to delete helper. */
1100 write_lock_bh(&ip_conntrack_lock); 1090 write_lock_bh(&ip_conntrack_lock);
1101 LIST_DELETE(&helpers, me); 1091 list_del(&me->list);
1102 1092
1103 /* Get rid of expectations */ 1093 /* Get rid of expectations */
1104 list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { 1094 list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
@@ -1108,10 +1098,12 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
1108 } 1098 }
1109 } 1099 }
1110 /* Get rid of expecteds, set helpers to NULL. */ 1100 /* Get rid of expecteds, set helpers to NULL. */
1111 LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me); 1101 list_for_each_entry(h, &unconfirmed, list)
1112 for (i = 0; i < ip_conntrack_htable_size; i++) 1102 unhelp(h, me);
1113 LIST_FIND_W(&ip_conntrack_hash[i], unhelp, 1103 for (i = 0; i < ip_conntrack_htable_size; i++) {
1114 struct ip_conntrack_tuple_hash *, me); 1104 list_for_each_entry(h, &ip_conntrack_hash[i], list)
1105 unhelp(h, me);
1106 }
1115 write_unlock_bh(&ip_conntrack_lock); 1107 write_unlock_bh(&ip_conntrack_lock);
1116 1108
1117 /* Someone could be still looking at the helper in a bh. */ 1109 /* Someone could be still looking at the helper in a bh. */
@@ -1237,46 +1229,43 @@ static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
1237 nf_conntrack_get(nskb->nfct); 1229 nf_conntrack_get(nskb->nfct);
1238} 1230}
1239 1231
1240static inline int
1241do_iter(const struct ip_conntrack_tuple_hash *i,
1242 int (*iter)(struct ip_conntrack *i, void *data),
1243 void *data)
1244{
1245 return iter(tuplehash_to_ctrack(i), data);
1246}
1247
1248/* Bring out ya dead! */ 1232/* Bring out ya dead! */
1249static struct ip_conntrack_tuple_hash * 1233static struct ip_conntrack *
1250get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data), 1234get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
1251 void *data, unsigned int *bucket) 1235 void *data, unsigned int *bucket)
1252{ 1236{
1253 struct ip_conntrack_tuple_hash *h = NULL; 1237 struct ip_conntrack_tuple_hash *h;
1238 struct ip_conntrack *ct;
1254 1239
1255 write_lock_bh(&ip_conntrack_lock); 1240 write_lock_bh(&ip_conntrack_lock);
1256 for (; *bucket < ip_conntrack_htable_size; (*bucket)++) { 1241 for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
1257 h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter, 1242 list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) {
1258 struct ip_conntrack_tuple_hash *, iter, data); 1243 ct = tuplehash_to_ctrack(h);
1259 if (h) 1244 if (iter(ct, data))
1260 break; 1245 goto found;
1246 }
1247 }
1248 list_for_each_entry(h, &unconfirmed, list) {
1249 ct = tuplehash_to_ctrack(h);
1250 if (iter(ct, data))
1251 goto found;
1261 } 1252 }
1262 if (!h)
1263 h = LIST_FIND_W(&unconfirmed, do_iter,
1264 struct ip_conntrack_tuple_hash *, iter, data);
1265 if (h)
1266 atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
1267 write_unlock_bh(&ip_conntrack_lock); 1253 write_unlock_bh(&ip_conntrack_lock);
1254 return NULL;
1268 1255
1269 return h; 1256found:
1257 atomic_inc(&ct->ct_general.use);
1258 write_unlock_bh(&ip_conntrack_lock);
1259 return ct;
1270} 1260}
1271 1261
1272void 1262void
1273ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) 1263ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
1274{ 1264{
1275 struct ip_conntrack_tuple_hash *h; 1265 struct ip_conntrack *ct;
1276 unsigned int bucket = 0; 1266 unsigned int bucket = 0;
1277 1267
1278 while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { 1268 while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
1279 struct ip_conntrack *ct = tuplehash_to_ctrack(h);
1280 /* Time to push up daises... */ 1269 /* Time to push up daises... */
1281 if (del_timer(&ct->timeout)) 1270 if (del_timer(&ct->timeout))
1282 death_by_timeout((unsigned long)ct); 1271 death_by_timeout((unsigned long)ct);
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index b020a33e65e9..fb0aee691721 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -20,11 +20,11 @@
20 * - We can only support one single call within each session 20 * - We can only support one single call within each session
21 * 21 *
22 * TODO: 22 * TODO:
23 * - testing of incoming PPTP calls 23 * - testing of incoming PPTP calls
24 * 24 *
25 * Changes: 25 * Changes:
26 * 2002-02-05 - Version 1.3 26 * 2002-02-05 - Version 1.3
27 * - Call ip_conntrack_unexpect_related() from 27 * - Call ip_conntrack_unexpect_related() from
28 * pptp_destroy_siblings() to destroy expectations in case 28 * pptp_destroy_siblings() to destroy expectations in case
29 * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen 29 * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
30 * (Philip Craig <philipc@snapgear.com>) 30 * (Philip Craig <philipc@snapgear.com>)
@@ -80,7 +80,7 @@ int
80 struct PptpControlHeader *ctlh, 80 struct PptpControlHeader *ctlh,
81 union pptp_ctrl_union *pptpReq); 81 union pptp_ctrl_union *pptpReq);
82 82
83int 83void
84(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig, 84(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
85 struct ip_conntrack_expect *expect_reply); 85 struct ip_conntrack_expect *expect_reply);
86 86
@@ -141,7 +141,7 @@ static void pptp_expectfn(struct ip_conntrack *ct,
141 invert_tuplepr(&inv_t, &exp->tuple); 141 invert_tuplepr(&inv_t, &exp->tuple);
142 DEBUGP("trying to unexpect other dir: "); 142 DEBUGP("trying to unexpect other dir: ");
143 DUMP_TUPLE(&inv_t); 143 DUMP_TUPLE(&inv_t);
144 144
145 exp_other = ip_conntrack_expect_find(&inv_t); 145 exp_other = ip_conntrack_expect_find(&inv_t);
146 if (exp_other) { 146 if (exp_other) {
147 /* delete other expectation. */ 147 /* delete other expectation. */
@@ -194,15 +194,16 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
194{ 194{
195 struct ip_conntrack_tuple t; 195 struct ip_conntrack_tuple t;
196 196
197 /* Since ct->sibling_list has literally rusted away in 2.6.11, 197 ip_ct_gre_keymap_destroy(ct);
198 /* Since ct->sibling_list has literally rusted away in 2.6.11,
198 * we now need another way to find out about our sibling 199 * we now need another way to find out about our sibling
199 * contrack and expects... -HW */ 200 * contrack and expects... -HW */
200 201
201 /* try original (pns->pac) tuple */ 202 /* try original (pns->pac) tuple */
202 memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); 203 memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
203 t.dst.protonum = IPPROTO_GRE; 204 t.dst.protonum = IPPROTO_GRE;
204 t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); 205 t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
205 t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); 206 t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
206 207
207 if (!destroy_sibling_or_exp(&t)) 208 if (!destroy_sibling_or_exp(&t))
208 DEBUGP("failed to timeout original pns->pac ct/exp\n"); 209 DEBUGP("failed to timeout original pns->pac ct/exp\n");
@@ -210,8 +211,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
210 /* try reply (pac->pns) tuple */ 211 /* try reply (pac->pns) tuple */
211 memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); 212 memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
212 t.dst.protonum = IPPROTO_GRE; 213 t.dst.protonum = IPPROTO_GRE;
213 t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); 214 t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
214 t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); 215 t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
215 216
216 if (!destroy_sibling_or_exp(&t)) 217 if (!destroy_sibling_or_exp(&t))
217 DEBUGP("failed to timeout reply pac->pns ct/exp\n"); 218 DEBUGP("failed to timeout reply pac->pns ct/exp\n");
@@ -219,94 +220,63 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
219 220
220/* expect GRE connections (PNS->PAC and PAC->PNS direction) */ 221/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
221static inline int 222static inline int
222exp_gre(struct ip_conntrack *master, 223exp_gre(struct ip_conntrack *ct,
223 u_int32_t seq,
224 __be16 callid, 224 __be16 callid,
225 __be16 peer_callid) 225 __be16 peer_callid)
226{ 226{
227 struct ip_conntrack_tuple inv_tuple;
228 struct ip_conntrack_tuple exp_tuples[] = {
229 /* tuple in original direction, PNS->PAC */
230 { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip,
231 .u = { .gre = { .key = peer_callid } }
232 },
233 .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip,
234 .u = { .gre = { .key = callid } },
235 .protonum = IPPROTO_GRE
236 },
237 },
238 /* tuple in reply direction, PAC->PNS */
239 { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
240 .u = { .gre = { .key = callid } }
241 },
242 .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
243 .u = { .gre = { .key = peer_callid } },
244 .protonum = IPPROTO_GRE
245 },
246 }
247 };
248 struct ip_conntrack_expect *exp_orig, *exp_reply; 227 struct ip_conntrack_expect *exp_orig, *exp_reply;
249 int ret = 1; 228 int ret = 1;
250 229
251 exp_orig = ip_conntrack_expect_alloc(master); 230 exp_orig = ip_conntrack_expect_alloc(ct);
252 if (exp_orig == NULL) 231 if (exp_orig == NULL)
253 goto out; 232 goto out;
254 233
255 exp_reply = ip_conntrack_expect_alloc(master); 234 exp_reply = ip_conntrack_expect_alloc(ct);
256 if (exp_reply == NULL) 235 if (exp_reply == NULL)
257 goto out_put_orig; 236 goto out_put_orig;
258 237
259 memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple)); 238 /* original direction, PNS->PAC */
239 exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
240 exp_orig->tuple.src.u.gre.key = peer_callid;
241 exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
242 exp_orig->tuple.dst.u.gre.key = callid;
243 exp_orig->tuple.dst.protonum = IPPROTO_GRE;
260 244
261 exp_orig->mask.src.ip = 0xffffffff; 245 exp_orig->mask.src.ip = 0xffffffff;
262 exp_orig->mask.src.u.all = 0; 246 exp_orig->mask.src.u.all = 0;
263 exp_orig->mask.dst.u.all = 0;
264 exp_orig->mask.dst.u.gre.key = htons(0xffff); 247 exp_orig->mask.dst.u.gre.key = htons(0xffff);
265 exp_orig->mask.dst.ip = 0xffffffff; 248 exp_orig->mask.dst.ip = 0xffffffff;
266 exp_orig->mask.dst.protonum = 0xff; 249 exp_orig->mask.dst.protonum = 0xff;
267 250
268 exp_orig->master = master; 251 exp_orig->master = ct;
269 exp_orig->expectfn = pptp_expectfn; 252 exp_orig->expectfn = pptp_expectfn;
270 exp_orig->flags = 0; 253 exp_orig->flags = 0;
271 254
272 /* both expectations are identical apart from tuple */ 255 /* both expectations are identical apart from tuple */
273 memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); 256 memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
274 memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
275 257
276 if (ip_nat_pptp_hook_exp_gre) 258 /* reply direction, PAC->PNS */
277 ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); 259 exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
278 else { 260 exp_reply->tuple.src.u.gre.key = callid;
279 261 exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
280 DEBUGP("calling expect_related PNS->PAC"); 262 exp_reply->tuple.dst.u.gre.key = peer_callid;
281 DUMP_TUPLE(&exp_orig->tuple); 263 exp_reply->tuple.dst.protonum = IPPROTO_GRE;
282
283 if (ip_conntrack_expect_related(exp_orig) != 0) {
284 DEBUGP("cannot expect_related()\n");
285 goto out_put_both;
286 }
287 264
288 DEBUGP("calling expect_related PAC->PNS"); 265 if (ip_nat_pptp_hook_exp_gre)
289 DUMP_TUPLE(&exp_reply->tuple); 266 ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
290 267 if (ip_conntrack_expect_related(exp_orig) != 0)
291 if (ip_conntrack_expect_related(exp_reply) != 0) { 268 goto out_put_both;
292 DEBUGP("cannot expect_related()\n"); 269 if (ip_conntrack_expect_related(exp_reply) != 0)
293 goto out_unexpect_orig; 270 goto out_unexpect_orig;
294 } 271
295 272 /* Add GRE keymap entries */
296 /* Add GRE keymap entries */ 273 if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0)
297 if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) { 274 goto out_unexpect_both;
298 DEBUGP("cannot keymap_add() exp\n"); 275 if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) {
299 goto out_unexpect_both; 276 ip_ct_gre_keymap_destroy(ct);
300 } 277 goto out_unexpect_both;
301
302 invert_tuplepr(&inv_tuple, &exp_reply->tuple);
303 if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) {
304 ip_ct_gre_keymap_destroy(master);
305 DEBUGP("cannot keymap_add() exp_inv\n");
306 goto out_unexpect_both;
307 }
308 ret = 0;
309 } 278 }
279 ret = 0;
310 280
311out_put_both: 281out_put_both:
312 ip_conntrack_expect_put(exp_reply); 282 ip_conntrack_expect_put(exp_reply);
@@ -322,73 +292,36 @@ out_unexpect_orig:
322 goto out_put_both; 292 goto out_put_both;
323} 293}
324 294
325static inline int 295static inline int
326pptp_inbound_pkt(struct sk_buff **pskb, 296pptp_inbound_pkt(struct sk_buff **pskb,
327 struct tcphdr *tcph, 297 struct PptpControlHeader *ctlh,
328 unsigned int nexthdr_off, 298 union pptp_ctrl_union *pptpReq,
329 unsigned int datalen, 299 unsigned int reqlen,
330 struct ip_conntrack *ct, 300 struct ip_conntrack *ct,
331 enum ip_conntrack_info ctinfo) 301 enum ip_conntrack_info ctinfo)
332{ 302{
333 struct PptpControlHeader _ctlh, *ctlh;
334 unsigned int reqlen;
335 union pptp_ctrl_union _pptpReq, *pptpReq;
336 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; 303 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
337 u_int16_t msg; 304 u_int16_t msg;
338 __be16 *cid, *pcid; 305 __be16 cid = 0, pcid = 0;
339 u_int32_t seq;
340
341 ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
342 if (!ctlh) {
343 DEBUGP("error during skb_header_pointer\n");
344 return NF_ACCEPT;
345 }
346 nexthdr_off += sizeof(_ctlh);
347 datalen -= sizeof(_ctlh);
348
349 reqlen = datalen;
350 if (reqlen > sizeof(*pptpReq))
351 reqlen = sizeof(*pptpReq);
352 pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
353 if (!pptpReq) {
354 DEBUGP("error during skb_header_pointer\n");
355 return NF_ACCEPT;
356 }
357 306
358 msg = ntohs(ctlh->messageType); 307 msg = ntohs(ctlh->messageType);
359 DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); 308 DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
360 309
361 switch (msg) { 310 switch (msg) {
362 case PPTP_START_SESSION_REPLY: 311 case PPTP_START_SESSION_REPLY:
363 if (reqlen < sizeof(_pptpReq.srep)) {
364 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
365 break;
366 }
367
368 /* server confirms new control session */ 312 /* server confirms new control session */
369 if (info->sstate < PPTP_SESSION_REQUESTED) { 313 if (info->sstate < PPTP_SESSION_REQUESTED)
370 DEBUGP("%s without START_SESS_REQUEST\n", 314 goto invalid;
371 pptp_msg_name[msg]);
372 break;
373 }
374 if (pptpReq->srep.resultCode == PPTP_START_OK) 315 if (pptpReq->srep.resultCode == PPTP_START_OK)
375 info->sstate = PPTP_SESSION_CONFIRMED; 316 info->sstate = PPTP_SESSION_CONFIRMED;
376 else 317 else
377 info->sstate = PPTP_SESSION_ERROR; 318 info->sstate = PPTP_SESSION_ERROR;
378 break; 319 break;
379 320
380 case PPTP_STOP_SESSION_REPLY: 321 case PPTP_STOP_SESSION_REPLY:
381 if (reqlen < sizeof(_pptpReq.strep)) {
382 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
383 break;
384 }
385
386 /* server confirms end of control session */ 322 /* server confirms end of control session */
387 if (info->sstate > PPTP_SESSION_STOPREQ) { 323 if (info->sstate > PPTP_SESSION_STOPREQ)
388 DEBUGP("%s without STOP_SESS_REQUEST\n", 324 goto invalid;
389 pptp_msg_name[msg]);
390 break;
391 }
392 if (pptpReq->strep.resultCode == PPTP_STOP_OK) 325 if (pptpReq->strep.resultCode == PPTP_STOP_OK)
393 info->sstate = PPTP_SESSION_NONE; 326 info->sstate = PPTP_SESSION_NONE;
394 else 327 else
@@ -396,116 +329,64 @@ pptp_inbound_pkt(struct sk_buff **pskb,
396 break; 329 break;
397 330
398 case PPTP_OUT_CALL_REPLY: 331 case PPTP_OUT_CALL_REPLY:
399 if (reqlen < sizeof(_pptpReq.ocack)) {
400 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
401 break;
402 }
403
404 /* server accepted call, we now expect GRE frames */ 332 /* server accepted call, we now expect GRE frames */
405 if (info->sstate != PPTP_SESSION_CONFIRMED) { 333 if (info->sstate != PPTP_SESSION_CONFIRMED)
406 DEBUGP("%s but no session\n", pptp_msg_name[msg]); 334 goto invalid;
407 break;
408 }
409 if (info->cstate != PPTP_CALL_OUT_REQ && 335 if (info->cstate != PPTP_CALL_OUT_REQ &&
410 info->cstate != PPTP_CALL_OUT_CONF) { 336 info->cstate != PPTP_CALL_OUT_CONF)
411 DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]); 337 goto invalid;
412 break; 338
413 } 339 cid = pptpReq->ocack.callID;
414 if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) { 340 pcid = pptpReq->ocack.peersCallID;
341 if (info->pns_call_id != pcid)
342 goto invalid;
343 DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
344 ntohs(cid), ntohs(pcid));
345
346 if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
347 info->cstate = PPTP_CALL_OUT_CONF;
348 info->pac_call_id = cid;
349 exp_gre(ct, cid, pcid);
350 } else
415 info->cstate = PPTP_CALL_NONE; 351 info->cstate = PPTP_CALL_NONE;
416 break;
417 }
418
419 cid = &pptpReq->ocack.callID;
420 pcid = &pptpReq->ocack.peersCallID;
421
422 info->pac_call_id = ntohs(*cid);
423
424 if (htons(info->pns_call_id) != *pcid) {
425 DEBUGP("%s for unknown callid %u\n",
426 pptp_msg_name[msg], ntohs(*pcid));
427 break;
428 }
429
430 DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
431 ntohs(*cid), ntohs(*pcid));
432
433 info->cstate = PPTP_CALL_OUT_CONF;
434
435 seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
436 + sizeof(struct PptpControlHeader)
437 + ((void *)pcid - (void *)pptpReq);
438
439 if (exp_gre(ct, seq, *cid, *pcid) != 0)
440 printk("ip_conntrack_pptp: error during exp_gre\n");
441 break; 352 break;
442 353
443 case PPTP_IN_CALL_REQUEST: 354 case PPTP_IN_CALL_REQUEST:
444 if (reqlen < sizeof(_pptpReq.icack)) {
445 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
446 break;
447 }
448
449 /* server tells us about incoming call request */ 355 /* server tells us about incoming call request */
450 if (info->sstate != PPTP_SESSION_CONFIRMED) { 356 if (info->sstate != PPTP_SESSION_CONFIRMED)
451 DEBUGP("%s but no session\n", pptp_msg_name[msg]); 357 goto invalid;
452 break; 358
453 } 359 cid = pptpReq->icreq.callID;
454 pcid = &pptpReq->icack.peersCallID; 360 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
455 DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
456 info->cstate = PPTP_CALL_IN_REQ; 361 info->cstate = PPTP_CALL_IN_REQ;
457 info->pac_call_id = ntohs(*pcid); 362 info->pac_call_id = cid;
458 break; 363 break;
459 364
460 case PPTP_IN_CALL_CONNECT: 365 case PPTP_IN_CALL_CONNECT:
461 if (reqlen < sizeof(_pptpReq.iccon)) {
462 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
463 break;
464 }
465
466 /* server tells us about incoming call established */ 366 /* server tells us about incoming call established */
467 if (info->sstate != PPTP_SESSION_CONFIRMED) { 367 if (info->sstate != PPTP_SESSION_CONFIRMED)
468 DEBUGP("%s but no session\n", pptp_msg_name[msg]); 368 goto invalid;
469 break; 369 if (info->cstate != PPTP_CALL_IN_REP &&
470 } 370 info->cstate != PPTP_CALL_IN_CONF)
471 if (info->cstate != PPTP_CALL_IN_REP 371 goto invalid;
472 && info->cstate != PPTP_CALL_IN_CONF) {
473 DEBUGP("%s but never sent IN_CALL_REPLY\n",
474 pptp_msg_name[msg]);
475 break;
476 }
477 372
478 pcid = &pptpReq->iccon.peersCallID; 373 pcid = pptpReq->iccon.peersCallID;
479 cid = &info->pac_call_id; 374 cid = info->pac_call_id;
480 375
481 if (info->pns_call_id != ntohs(*pcid)) { 376 if (info->pns_call_id != pcid)
482 DEBUGP("%s for unknown CallID %u\n", 377 goto invalid;
483 pptp_msg_name[msg], ntohs(*pcid));
484 break;
485 }
486 378
487 DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); 379 DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
488 info->cstate = PPTP_CALL_IN_CONF; 380 info->cstate = PPTP_CALL_IN_CONF;
489 381
490 /* we expect a GRE connection from PAC to PNS */ 382 /* we expect a GRE connection from PAC to PNS */
491 seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) 383 exp_gre(ct, cid, pcid);
492 + sizeof(struct PptpControlHeader)
493 + ((void *)pcid - (void *)pptpReq);
494
495 if (exp_gre(ct, seq, *cid, *pcid) != 0)
496 printk("ip_conntrack_pptp: error during exp_gre\n");
497
498 break; 384 break;
499 385
500 case PPTP_CALL_DISCONNECT_NOTIFY: 386 case PPTP_CALL_DISCONNECT_NOTIFY:
501 if (reqlen < sizeof(_pptpReq.disc)) {
502 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
503 break;
504 }
505
506 /* server confirms disconnect */ 387 /* server confirms disconnect */
507 cid = &pptpReq->disc.callID; 388 cid = pptpReq->disc.callID;
508 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); 389 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
509 info->cstate = PPTP_CALL_NONE; 390 info->cstate = PPTP_CALL_NONE;
510 391
511 /* untrack this call id, unexpect GRE packets */ 392 /* untrack this call id, unexpect GRE packets */
@@ -513,54 +394,39 @@ pptp_inbound_pkt(struct sk_buff **pskb,
513 break; 394 break;
514 395
515 case PPTP_WAN_ERROR_NOTIFY: 396 case PPTP_WAN_ERROR_NOTIFY:
516 break;
517
518 case PPTP_ECHO_REQUEST: 397 case PPTP_ECHO_REQUEST:
519 case PPTP_ECHO_REPLY: 398 case PPTP_ECHO_REPLY:
520 /* I don't have to explain these ;) */ 399 /* I don't have to explain these ;) */
521 break; 400 break;
522 default: 401 default:
523 DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX) 402 goto invalid;
524 ? pptp_msg_name[msg]:pptp_msg_name[0], msg);
525 break;
526 } 403 }
527 404
528
529 if (ip_nat_pptp_hook_inbound) 405 if (ip_nat_pptp_hook_inbound)
530 return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh, 406 return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh,
531 pptpReq); 407 pptpReq);
532
533 return NF_ACCEPT; 408 return NF_ACCEPT;
534 409
410invalid:
411 DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
412 "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
413 msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
414 msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
415 ntohs(info->pns_call_id), ntohs(info->pac_call_id));
416 return NF_ACCEPT;
535} 417}
536 418
537static inline int 419static inline int
538pptp_outbound_pkt(struct sk_buff **pskb, 420pptp_outbound_pkt(struct sk_buff **pskb,
539 struct tcphdr *tcph, 421 struct PptpControlHeader *ctlh,
540 unsigned int nexthdr_off, 422 union pptp_ctrl_union *pptpReq,
541 unsigned int datalen, 423 unsigned int reqlen,
542 struct ip_conntrack *ct, 424 struct ip_conntrack *ct,
543 enum ip_conntrack_info ctinfo) 425 enum ip_conntrack_info ctinfo)
544{ 426{
545 struct PptpControlHeader _ctlh, *ctlh;
546 unsigned int reqlen;
547 union pptp_ctrl_union _pptpReq, *pptpReq;
548 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; 427 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
549 u_int16_t msg; 428 u_int16_t msg;
550 __be16 *cid, *pcid; 429 __be16 cid = 0, pcid = 0;
551
552 ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
553 if (!ctlh)
554 return NF_ACCEPT;
555 nexthdr_off += sizeof(_ctlh);
556 datalen -= sizeof(_ctlh);
557
558 reqlen = datalen;
559 if (reqlen > sizeof(*pptpReq))
560 reqlen = sizeof(*pptpReq);
561 pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
562 if (!pptpReq)
563 return NF_ACCEPT;
564 430
565 msg = ntohs(ctlh->messageType); 431 msg = ntohs(ctlh->messageType);
566 DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); 432 DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
@@ -568,10 +434,8 @@ pptp_outbound_pkt(struct sk_buff **pskb,
568 switch (msg) { 434 switch (msg) {
569 case PPTP_START_SESSION_REQUEST: 435 case PPTP_START_SESSION_REQUEST:
570 /* client requests for new control session */ 436 /* client requests for new control session */
571 if (info->sstate != PPTP_SESSION_NONE) { 437 if (info->sstate != PPTP_SESSION_NONE)
572 DEBUGP("%s but we already have one", 438 goto invalid;
573 pptp_msg_name[msg]);
574 }
575 info->sstate = PPTP_SESSION_REQUESTED; 439 info->sstate = PPTP_SESSION_REQUESTED;
576 break; 440 break;
577 case PPTP_STOP_SESSION_REQUEST: 441 case PPTP_STOP_SESSION_REQUEST:
@@ -580,123 +444,115 @@ pptp_outbound_pkt(struct sk_buff **pskb,
580 break; 444 break;
581 445
582 case PPTP_OUT_CALL_REQUEST: 446 case PPTP_OUT_CALL_REQUEST:
583 if (reqlen < sizeof(_pptpReq.ocreq)) {
584 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
585 /* FIXME: break; */
586 }
587
588 /* client initiating connection to server */ 447 /* client initiating connection to server */
589 if (info->sstate != PPTP_SESSION_CONFIRMED) { 448 if (info->sstate != PPTP_SESSION_CONFIRMED)
590 DEBUGP("%s but no session\n", 449 goto invalid;
591 pptp_msg_name[msg]);
592 break;
593 }
594 info->cstate = PPTP_CALL_OUT_REQ; 450 info->cstate = PPTP_CALL_OUT_REQ;
595 /* track PNS call id */ 451 /* track PNS call id */
596 cid = &pptpReq->ocreq.callID; 452 cid = pptpReq->ocreq.callID;
597 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); 453 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
598 info->pns_call_id = ntohs(*cid); 454 info->pns_call_id = cid;
599 break; 455 break;
600 case PPTP_IN_CALL_REPLY: 456 case PPTP_IN_CALL_REPLY:
601 if (reqlen < sizeof(_pptpReq.icack)) {
602 DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
603 break;
604 }
605
606 /* client answers incoming call */ 457 /* client answers incoming call */
607 if (info->cstate != PPTP_CALL_IN_REQ 458 if (info->cstate != PPTP_CALL_IN_REQ &&
608 && info->cstate != PPTP_CALL_IN_REP) { 459 info->cstate != PPTP_CALL_IN_REP)
609 DEBUGP("%s without incall_req\n", 460 goto invalid;
610 pptp_msg_name[msg]); 461
611 break; 462 cid = pptpReq->icack.callID;
612 } 463 pcid = pptpReq->icack.peersCallID;
613 if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) { 464 if (info->pac_call_id != pcid)
465 goto invalid;
466 DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
467 ntohs(cid), ntohs(pcid));
468
469 if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
470 /* part two of the three-way handshake */
471 info->cstate = PPTP_CALL_IN_REP;
472 info->pns_call_id = cid;
473 } else
614 info->cstate = PPTP_CALL_NONE; 474 info->cstate = PPTP_CALL_NONE;
615 break;
616 }
617 pcid = &pptpReq->icack.peersCallID;
618 if (info->pac_call_id != ntohs(*pcid)) {
619 DEBUGP("%s for unknown call %u\n",
620 pptp_msg_name[msg], ntohs(*pcid));
621 break;
622 }
623 DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
624 /* part two of the three-way handshake */
625 info->cstate = PPTP_CALL_IN_REP;
626 info->pns_call_id = ntohs(pptpReq->icack.callID);
627 break; 475 break;
628 476
629 case PPTP_CALL_CLEAR_REQUEST: 477 case PPTP_CALL_CLEAR_REQUEST:
630 /* client requests hangup of call */ 478 /* client requests hangup of call */
631 if (info->sstate != PPTP_SESSION_CONFIRMED) { 479 if (info->sstate != PPTP_SESSION_CONFIRMED)
632 DEBUGP("CLEAR_CALL but no session\n"); 480 goto invalid;
633 break;
634 }
635 /* FUTURE: iterate over all calls and check if 481 /* FUTURE: iterate over all calls and check if
636 * call ID is valid. We don't do this without newnat, 482 * call ID is valid. We don't do this without newnat,
637 * because we only know about last call */ 483 * because we only know about last call */
638 info->cstate = PPTP_CALL_CLEAR_REQ; 484 info->cstate = PPTP_CALL_CLEAR_REQ;
639 break; 485 break;
640 case PPTP_SET_LINK_INFO: 486 case PPTP_SET_LINK_INFO:
641 break;
642 case PPTP_ECHO_REQUEST: 487 case PPTP_ECHO_REQUEST:
643 case PPTP_ECHO_REPLY: 488 case PPTP_ECHO_REPLY:
644 /* I don't have to explain these ;) */ 489 /* I don't have to explain these ;) */
645 break; 490 break;
646 default: 491 default:
647 DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? 492 goto invalid;
648 pptp_msg_name[msg]:pptp_msg_name[0], msg);
649 /* unknown: no need to create GRE masq table entry */
650 break;
651 } 493 }
652 494
653 if (ip_nat_pptp_hook_outbound) 495 if (ip_nat_pptp_hook_outbound)
654 return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, 496 return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh,
655 pptpReq); 497 pptpReq);
498 return NF_ACCEPT;
656 499
500invalid:
501 DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
502 "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
503 msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
504 msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
505 ntohs(info->pns_call_id), ntohs(info->pac_call_id));
657 return NF_ACCEPT; 506 return NF_ACCEPT;
658} 507}
659 508
509static const unsigned int pptp_msg_size[] = {
510 [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest),
511 [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply),
512 [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest),
513 [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply),
514 [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest),
515 [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply),
516 [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest),
517 [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply),
518 [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected),
519 [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest),
520 [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
521 [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify),
522 [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo),
523};
660 524
661/* track caller id inside control connection, call expect_related */ 525/* track caller id inside control connection, call expect_related */
662static int 526static int
663conntrack_pptp_help(struct sk_buff **pskb, 527conntrack_pptp_help(struct sk_buff **pskb,
664 struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) 528 struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
665 529
666{ 530{
667 struct pptp_pkt_hdr _pptph, *pptph;
668 struct tcphdr _tcph, *tcph;
669 u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
670 u_int32_t datalen;
671 int dir = CTINFO2DIR(ctinfo); 531 int dir = CTINFO2DIR(ctinfo);
672 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; 532 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
673 unsigned int nexthdr_off; 533 struct tcphdr _tcph, *tcph;
674 534 struct pptp_pkt_hdr _pptph, *pptph;
535 struct PptpControlHeader _ctlh, *ctlh;
536 union pptp_ctrl_union _pptpReq, *pptpReq;
537 unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
538 unsigned int datalen, reqlen, nexthdr_off;
675 int oldsstate, oldcstate; 539 int oldsstate, oldcstate;
676 int ret; 540 int ret;
541 u_int16_t msg;
677 542
678 /* don't do any tracking before tcp handshake complete */ 543 /* don't do any tracking before tcp handshake complete */
679 if (ctinfo != IP_CT_ESTABLISHED 544 if (ctinfo != IP_CT_ESTABLISHED
680 && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { 545 && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
681 DEBUGP("ctinfo = %u, skipping\n", ctinfo); 546 DEBUGP("ctinfo = %u, skipping\n", ctinfo);
682 return NF_ACCEPT; 547 return NF_ACCEPT;
683 } 548 }
684 549
685 nexthdr_off = (*pskb)->nh.iph->ihl*4; 550 nexthdr_off = (*pskb)->nh.iph->ihl*4;
686 tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); 551 tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
687 BUG_ON(!tcph); 552 BUG_ON(!tcph);
688 nexthdr_off += tcph->doff * 4; 553 nexthdr_off += tcph->doff * 4;
689 datalen = tcplen - tcph->doff * 4; 554 datalen = tcplen - tcph->doff * 4;
690 555
691 if (tcph->fin || tcph->rst) {
692 DEBUGP("RST/FIN received, timeouting GRE\n");
693 /* can't do this after real newnat */
694 info->cstate = PPTP_CALL_NONE;
695
696 /* untrack this call id, unexpect GRE packets */
697 pptp_destroy_siblings(ct);
698 }
699
700 pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); 556 pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
701 if (!pptph) { 557 if (!pptph) {
702 DEBUGP("no full PPTP header, can't track\n"); 558 DEBUGP("no full PPTP header, can't track\n");
@@ -712,6 +568,23 @@ conntrack_pptp_help(struct sk_buff **pskb,
712 return NF_ACCEPT; 568 return NF_ACCEPT;
713 } 569 }
714 570
571 ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
572 if (!ctlh)
573 return NF_ACCEPT;
574 nexthdr_off += sizeof(_ctlh);
575 datalen -= sizeof(_ctlh);
576
577 reqlen = datalen;
578 msg = ntohs(ctlh->messageType);
579 if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
580 return NF_ACCEPT;
581 if (reqlen > sizeof(*pptpReq))
582 reqlen = sizeof(*pptpReq);
583
584 pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
585 if (!pptpReq)
586 return NF_ACCEPT;
587
715 oldsstate = info->sstate; 588 oldsstate = info->sstate;
716 oldcstate = info->cstate; 589 oldcstate = info->cstate;
717 590
@@ -721,11 +594,11 @@ conntrack_pptp_help(struct sk_buff **pskb,
721 * established from PNS->PAC. However, RFC makes no guarantee */ 594 * established from PNS->PAC. However, RFC makes no guarantee */
722 if (dir == IP_CT_DIR_ORIGINAL) 595 if (dir == IP_CT_DIR_ORIGINAL)
723 /* client -> server (PNS -> PAC) */ 596 /* client -> server (PNS -> PAC) */
724 ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, 597 ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
725 ctinfo); 598 ctinfo);
726 else 599 else
727 /* server -> client (PAC -> PNS) */ 600 /* server -> client (PAC -> PNS) */
728 ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, 601 ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
729 ctinfo); 602 ctinfo);
730 DEBUGP("sstate: %d->%d, cstate: %d->%d\n", 603 DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
731 oldsstate, info->sstate, oldcstate, info->cstate); 604 oldsstate, info->sstate, oldcstate, info->cstate);
@@ -735,30 +608,31 @@ conntrack_pptp_help(struct sk_buff **pskb,
735} 608}
736 609
737/* control protocol helper */ 610/* control protocol helper */
738static struct ip_conntrack_helper pptp = { 611static struct ip_conntrack_helper pptp = {
739 .list = { NULL, NULL }, 612 .list = { NULL, NULL },
740 .name = "pptp", 613 .name = "pptp",
741 .me = THIS_MODULE, 614 .me = THIS_MODULE,
742 .max_expected = 2, 615 .max_expected = 2,
743 .timeout = 5 * 60, 616 .timeout = 5 * 60,
744 .tuple = { .src = { .ip = 0, 617 .tuple = { .src = { .ip = 0,
745 .u = { .tcp = { .port = 618 .u = { .tcp = { .port =
746 __constant_htons(PPTP_CONTROL_PORT) } } 619 __constant_htons(PPTP_CONTROL_PORT) } }
747 }, 620 },
748 .dst = { .ip = 0, 621 .dst = { .ip = 0,
749 .u = { .all = 0 }, 622 .u = { .all = 0 },
750 .protonum = IPPROTO_TCP 623 .protonum = IPPROTO_TCP
751 } 624 }
752 }, 625 },
753 .mask = { .src = { .ip = 0, 626 .mask = { .src = { .ip = 0,
754 .u = { .tcp = { .port = __constant_htons(0xffff) } } 627 .u = { .tcp = { .port = __constant_htons(0xffff) } }
755 }, 628 },
756 .dst = { .ip = 0, 629 .dst = { .ip = 0,
757 .u = { .all = 0 }, 630 .u = { .all = 0 },
758 .protonum = 0xff 631 .protonum = 0xff
759 } 632 }
760 }, 633 },
761 .help = conntrack_pptp_help 634 .help = conntrack_pptp_help,
635 .destroy = pptp_destroy_siblings,
762}; 636};
763 637
764extern void ip_ct_proto_gre_fini(void); 638extern void ip_ct_proto_gre_fini(void);
@@ -768,7 +642,7 @@ extern int __init ip_ct_proto_gre_init(void);
768static int __init ip_conntrack_helper_pptp_init(void) 642static int __init ip_conntrack_helper_pptp_init(void)
769{ 643{
770 int retcode; 644 int retcode;
771 645
772 retcode = ip_ct_proto_gre_init(); 646 retcode = ip_ct_proto_gre_init();
773 if (retcode < 0) 647 if (retcode < 0)
774 return retcode; 648 return retcode;
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index a566a81325b2..3d0b438783db 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -21,6 +21,7 @@
21#include <linux/skbuff.h> 21#include <linux/skbuff.h>
22#include <linux/netdevice.h> 22#include <linux/netdevice.h>
23#include <linux/inetdevice.h> 23#include <linux/inetdevice.h>
24#include <linux/if_addr.h>
24#include <linux/in.h> 25#include <linux/in.h>
25#include <linux/ip.h> 26#include <linux/ip.h>
26#include <net/route.h> 27#include <net/route.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 0d4cc92391fa..52eddea27e93 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -329,11 +329,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
329 /* dump everything */ 329 /* dump everything */
330 events = ~0UL; 330 events = ~0UL;
331 group = NFNLGRP_CONNTRACK_NEW; 331 group = NFNLGRP_CONNTRACK_NEW;
332 } else if (events & (IPCT_STATUS | 332 } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
333 IPCT_PROTOINFO |
334 IPCT_HELPER |
335 IPCT_HELPINFO |
336 IPCT_NATINFO)) {
337 type = IPCTNL_MSG_CT_NEW; 333 type = IPCTNL_MSG_CT_NEW;
338 group = NFNLGRP_CONNTRACK_UPDATE; 334 group = NFNLGRP_CONNTRACK_UPDATE;
339 } else 335 } else
@@ -385,6 +381,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
385 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) 381 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
386 goto nfattr_failure; 382 goto nfattr_failure;
387 383
384 if (events & IPCT_MARK
385 && ctnetlink_dump_mark(skb, ct) < 0)
386 goto nfattr_failure;
387
388 nlh->nlmsg_len = skb->tail - b; 388 nlh->nlmsg_len = skb->tail - b;
389 nfnetlink_send(skb, 0, group, 0); 389 nfnetlink_send(skb, 0, group, 0);
390 return NOTIFY_DONE; 390 return NOTIFY_DONE;
@@ -436,6 +436,11 @@ restart:
436 cb->args[1] = (unsigned long)ct; 436 cb->args[1] = (unsigned long)ct;
437 goto out; 437 goto out;
438 } 438 }
439#ifdef CONFIG_NF_CT_ACCT
440 if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
441 IPCTNL_MSG_CT_GET_CTRZERO)
442 memset(&ct->counters, 0, sizeof(ct->counters));
443#endif
439 } 444 }
440 if (cb->args[1]) { 445 if (cb->args[1]) {
441 cb->args[1] = 0; 446 cb->args[1] = 0;
@@ -451,46 +456,6 @@ out:
451 return skb->len; 456 return skb->len;
452} 457}
453 458
454#ifdef CONFIG_IP_NF_CT_ACCT
455static int
456ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb)
457{
458 struct ip_conntrack *ct = NULL;
459 struct ip_conntrack_tuple_hash *h;
460 struct list_head *i;
461 u_int32_t *id = (u_int32_t *) &cb->args[1];
462
463 DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__,
464 cb->args[0], *id);
465
466 write_lock_bh(&ip_conntrack_lock);
467 for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
468 list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
469 h = (struct ip_conntrack_tuple_hash *) i;
470 if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
471 continue;
472 ct = tuplehash_to_ctrack(h);
473 if (ct->id <= *id)
474 continue;
475 if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
476 cb->nlh->nlmsg_seq,
477 IPCTNL_MSG_CT_NEW,
478 1, ct) < 0)
479 goto out;
480 *id = ct->id;
481
482 memset(&ct->counters, 0, sizeof(ct->counters));
483 }
484 }
485out:
486 write_unlock_bh(&ip_conntrack_lock);
487
488 DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
489
490 return skb->len;
491}
492#endif
493
494static const size_t cta_min_ip[CTA_IP_MAX] = { 459static const size_t cta_min_ip[CTA_IP_MAX] = {
495 [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), 460 [CTA_IP_V4_SRC-1] = sizeof(u_int32_t),
496 [CTA_IP_V4_DST-1] = sizeof(u_int32_t), 461 [CTA_IP_V4_DST-1] = sizeof(u_int32_t),
@@ -775,22 +740,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
775 if (msg->nfgen_family != AF_INET) 740 if (msg->nfgen_family != AF_INET)
776 return -EAFNOSUPPORT; 741 return -EAFNOSUPPORT;
777 742
778 if (NFNL_MSG_TYPE(nlh->nlmsg_type) == 743#ifndef CONFIG_IP_NF_CT_ACCT
779 IPCTNL_MSG_CT_GET_CTRZERO) { 744 if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
780#ifdef CONFIG_IP_NF_CT_ACCT
781 if ((*errp = netlink_dump_start(ctnl, skb, nlh,
782 ctnetlink_dump_table_w,
783 ctnetlink_done)) != 0)
784 return -EINVAL;
785#else
786 return -ENOTSUPP; 745 return -ENOTSUPP;
787#endif 746#endif
788 } else { 747 if ((*errp = netlink_dump_start(ctnl, skb, nlh,
789 if ((*errp = netlink_dump_start(ctnl, skb, nlh, 748 ctnetlink_dump_table,
790 ctnetlink_dump_table, 749 ctnetlink_done)) != 0)
791 ctnetlink_done)) != 0)
792 return -EINVAL; 750 return -EINVAL;
793 }
794 751
795 rlen = NLMSG_ALIGN(nlh->nlmsg_len); 752 rlen = NLMSG_ALIGN(nlh->nlmsg_len);
796 if (rlen > skb->len) 753 if (rlen > skb->len)
@@ -1253,6 +1210,9 @@ static int ctnetlink_expect_event(struct notifier_block *this,
1253 } else 1210 } else
1254 return NOTIFY_DONE; 1211 return NOTIFY_DONE;
1255 1212
1213 if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
1214 return NOTIFY_DONE;
1215
1256 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); 1216 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
1257 if (!skb) 1217 if (!skb)
1258 return NOTIFY_DONE; 1218 return NOTIFY_DONE;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
index f891308b5e4c..36f2b5e5d80a 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -12,7 +12,7 @@
12#include <linux/netfilter.h> 12#include <linux/netfilter.h>
13#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 13#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
14 14
15unsigned int ip_ct_generic_timeout = 600*HZ; 15unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ;
16 16
17static int generic_pkt_to_tuple(const struct sk_buff *skb, 17static int generic_pkt_to_tuple(const struct sk_buff *skb,
18 unsigned int dataoff, 18 unsigned int dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index 4ee016c427b4..5fe026f467d3 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -1,15 +1,15 @@
1/* 1/*
2 * ip_conntrack_proto_gre.c - Version 3.0 2 * ip_conntrack_proto_gre.c - Version 3.0
3 * 3 *
4 * Connection tracking protocol helper module for GRE. 4 * Connection tracking protocol helper module for GRE.
5 * 5 *
6 * GRE is a generic encapsulation protocol, which is generally not very 6 * GRE is a generic encapsulation protocol, which is generally not very
7 * suited for NAT, as it has no protocol-specific part as port numbers. 7 * suited for NAT, as it has no protocol-specific part as port numbers.
8 * 8 *
9 * It has an optional key field, which may help us distinguishing two 9 * It has an optional key field, which may help us distinguishing two
10 * connections between the same two hosts. 10 * connections between the same two hosts.
11 * 11 *
12 * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 12 * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
13 * 13 *
14 * PPTP is built on top of a modified version of GRE, and has a mandatory 14 * PPTP is built on top of a modified version of GRE, and has a mandatory
15 * field called "CallID", which serves us for the same purpose as the key 15 * field called "CallID", which serves us for the same purpose as the key
@@ -37,7 +37,6 @@ static DEFINE_RWLOCK(ip_ct_gre_lock);
37#define ASSERT_READ_LOCK(x) 37#define ASSERT_READ_LOCK(x)
38#define ASSERT_WRITE_LOCK(x) 38#define ASSERT_WRITE_LOCK(x)
39 39
40#include <linux/netfilter_ipv4/listhelp.h>
41#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 40#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
42#include <linux/netfilter_ipv4/ip_conntrack_helper.h> 41#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
43#include <linux/netfilter_ipv4/ip_conntrack_core.h> 42#include <linux/netfilter_ipv4/ip_conntrack_core.h>
@@ -62,7 +61,7 @@ MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
62#define DEBUGP(x, args...) 61#define DEBUGP(x, args...)
63#define DUMP_TUPLE_GRE(x) 62#define DUMP_TUPLE_GRE(x)
64#endif 63#endif
65 64
66/* GRE KEYMAP HANDLING FUNCTIONS */ 65/* GRE KEYMAP HANDLING FUNCTIONS */
67static LIST_HEAD(gre_keymap_list); 66static LIST_HEAD(gre_keymap_list);
68 67
@@ -82,12 +81,14 @@ static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t)
82 __be16 key = 0; 81 __be16 key = 0;
83 82
84 read_lock_bh(&ip_ct_gre_lock); 83 read_lock_bh(&ip_ct_gre_lock);
85 km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, 84 list_for_each_entry(km, &gre_keymap_list, list) {
86 struct ip_ct_gre_keymap *, t); 85 if (gre_key_cmpfn(km, t)) {
87 if (km) 86 key = km->tuple.src.u.gre.key;
88 key = km->tuple.src.u.gre.key; 87 break;
88 }
89 }
89 read_unlock_bh(&ip_ct_gre_lock); 90 read_unlock_bh(&ip_ct_gre_lock);
90 91
91 DEBUGP("lookup src key 0x%x up key for ", key); 92 DEBUGP("lookup src key 0x%x up key for ", key);
92 DUMP_TUPLE_GRE(t); 93 DUMP_TUPLE_GRE(t);
93 94
@@ -99,28 +100,25 @@ int
99ip_ct_gre_keymap_add(struct ip_conntrack *ct, 100ip_ct_gre_keymap_add(struct ip_conntrack *ct,
100 struct ip_conntrack_tuple *t, int reply) 101 struct ip_conntrack_tuple *t, int reply)
101{ 102{
102 struct ip_ct_gre_keymap **exist_km, *km, *old; 103 struct ip_ct_gre_keymap **exist_km, *km;
103 104
104 if (!ct->helper || strcmp(ct->helper->name, "pptp")) { 105 if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
105 DEBUGP("refusing to add GRE keymap to non-pptp session\n"); 106 DEBUGP("refusing to add GRE keymap to non-pptp session\n");
106 return -1; 107 return -1;
107 } 108 }
108 109
109 if (!reply) 110 if (!reply)
110 exist_km = &ct->help.ct_pptp_info.keymap_orig; 111 exist_km = &ct->help.ct_pptp_info.keymap_orig;
111 else 112 else
112 exist_km = &ct->help.ct_pptp_info.keymap_reply; 113 exist_km = &ct->help.ct_pptp_info.keymap_reply;
113 114
114 if (*exist_km) { 115 if (*exist_km) {
115 /* check whether it's a retransmission */ 116 /* check whether it's a retransmission */
116 old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, 117 list_for_each_entry(km, &gre_keymap_list, list) {
117 struct ip_ct_gre_keymap *, t); 118 if (gre_key_cmpfn(km, t) && km == *exist_km)
118 if (old == *exist_km) { 119 return 0;
119 DEBUGP("retransmission\n");
120 return 0;
121 } 120 }
122 121 DEBUGP("trying to override keymap_%s for ct %p\n",
123 DEBUGP("trying to override keymap_%s for ct %p\n",
124 reply? "reply":"orig", ct); 122 reply? "reply":"orig", ct);
125 return -EEXIST; 123 return -EEXIST;
126 } 124 }
@@ -136,7 +134,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct,
136 DUMP_TUPLE_GRE(&km->tuple); 134 DUMP_TUPLE_GRE(&km->tuple);
137 135
138 write_lock_bh(&ip_ct_gre_lock); 136 write_lock_bh(&ip_ct_gre_lock);
139 list_append(&gre_keymap_list, km); 137 list_add_tail(&km->list, &gre_keymap_list);
140 write_unlock_bh(&ip_ct_gre_lock); 138 write_unlock_bh(&ip_ct_gre_lock);
141 139
142 return 0; 140 return 0;
@@ -154,7 +152,7 @@ void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct)
154 152
155 write_lock_bh(&ip_ct_gre_lock); 153 write_lock_bh(&ip_ct_gre_lock);
156 if (ct->help.ct_pptp_info.keymap_orig) { 154 if (ct->help.ct_pptp_info.keymap_orig) {
157 DEBUGP("removing %p from list\n", 155 DEBUGP("removing %p from list\n",
158 ct->help.ct_pptp_info.keymap_orig); 156 ct->help.ct_pptp_info.keymap_orig);
159 list_del(&ct->help.ct_pptp_info.keymap_orig->list); 157 list_del(&ct->help.ct_pptp_info.keymap_orig->list);
160 kfree(ct->help.ct_pptp_info.keymap_orig); 158 kfree(ct->help.ct_pptp_info.keymap_orig);
@@ -222,7 +220,7 @@ static int gre_pkt_to_tuple(const struct sk_buff *skb,
222static int gre_print_tuple(struct seq_file *s, 220static int gre_print_tuple(struct seq_file *s,
223 const struct ip_conntrack_tuple *tuple) 221 const struct ip_conntrack_tuple *tuple)
224{ 222{
225 return seq_printf(s, "srckey=0x%x dstkey=0x%x ", 223 return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
226 ntohs(tuple->src.u.gre.key), 224 ntohs(tuple->src.u.gre.key),
227 ntohs(tuple->dst.u.gre.key)); 225 ntohs(tuple->dst.u.gre.key));
228} 226}
@@ -252,14 +250,14 @@ static int gre_packet(struct ip_conntrack *ct,
252 } else 250 } else
253 ip_ct_refresh_acct(ct, conntrackinfo, skb, 251 ip_ct_refresh_acct(ct, conntrackinfo, skb,
254 ct->proto.gre.timeout); 252 ct->proto.gre.timeout);
255 253
256 return NF_ACCEPT; 254 return NF_ACCEPT;
257} 255}
258 256
259/* Called when a new connection for this protocol found. */ 257/* Called when a new connection for this protocol found. */
260static int gre_new(struct ip_conntrack *ct, 258static int gre_new(struct ip_conntrack *ct,
261 const struct sk_buff *skb) 259 const struct sk_buff *skb)
262{ 260{
263 DEBUGP(": "); 261 DEBUGP(": ");
264 DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 262 DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
265 263
@@ -285,9 +283,9 @@ static void gre_destroy(struct ip_conntrack *ct)
285} 283}
286 284
287/* protocol helper struct */ 285/* protocol helper struct */
288static struct ip_conntrack_protocol gre = { 286static struct ip_conntrack_protocol gre = {
289 .proto = IPPROTO_GRE, 287 .proto = IPPROTO_GRE,
290 .name = "gre", 288 .name = "gre",
291 .pkt_to_tuple = gre_pkt_to_tuple, 289 .pkt_to_tuple = gre_pkt_to_tuple,
292 .invert_tuple = gre_invert_tuple, 290 .invert_tuple = gre_invert_tuple,
293 .print_tuple = gre_print_tuple, 291 .print_tuple = gre_print_tuple,
@@ -325,7 +323,7 @@ void ip_ct_proto_gre_fini(void)
325 } 323 }
326 write_unlock_bh(&ip_ct_gre_lock); 324 write_unlock_bh(&ip_ct_gre_lock);
327 325
328 ip_conntrack_protocol_unregister(&gre); 326 ip_conntrack_protocol_unregister(&gre);
329} 327}
330 328
331EXPORT_SYMBOL(ip_ct_gre_keymap_add); 329EXPORT_SYMBOL(ip_ct_gre_keymap_add);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 23f1c504586d..09c40ebe3345 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -21,7 +21,7 @@
21#include <linux/netfilter_ipv4/ip_conntrack_core.h> 21#include <linux/netfilter_ipv4/ip_conntrack_core.h>
22#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 22#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
23 23
24unsigned int ip_ct_icmp_timeout = 30*HZ; 24unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ;
25 25
26#if 0 26#if 0
27#define DEBUGP printk 27#define DEBUGP printk
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 2d3612cd5f18..b908a4842e18 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -58,13 +58,13 @@ static const char *sctp_conntrack_names[] = {
58#define HOURS * 60 MINS 58#define HOURS * 60 MINS
59#define DAYS * 24 HOURS 59#define DAYS * 24 HOURS
60 60
61static unsigned int ip_ct_sctp_timeout_closed = 10 SECS; 61static unsigned int ip_ct_sctp_timeout_closed __read_mostly = 10 SECS;
62static unsigned int ip_ct_sctp_timeout_cookie_wait = 3 SECS; 62static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS;
63static unsigned int ip_ct_sctp_timeout_cookie_echoed = 3 SECS; 63static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS;
64static unsigned int ip_ct_sctp_timeout_established = 5 DAYS; 64static unsigned int ip_ct_sctp_timeout_established __read_mostly = 5 DAYS;
65static unsigned int ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; 65static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000;
66static unsigned int ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; 66static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000;
67static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; 67static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;
68 68
69static const unsigned int * sctp_timeouts[] 69static const unsigned int * sctp_timeouts[]
70= { NULL, /* SCTP_CONNTRACK_NONE */ 70= { NULL, /* SCTP_CONNTRACK_NONE */
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index fb920e76ec10..03ae9a04cb37 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -48,19 +48,19 @@ static DEFINE_RWLOCK(tcp_lock);
48/* "Be conservative in what you do, 48/* "Be conservative in what you do,
49 be liberal in what you accept from others." 49 be liberal in what you accept from others."
50 If it's non-zero, we mark only out of window RST segments as INVALID. */ 50 If it's non-zero, we mark only out of window RST segments as INVALID. */
51int ip_ct_tcp_be_liberal = 0; 51int ip_ct_tcp_be_liberal __read_mostly = 0;
52 52
53/* When connection is picked up from the middle, how many packets are required 53/* When connection is picked up from the middle, how many packets are required
54 to pass in each direction when we assume we are in sync - if any side uses 54 to pass in each direction when we assume we are in sync - if any side uses
55 window scaling, we lost the game. 55 window scaling, we lost the game.
56 If it is set to zero, we disable picking up already established 56 If it is set to zero, we disable picking up already established
57 connections. */ 57 connections. */
58int ip_ct_tcp_loose = 3; 58int ip_ct_tcp_loose __read_mostly = 3;
59 59
60/* Max number of the retransmitted packets without receiving an (acceptable) 60/* Max number of the retransmitted packets without receiving an (acceptable)
61 ACK from the destination. If this number is reached, a shorter timer 61 ACK from the destination. If this number is reached, a shorter timer
62 will be started. */ 62 will be started. */
63int ip_ct_tcp_max_retrans = 3; 63int ip_ct_tcp_max_retrans __read_mostly = 3;
64 64
65 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more 65 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
66 closely. They're more complex. --RR */ 66 closely. They're more complex. --RR */
@@ -83,19 +83,19 @@ static const char *tcp_conntrack_names[] = {
83#define HOURS * 60 MINS 83#define HOURS * 60 MINS
84#define DAYS * 24 HOURS 84#define DAYS * 24 HOURS
85 85
86unsigned int ip_ct_tcp_timeout_syn_sent = 2 MINS; 86unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS;
87unsigned int ip_ct_tcp_timeout_syn_recv = 60 SECS; 87unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS;
88unsigned int ip_ct_tcp_timeout_established = 5 DAYS; 88unsigned int ip_ct_tcp_timeout_established __read_mostly = 5 DAYS;
89unsigned int ip_ct_tcp_timeout_fin_wait = 2 MINS; 89unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS;
90unsigned int ip_ct_tcp_timeout_close_wait = 60 SECS; 90unsigned int ip_ct_tcp_timeout_close_wait __read_mostly = 60 SECS;
91unsigned int ip_ct_tcp_timeout_last_ack = 30 SECS; 91unsigned int ip_ct_tcp_timeout_last_ack __read_mostly = 30 SECS;
92unsigned int ip_ct_tcp_timeout_time_wait = 2 MINS; 92unsigned int ip_ct_tcp_timeout_time_wait __read_mostly = 2 MINS;
93unsigned int ip_ct_tcp_timeout_close = 10 SECS; 93unsigned int ip_ct_tcp_timeout_close __read_mostly = 10 SECS;
94 94
95/* RFC1122 says the R2 limit should be at least 100 seconds. 95/* RFC1122 says the R2 limit should be at least 100 seconds.
96 Linux uses 15 packets as limit, which corresponds 96 Linux uses 15 packets as limit, which corresponds
97 to ~13-30min depending on RTO. */ 97 to ~13-30min depending on RTO. */
98unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS; 98unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS;
99 99
100static const unsigned int * tcp_timeouts[] 100static const unsigned int * tcp_timeouts[]
101= { NULL, /* TCP_CONNTRACK_NONE */ 101= { NULL, /* TCP_CONNTRACK_NONE */
@@ -731,13 +731,15 @@ static int tcp_in_window(struct ip_ct_tcp *state,
731 if (state->last_dir == dir 731 if (state->last_dir == dir
732 && state->last_seq == seq 732 && state->last_seq == seq
733 && state->last_ack == ack 733 && state->last_ack == ack
734 && state->last_end == end) 734 && state->last_end == end
735 && state->last_win == win)
735 state->retrans++; 736 state->retrans++;
736 else { 737 else {
737 state->last_dir = dir; 738 state->last_dir = dir;
738 state->last_seq = seq; 739 state->last_seq = seq;
739 state->last_ack = ack; 740 state->last_ack = ack;
740 state->last_end = end; 741 state->last_end = end;
742 state->last_win = win;
741 state->retrans = 0; 743 state->retrans = 0;
742 } 744 }
743 } 745 }
@@ -865,8 +867,7 @@ static int tcp_error(struct sk_buff *skb,
865 867
866 /* Checksum invalid? Ignore. 868 /* Checksum invalid? Ignore.
867 * We skip checking packets on the outgoing path 869 * We skip checking packets on the outgoing path
868 * because the semantic of CHECKSUM_HW is different there 870 * because it is assumed to be correct.
869 * and moreover root might send raw packets.
870 */ 871 */
871 /* FIXME: Source route IP option packets --RR */ 872 /* FIXME: Source route IP option packets --RR */
872 if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && 873 if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 9b2c16b4d2ff..d0e8a16970ec 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -18,8 +18,8 @@
18#include <linux/netfilter_ipv4.h> 18#include <linux/netfilter_ipv4.h>
19#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 19#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
20 20
21unsigned int ip_ct_udp_timeout = 30*HZ; 21unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ;
22unsigned int ip_ct_udp_timeout_stream = 180*HZ; 22unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ;
23 23
24static int udp_pkt_to_tuple(const struct sk_buff *skb, 24static int udp_pkt_to_tuple(const struct sk_buff *skb,
25 unsigned int dataoff, 25 unsigned int dataoff,
@@ -117,8 +117,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
117 117
118 /* Checksum invalid? Ignore. 118 /* Checksum invalid? Ignore.
119 * We skip checking packets on the outgoing path 119 * We skip checking packets on the outgoing path
120 * because the semantic of CHECKSUM_HW is different there 120 * because the checksum is assumed to be correct.
121 * and moreover root might send raw packets.
122 * FIXME: Source route IP option packets --RR */ 121 * FIXME: Source route IP option packets --RR */
123 if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && 122 if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
124 nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) { 123 nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) {
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c
index 4f222d6be009..2893e9c74850 100644
--- a/net/ipv4/netfilter/ip_conntrack_sip.c
+++ b/net/ipv4/netfilter/ip_conntrack_sip.c
@@ -8,7 +8,6 @@
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10 10
11#include <linux/config.h>
12#include <linux/module.h> 11#include <linux/module.h>
13#include <linux/ctype.h> 12#include <linux/ctype.h>
14#include <linux/skbuff.h> 13#include <linux/skbuff.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 7a9fa04a467a..02135756562e 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -35,7 +35,6 @@
35#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 35#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
36#include <linux/netfilter_ipv4/ip_conntrack_core.h> 36#include <linux/netfilter_ipv4/ip_conntrack_core.h>
37#include <linux/netfilter_ipv4/ip_conntrack_helper.h> 37#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
38#include <linux/netfilter_ipv4/listhelp.h>
39 38
40#if 0 39#if 0
41#define DEBUGP printk 40#define DEBUGP printk
@@ -534,7 +533,7 @@ static struct nf_hook_ops ip_conntrack_ops[] = {
534 533
535/* Sysctl support */ 534/* Sysctl support */
536 535
537int ip_conntrack_checksum = 1; 536int ip_conntrack_checksum __read_mostly = 1;
538 537
539#ifdef CONFIG_SYSCTL 538#ifdef CONFIG_SYSCTL
540 539
@@ -563,7 +562,7 @@ extern unsigned int ip_ct_udp_timeout_stream;
563/* From ip_conntrack_proto_icmp.c */ 562/* From ip_conntrack_proto_icmp.c */
564extern unsigned int ip_ct_icmp_timeout; 563extern unsigned int ip_ct_icmp_timeout;
565 564
566/* From ip_conntrack_proto_icmp.c */ 565/* From ip_conntrack_proto_generic.c */
567extern unsigned int ip_ct_generic_timeout; 566extern unsigned int ip_ct_generic_timeout;
568 567
569/* Log invalid packets of a given protocol */ 568/* Log invalid packets of a given protocol */
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 1741d555ad0d..71f3e09cbc84 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -22,9 +22,6 @@
22#include <linux/udp.h> 22#include <linux/udp.h>
23#include <linux/jhash.h> 23#include <linux/jhash.h>
24 24
25#define ASSERT_READ_LOCK(x)
26#define ASSERT_WRITE_LOCK(x)
27
28#include <linux/netfilter_ipv4/ip_conntrack.h> 25#include <linux/netfilter_ipv4/ip_conntrack.h>
29#include <linux/netfilter_ipv4/ip_conntrack_core.h> 26#include <linux/netfilter_ipv4/ip_conntrack_core.h>
30#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 27#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
@@ -33,7 +30,6 @@
33#include <linux/netfilter_ipv4/ip_nat_core.h> 30#include <linux/netfilter_ipv4/ip_nat_core.h>
34#include <linux/netfilter_ipv4/ip_nat_helper.h> 31#include <linux/netfilter_ipv4/ip_nat_helper.h>
35#include <linux/netfilter_ipv4/ip_conntrack_helper.h> 32#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
36#include <linux/netfilter_ipv4/listhelp.h>
37 33
38#if 0 34#if 0
39#define DEBUGP printk 35#define DEBUGP printk
@@ -101,18 +97,6 @@ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
101 write_unlock_bh(&ip_nat_lock); 97 write_unlock_bh(&ip_nat_lock);
102} 98}
103 99
104/* We do checksum mangling, so if they were wrong before they're still
105 * wrong. Also works for incomplete packets (eg. ICMP dest
106 * unreachables.) */
107u_int16_t
108ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
109{
110 u_int32_t diffs[] = { oldvalinv, newval };
111 return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
112 oldcheck^0xFFFF));
113}
114EXPORT_SYMBOL(ip_nat_cheat_check);
115
116/* Is this tuple already taken? (not by us) */ 100/* Is this tuple already taken? (not by us) */
117int 101int
118ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, 102ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
@@ -378,12 +362,12 @@ manip_pkt(u_int16_t proto,
378 iph = (void *)(*pskb)->data + iphdroff; 362 iph = (void *)(*pskb)->data + iphdroff;
379 363
380 if (maniptype == IP_NAT_MANIP_SRC) { 364 if (maniptype == IP_NAT_MANIP_SRC) {
381 iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip, 365 iph->check = nf_csum_update(~iph->saddr, target->src.ip,
382 iph->check); 366 iph->check);
383 iph->saddr = target->src.ip; 367 iph->saddr = target->src.ip;
384 } else { 368 } else {
385 iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip, 369 iph->check = nf_csum_update(~iph->daddr, target->dst.ip,
386 iph->check); 370 iph->check);
387 iph->daddr = target->dst.ip; 371 iph->daddr = target->dst.ip;
388 } 372 }
389 return 1; 373 return 1;
@@ -423,10 +407,10 @@ unsigned int ip_nat_packet(struct ip_conntrack *ct,
423EXPORT_SYMBOL_GPL(ip_nat_packet); 407EXPORT_SYMBOL_GPL(ip_nat_packet);
424 408
425/* Dir is direction ICMP is coming from (opposite to packet it contains) */ 409/* Dir is direction ICMP is coming from (opposite to packet it contains) */
426int ip_nat_icmp_reply_translation(struct sk_buff **pskb, 410int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
427 struct ip_conntrack *ct, 411 enum ip_conntrack_info ctinfo,
428 enum ip_nat_manip_type manip, 412 unsigned int hooknum,
429 enum ip_conntrack_dir dir) 413 struct sk_buff **pskb)
430{ 414{
431 struct { 415 struct {
432 struct icmphdr icmp; 416 struct icmphdr icmp;
@@ -434,7 +418,9 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
434 } *inside; 418 } *inside;
435 struct ip_conntrack_tuple inner, target; 419 struct ip_conntrack_tuple inner, target;
436 int hdrlen = (*pskb)->nh.iph->ihl * 4; 420 int hdrlen = (*pskb)->nh.iph->ihl * 4;
421 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
437 unsigned long statusbit; 422 unsigned long statusbit;
423 enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);
438 424
439 if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) 425 if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
440 return 0; 426 return 0;
@@ -443,12 +429,8 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
443 429
444 /* We're actually going to mangle it beyond trivial checksum 430 /* We're actually going to mangle it beyond trivial checksum
445 adjustment, so make sure the current checksum is correct. */ 431 adjustment, so make sure the current checksum is correct. */
446 if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) { 432 if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
447 hdrlen = (*pskb)->nh.iph->ihl * 4; 433 return 0;
448 if ((u16)csum_fold(skb_checksum(*pskb, hdrlen,
449 (*pskb)->len - hdrlen, 0)))
450 return 0;
451 }
452 434
453 /* Must be RELATED */ 435 /* Must be RELATED */
454 IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED || 436 IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
@@ -487,12 +469,14 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
487 !manip)) 469 !manip))
488 return 0; 470 return 0;
489 471
490 /* Reloading "inside" here since manip_pkt inner. */ 472 if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
491 inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; 473 /* Reloading "inside" here since manip_pkt inner. */
492 inside->icmp.checksum = 0; 474 inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
493 inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, 475 inside->icmp.checksum = 0;
494 (*pskb)->len - hdrlen, 476 inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
495 0)); 477 (*pskb)->len - hdrlen,
478 0));
479 }
496 480
497 /* Change outer to look the reply to an incoming packet 481 /* Change outer to look the reply to an incoming packet
498 * (proto 0 means don't invert per-proto part). */ 482 * (proto 0 means don't invert per-proto part). */
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index cbcaa45370ae..7f6a75984f6c 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -27,16 +27,12 @@
27#include <net/tcp.h> 27#include <net/tcp.h>
28#include <net/udp.h> 28#include <net/udp.h>
29 29
30#define ASSERT_READ_LOCK(x)
31#define ASSERT_WRITE_LOCK(x)
32
33#include <linux/netfilter_ipv4/ip_conntrack.h> 30#include <linux/netfilter_ipv4/ip_conntrack.h>
34#include <linux/netfilter_ipv4/ip_conntrack_helper.h> 31#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
35#include <linux/netfilter_ipv4/ip_nat.h> 32#include <linux/netfilter_ipv4/ip_nat.h>
36#include <linux/netfilter_ipv4/ip_nat_protocol.h> 33#include <linux/netfilter_ipv4/ip_nat_protocol.h>
37#include <linux/netfilter_ipv4/ip_nat_core.h> 34#include <linux/netfilter_ipv4/ip_nat_core.h>
38#include <linux/netfilter_ipv4/ip_nat_helper.h> 35#include <linux/netfilter_ipv4/ip_nat_helper.h>
39#include <linux/netfilter_ipv4/listhelp.h>
40 36
41#if 0 37#if 0
42#define DEBUGP printk 38#define DEBUGP printk
@@ -165,7 +161,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
165{ 161{
166 struct iphdr *iph; 162 struct iphdr *iph;
167 struct tcphdr *tcph; 163 struct tcphdr *tcph;
168 int datalen; 164 int oldlen, datalen;
169 165
170 if (!skb_make_writable(pskb, (*pskb)->len)) 166 if (!skb_make_writable(pskb, (*pskb)->len))
171 return 0; 167 return 0;
@@ -180,13 +176,22 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
180 iph = (*pskb)->nh.iph; 176 iph = (*pskb)->nh.iph;
181 tcph = (void *)iph + iph->ihl*4; 177 tcph = (void *)iph + iph->ihl*4;
182 178
179 oldlen = (*pskb)->len - iph->ihl*4;
183 mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4, 180 mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
184 match_offset, match_len, rep_buffer, rep_len); 181 match_offset, match_len, rep_buffer, rep_len);
185 182
186 datalen = (*pskb)->len - iph->ihl*4; 183 datalen = (*pskb)->len - iph->ihl*4;
187 tcph->check = 0; 184 if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
188 tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr, 185 tcph->check = 0;
189 csum_partial((char *)tcph, datalen, 0)); 186 tcph->check = tcp_v4_check(tcph, datalen,
187 iph->saddr, iph->daddr,
188 csum_partial((char *)tcph,
189 datalen, 0));
190 } else
191 tcph->check = nf_proto_csum_update(*pskb,
192 htons(oldlen) ^ 0xFFFF,
193 htons(datalen),
194 tcph->check, 1);
190 195
191 if (rep_len != match_len) { 196 if (rep_len != match_len) {
192 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); 197 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
@@ -221,6 +226,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
221{ 226{
222 struct iphdr *iph; 227 struct iphdr *iph;
223 struct udphdr *udph; 228 struct udphdr *udph;
229 int datalen, oldlen;
224 230
225 /* UDP helpers might accidentally mangle the wrong packet */ 231 /* UDP helpers might accidentally mangle the wrong packet */
226 iph = (*pskb)->nh.iph; 232 iph = (*pskb)->nh.iph;
@@ -238,22 +244,32 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
238 244
239 iph = (*pskb)->nh.iph; 245 iph = (*pskb)->nh.iph;
240 udph = (void *)iph + iph->ihl*4; 246 udph = (void *)iph + iph->ihl*4;
247
248 oldlen = (*pskb)->len - iph->ihl*4;
241 mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph), 249 mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
242 match_offset, match_len, rep_buffer, rep_len); 250 match_offset, match_len, rep_buffer, rep_len);
243 251
244 /* update the length of the UDP packet */ 252 /* update the length of the UDP packet */
245 udph->len = htons((*pskb)->len - iph->ihl*4); 253 datalen = (*pskb)->len - iph->ihl*4;
254 udph->len = htons(datalen);
246 255
247 /* fix udp checksum if udp checksum was previously calculated */ 256 /* fix udp checksum if udp checksum was previously calculated */
248 if (udph->check) { 257 if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
249 int datalen = (*pskb)->len - iph->ihl * 4; 258 return 1;
259
260 if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
250 udph->check = 0; 261 udph->check = 0;
251 udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, 262 udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
252 datalen, IPPROTO_UDP, 263 datalen, IPPROTO_UDP,
253 csum_partial((char *)udph, 264 csum_partial((char *)udph,
254 datalen, 0)); 265 datalen, 0));
255 } 266 if (!udph->check)
256 267 udph->check = -1;
268 } else
269 udph->check = nf_proto_csum_update(*pskb,
270 htons(oldlen) ^ 0xFFFF,
271 htons(datalen),
272 udph->check, 1);
257 return 1; 273 return 1;
258} 274}
259EXPORT_SYMBOL(ip_nat_mangle_udp_packet); 275EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
@@ -293,11 +309,14 @@ sack_adjust(struct sk_buff *skb,
293 ntohl(sack->start_seq), new_start_seq, 309 ntohl(sack->start_seq), new_start_seq,
294 ntohl(sack->end_seq), new_end_seq); 310 ntohl(sack->end_seq), new_end_seq);
295 311
296 tcph->check = 312 tcph->check = nf_proto_csum_update(skb,
297 ip_nat_cheat_check(~sack->start_seq, new_start_seq, 313 ~sack->start_seq,
298 ip_nat_cheat_check(~sack->end_seq, 314 new_start_seq,
299 new_end_seq, 315 tcph->check, 0);
300 tcph->check)); 316 tcph->check = nf_proto_csum_update(skb,
317 ~sack->end_seq,
318 new_end_seq,
319 tcph->check, 0);
301 sack->start_seq = new_start_seq; 320 sack->start_seq = new_start_seq;
302 sack->end_seq = new_end_seq; 321 sack->end_seq = new_end_seq;
303 sackoff += sizeof(*sack); 322 sackoff += sizeof(*sack);
@@ -381,10 +400,10 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
381 newack = ntohl(tcph->ack_seq) - other_way->offset_before; 400 newack = ntohl(tcph->ack_seq) - other_way->offset_before;
382 newack = htonl(newack); 401 newack = htonl(newack);
383 402
384 tcph->check = ip_nat_cheat_check(~tcph->seq, newseq, 403 tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq,
385 ip_nat_cheat_check(~tcph->ack_seq, 404 tcph->check, 0);
386 newack, 405 tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack,
387 tcph->check)); 406 tcph->check, 0);
388 407
389 DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n", 408 DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
390 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), 409 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 1d149964dc38..2ff578807123 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -32,7 +32,7 @@
32 * 2005-06-10 - Version 3.0 32 * 2005-06-10 - Version 3.0
33 * - kernel >= 2.6.11 version, 33 * - kernel >= 2.6.11 version,
34 * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) 34 * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
35 * 35 *
36 */ 36 */
37 37
38#include <linux/module.h> 38#include <linux/module.h>
@@ -85,19 +85,17 @@ static void pptp_nat_expected(struct ip_conntrack *ct,
85 DEBUGP("we are PNS->PAC\n"); 85 DEBUGP("we are PNS->PAC\n");
86 /* therefore, build tuple for PAC->PNS */ 86 /* therefore, build tuple for PAC->PNS */
87 t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; 87 t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
88 t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id); 88 t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id;
89 t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; 89 t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
90 t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id); 90 t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id;
91 t.dst.protonum = IPPROTO_GRE; 91 t.dst.protonum = IPPROTO_GRE;
92 } else { 92 } else {
93 DEBUGP("we are PAC->PNS\n"); 93 DEBUGP("we are PAC->PNS\n");
94 /* build tuple for PNS->PAC */ 94 /* build tuple for PNS->PAC */
95 t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; 95 t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
96 t.src.u.gre.key = 96 t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id;
97 htons(master->nat.help.nat_pptp_info.pns_call_id);
98 t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; 97 t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
99 t.dst.u.gre.key = 98 t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id;
100 htons(master->nat.help.nat_pptp_info.pac_call_id);
101 t.dst.protonum = IPPROTO_GRE; 99 t.dst.protonum = IPPROTO_GRE;
102 } 100 }
103 101
@@ -149,51 +147,52 @@ pptp_outbound_pkt(struct sk_buff **pskb,
149{ 147{
150 struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; 148 struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
151 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; 149 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
152 u_int16_t msg, new_callid; 150 u_int16_t msg;
151 __be16 new_callid;
153 unsigned int cid_off; 152 unsigned int cid_off;
154 153
155 new_callid = htons(ct_pptp_info->pns_call_id); 154 new_callid = ct_pptp_info->pns_call_id;
156 155
157 switch (msg = ntohs(ctlh->messageType)) { 156 switch (msg = ntohs(ctlh->messageType)) {
158 case PPTP_OUT_CALL_REQUEST: 157 case PPTP_OUT_CALL_REQUEST:
159 cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); 158 cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
160 /* FIXME: ideally we would want to reserve a call ID 159 /* FIXME: ideally we would want to reserve a call ID
161 * here. current netfilter NAT core is not able to do 160 * here. current netfilter NAT core is not able to do
162 * this :( For now we use TCP source port. This breaks 161 * this :( For now we use TCP source port. This breaks
163 * multiple calls within one control session */ 162 * multiple calls within one control session */
164 163
165 /* save original call ID in nat_info */ 164 /* save original call ID in nat_info */
166 nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; 165 nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
167 166
168 /* don't use tcph->source since we are at a DSTmanip 167 /* don't use tcph->source since we are at a DSTmanip
169 * hook (e.g. PREROUTING) and pkt is not mangled yet */ 168 * hook (e.g. PREROUTING) and pkt is not mangled yet */
170 new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; 169 new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
171 170
172 /* save new call ID in ct info */ 171 /* save new call ID in ct info */
173 ct_pptp_info->pns_call_id = ntohs(new_callid); 172 ct_pptp_info->pns_call_id = new_callid;
174 break; 173 break;
175 case PPTP_IN_CALL_REPLY: 174 case PPTP_IN_CALL_REPLY:
176 cid_off = offsetof(union pptp_ctrl_union, icreq.callID); 175 cid_off = offsetof(union pptp_ctrl_union, icack.callID);
177 break; 176 break;
178 case PPTP_CALL_CLEAR_REQUEST: 177 case PPTP_CALL_CLEAR_REQUEST:
179 cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); 178 cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
180 break; 179 break;
181 default: 180 default:
182 DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, 181 DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
183 (msg <= PPTP_MSG_MAX)? 182 (msg <= PPTP_MSG_MAX)?
184 pptp_msg_name[msg]:pptp_msg_name[0]); 183 pptp_msg_name[msg]:pptp_msg_name[0]);
185 /* fall through */ 184 /* fall through */
186 185
187 case PPTP_SET_LINK_INFO: 186 case PPTP_SET_LINK_INFO:
188 /* only need to NAT in case PAC is behind NAT box */ 187 /* only need to NAT in case PAC is behind NAT box */
189 case PPTP_START_SESSION_REQUEST: 188 case PPTP_START_SESSION_REQUEST:
190 case PPTP_START_SESSION_REPLY: 189 case PPTP_START_SESSION_REPLY:
191 case PPTP_STOP_SESSION_REQUEST: 190 case PPTP_STOP_SESSION_REQUEST:
192 case PPTP_STOP_SESSION_REPLY: 191 case PPTP_STOP_SESSION_REPLY:
193 case PPTP_ECHO_REQUEST: 192 case PPTP_ECHO_REQUEST:
194 case PPTP_ECHO_REPLY: 193 case PPTP_ECHO_REPLY:
195 /* no need to alter packet */ 194 /* no need to alter packet */
196 return NF_ACCEPT; 195 return NF_ACCEPT;
197 } 196 }
198 197
199 /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass 198 /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
@@ -212,80 +211,28 @@ pptp_outbound_pkt(struct sk_buff **pskb,
212 return NF_ACCEPT; 211 return NF_ACCEPT;
213} 212}
214 213
215static int 214static void
216pptp_exp_gre(struct ip_conntrack_expect *expect_orig, 215pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
217 struct ip_conntrack_expect *expect_reply) 216 struct ip_conntrack_expect *expect_reply)
218{ 217{
219 struct ip_ct_pptp_master *ct_pptp_info =
220 &expect_orig->master->help.ct_pptp_info;
221 struct ip_nat_pptp *nat_pptp_info =
222 &expect_orig->master->nat.help.nat_pptp_info;
223
224 struct ip_conntrack *ct = expect_orig->master; 218 struct ip_conntrack *ct = expect_orig->master;
225 219 struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
226 struct ip_conntrack_tuple inv_t; 220 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
227 struct ip_conntrack_tuple *orig_t, *reply_t;
228 221
229 /* save original PAC call ID in nat_info */ 222 /* save original PAC call ID in nat_info */
230 nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; 223 nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
231 224
232 /* alter expectation */
233 orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
234 reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
235
236 /* alter expectation for PNS->PAC direction */ 225 /* alter expectation for PNS->PAC direction */
237 invert_tuplepr(&inv_t, &expect_orig->tuple); 226 expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id;
238 expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id); 227 expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id;
239 expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); 228 expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id;
240 expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
241 expect_orig->dir = IP_CT_DIR_ORIGINAL; 229 expect_orig->dir = IP_CT_DIR_ORIGINAL;
242 inv_t.src.ip = reply_t->src.ip;
243 inv_t.dst.ip = reply_t->dst.ip;
244 inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
245 inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
246
247 if (!ip_conntrack_expect_related(expect_orig)) {
248 DEBUGP("successfully registered expect\n");
249 } else {
250 DEBUGP("can't expect_related(expect_orig)\n");
251 return 1;
252 }
253 230
254 /* alter expectation for PAC->PNS direction */ 231 /* alter expectation for PAC->PNS direction */
255 invert_tuplepr(&inv_t, &expect_reply->tuple); 232 expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id;
256 expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); 233 expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id;
257 expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); 234 expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id;
258 expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
259 expect_reply->dir = IP_CT_DIR_REPLY; 235 expect_reply->dir = IP_CT_DIR_REPLY;
260 inv_t.src.ip = orig_t->src.ip;
261 inv_t.dst.ip = orig_t->dst.ip;
262 inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
263 inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
264
265 if (!ip_conntrack_expect_related(expect_reply)) {
266 DEBUGP("successfully registered expect\n");
267 } else {
268 DEBUGP("can't expect_related(expect_reply)\n");
269 ip_conntrack_unexpect_related(expect_orig);
270 return 1;
271 }
272
273 if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) {
274 DEBUGP("can't register original keymap\n");
275 ip_conntrack_unexpect_related(expect_orig);
276 ip_conntrack_unexpect_related(expect_reply);
277 return 1;
278 }
279
280 if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) {
281 DEBUGP("can't register reply keymap\n");
282 ip_conntrack_unexpect_related(expect_orig);
283 ip_conntrack_unexpect_related(expect_reply);
284 ip_ct_gre_keymap_destroy(ct);
285 return 1;
286 }
287
288 return 0;
289} 236}
290 237
291/* inbound packets == from PAC to PNS */ 238/* inbound packets == from PAC to PNS */
@@ -297,15 +244,15 @@ pptp_inbound_pkt(struct sk_buff **pskb,
297 union pptp_ctrl_union *pptpReq) 244 union pptp_ctrl_union *pptpReq)
298{ 245{
299 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; 246 struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
300 u_int16_t msg, new_cid = 0, new_pcid; 247 u_int16_t msg;
301 unsigned int pcid_off, cid_off = 0; 248 __be16 new_pcid;
249 unsigned int pcid_off;
302 250
303 new_pcid = htons(nat_pptp_info->pns_call_id); 251 new_pcid = nat_pptp_info->pns_call_id;
304 252
305 switch (msg = ntohs(ctlh->messageType)) { 253 switch (msg = ntohs(ctlh->messageType)) {
306 case PPTP_OUT_CALL_REPLY: 254 case PPTP_OUT_CALL_REPLY:
307 pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID); 255 pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
308 cid_off = offsetof(union pptp_ctrl_union, ocack.callID);
309 break; 256 break;
310 case PPTP_IN_CALL_CONNECT: 257 case PPTP_IN_CALL_CONNECT:
311 pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID); 258 pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
@@ -324,7 +271,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
324 break; 271 break;
325 272
326 default: 273 default:
327 DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? 274 DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
328 pptp_msg_name[msg]:pptp_msg_name[0]); 275 pptp_msg_name[msg]:pptp_msg_name[0]);
329 /* fall through */ 276 /* fall through */
330 277
@@ -351,17 +298,6 @@ pptp_inbound_pkt(struct sk_buff **pskb,
351 sizeof(new_pcid), (char *)&new_pcid, 298 sizeof(new_pcid), (char *)&new_pcid,
352 sizeof(new_pcid)) == 0) 299 sizeof(new_pcid)) == 0)
353 return NF_DROP; 300 return NF_DROP;
354
355 if (new_cid) {
356 DEBUGP("altering call id from 0x%04x to 0x%04x\n",
357 ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_cid));
358 if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
359 cid_off + sizeof(struct pptp_pkt_hdr) +
360 sizeof(struct PptpControlHeader),
361 sizeof(new_cid), (char *)&new_cid,
362 sizeof(new_cid)) == 0)
363 return NF_DROP;
364 }
365 return NF_ACCEPT; 301 return NF_ACCEPT;
366} 302}
367 303
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index 38acfdf540eb..bf91f9312b3c 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -6,10 +6,10 @@
6 * GRE is a generic encapsulation protocol, which is generally not very 6 * GRE is a generic encapsulation protocol, which is generally not very
7 * suited for NAT, as it has no protocol-specific part as port numbers. 7 * suited for NAT, as it has no protocol-specific part as port numbers.
8 * 8 *
9 * It has an optional key field, which may help us distinguishing two 9 * It has an optional key field, which may help us distinguishing two
10 * connections between the same two hosts. 10 * connections between the same two hosts.
11 * 11 *
12 * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 12 * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
13 * 13 *
14 * PPTP is built on top of a modified version of GRE, and has a mandatory 14 * PPTP is built on top of a modified version of GRE, and has a mandatory
15 * field called "CallID", which serves us for the same purpose as the key 15 * field called "CallID", which serves us for the same purpose as the key
@@ -60,14 +60,14 @@ gre_in_range(const struct ip_conntrack_tuple *tuple,
60} 60}
61 61
62/* generate unique tuple ... */ 62/* generate unique tuple ... */
63static int 63static int
64gre_unique_tuple(struct ip_conntrack_tuple *tuple, 64gre_unique_tuple(struct ip_conntrack_tuple *tuple,
65 const struct ip_nat_range *range, 65 const struct ip_nat_range *range,
66 enum ip_nat_manip_type maniptype, 66 enum ip_nat_manip_type maniptype,
67 const struct ip_conntrack *conntrack) 67 const struct ip_conntrack *conntrack)
68{ 68{
69 static u_int16_t key; 69 static u_int16_t key;
70 u_int16_t *keyptr; 70 __be16 *keyptr;
71 unsigned int min, i, range_size; 71 unsigned int min, i, range_size;
72 72
73 if (maniptype == IP_NAT_MANIP_SRC) 73 if (maniptype == IP_NAT_MANIP_SRC)
@@ -84,7 +84,7 @@ gre_unique_tuple(struct ip_conntrack_tuple *tuple,
84 range_size = ntohs(range->max.gre.key) - min + 1; 84 range_size = ntohs(range->max.gre.key) - min + 1;
85 } 85 }
86 86
87 DEBUGP("min = %u, range_size = %u\n", min, range_size); 87 DEBUGP("min = %u, range_size = %u\n", min, range_size);
88 88
89 for (i = 0; i < range_size; i++, key++) { 89 for (i = 0; i < range_size; i++, key++) {
90 *keyptr = htons(min + key % range_size); 90 *keyptr = htons(min + key % range_size);
@@ -117,7 +117,7 @@ gre_manip_pkt(struct sk_buff **pskb,
117 greh = (void *)(*pskb)->data + hdroff; 117 greh = (void *)(*pskb)->data + hdroff;
118 pgreh = (struct gre_hdr_pptp *) greh; 118 pgreh = (struct gre_hdr_pptp *) greh;
119 119
120 /* we only have destination manip of a packet, since 'source key' 120 /* we only have destination manip of a packet, since 'source key'
121 * is not present in the packet itself */ 121 * is not present in the packet itself */
122 if (maniptype == IP_NAT_MANIP_DST) { 122 if (maniptype == IP_NAT_MANIP_DST) {
123 /* key manipulation is always dest */ 123 /* key manipulation is always dest */
@@ -129,15 +129,16 @@ gre_manip_pkt(struct sk_buff **pskb,
129 } 129 }
130 if (greh->csum) { 130 if (greh->csum) {
131 /* FIXME: Never tested this code... */ 131 /* FIXME: Never tested this code... */
132 *(gre_csum(greh)) = 132 *(gre_csum(greh)) =
133 ip_nat_cheat_check(~*(gre_key(greh)), 133 nf_proto_csum_update(*pskb,
134 ~*(gre_key(greh)),
134 tuple->dst.u.gre.key, 135 tuple->dst.u.gre.key,
135 *(gre_csum(greh))); 136 *(gre_csum(greh)), 0);
136 } 137 }
137 *(gre_key(greh)) = tuple->dst.u.gre.key; 138 *(gre_key(greh)) = tuple->dst.u.gre.key;
138 break; 139 break;
139 case GRE_VERSION_PPTP: 140 case GRE_VERSION_PPTP:
140 DEBUGP("call_id -> 0x%04x\n", 141 DEBUGP("call_id -> 0x%04x\n",
141 ntohs(tuple->dst.u.gre.key)); 142 ntohs(tuple->dst.u.gre.key));
142 pgreh->call_id = tuple->dst.u.gre.key; 143 pgreh->call_id = tuple->dst.u.gre.key;
143 break; 144 break;
@@ -151,8 +152,8 @@ gre_manip_pkt(struct sk_buff **pskb,
151} 152}
152 153
153/* nat helper struct */ 154/* nat helper struct */
154static struct ip_nat_protocol gre = { 155static struct ip_nat_protocol gre = {
155 .name = "GRE", 156 .name = "GRE",
156 .protonum = IPPROTO_GRE, 157 .protonum = IPPROTO_GRE,
157 .manip_pkt = gre_manip_pkt, 158 .manip_pkt = gre_manip_pkt,
158 .in_range = gre_in_range, 159 .in_range = gre_in_range,
@@ -163,7 +164,7 @@ static struct ip_nat_protocol gre = {
163 .nfattr_to_range = ip_nat_port_nfattr_to_range, 164 .nfattr_to_range = ip_nat_port_nfattr_to_range,
164#endif 165#endif
165}; 166};
166 167
167int __init ip_nat_proto_gre_init(void) 168int __init ip_nat_proto_gre_init(void)
168{ 169{
169 return ip_nat_protocol_register(&gre); 170 return ip_nat_protocol_register(&gre);
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
index 31a3f4ccb99c..ec50cc295317 100644
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -66,10 +66,10 @@ icmp_manip_pkt(struct sk_buff **pskb,
66 return 0; 66 return 0;
67 67
68 hdr = (struct icmphdr *)((*pskb)->data + hdroff); 68 hdr = (struct icmphdr *)((*pskb)->data + hdroff);
69 69 hdr->checksum = nf_proto_csum_update(*pskb,
70 hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF, 70 hdr->un.echo.id ^ 0xFFFF,
71 tuple->src.u.icmp.id, 71 tuple->src.u.icmp.id,
72 hdr->checksum); 72 hdr->checksum, 0);
73 hdr->un.echo.id = tuple->src.u.icmp.id; 73 hdr->un.echo.id = tuple->src.u.icmp.id;
74 return 1; 74 return 1;
75} 75}
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index a3d14079eba6..72a6307bd2db 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -129,10 +129,9 @@ tcp_manip_pkt(struct sk_buff **pskb,
129 if (hdrsize < sizeof(*hdr)) 129 if (hdrsize < sizeof(*hdr))
130 return 1; 130 return 1;
131 131
132 hdr->check = ip_nat_cheat_check(~oldip, newip, 132 hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1);
133 ip_nat_cheat_check(oldport ^ 0xFFFF, 133 hdr->check = nf_proto_csum_update(*pskb, oldport ^ 0xFFFF, newport,
134 newport, 134 hdr->check, 0);
135 hdr->check));
136 return 1; 135 return 1;
137} 136}
138 137
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index ec6053fdc867..5da196ae758c 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb,
113 newport = tuple->dst.u.udp.port; 113 newport = tuple->dst.u.udp.port;
114 portptr = &hdr->dest; 114 portptr = &hdr->dest;
115 } 115 }
116 if (hdr->check) /* 0 is a special case meaning no checksum */ 116
117 hdr->check = ip_nat_cheat_check(~oldip, newip, 117 if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
118 ip_nat_cheat_check(*portptr ^ 0xFFFF, 118 hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip,
119 newport, 119 hdr->check, 1);
120 hdr->check)); 120 hdr->check = nf_proto_csum_update(*pskb,
121 *portptr ^ 0xFFFF, newport,
122 hdr->check, 0);
123 if (!hdr->check)
124 hdr->check = -1;
125 }
121 *portptr = newport; 126 *portptr = newport;
122 return 1; 127 return 1;
123} 128}
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 1aba926c1cb0..7b703839aa58 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -19,14 +19,10 @@
19#include <net/route.h> 19#include <net/route.h>
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21 21
22#define ASSERT_READ_LOCK(x)
23#define ASSERT_WRITE_LOCK(x)
24
25#include <linux/netfilter_ipv4/ip_tables.h> 22#include <linux/netfilter_ipv4/ip_tables.h>
26#include <linux/netfilter_ipv4/ip_nat.h> 23#include <linux/netfilter_ipv4/ip_nat.h>
27#include <linux/netfilter_ipv4/ip_nat_core.h> 24#include <linux/netfilter_ipv4/ip_nat_core.h>
28#include <linux/netfilter_ipv4/ip_nat_rule.h> 25#include <linux/netfilter_ipv4/ip_nat_rule.h>
29#include <linux/netfilter_ipv4/listhelp.h>
30 26
31#if 0 27#if 0
32#define DEBUGP printk 28#define DEBUGP printk
@@ -104,8 +100,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb,
104 const struct net_device *out, 100 const struct net_device *out,
105 unsigned int hooknum, 101 unsigned int hooknum,
106 const struct ipt_target *target, 102 const struct ipt_target *target,
107 const void *targinfo, 103 const void *targinfo)
108 void *userinfo)
109{ 104{
110 struct ip_conntrack *ct; 105 struct ip_conntrack *ct;
111 enum ip_conntrack_info ctinfo; 106 enum ip_conntrack_info ctinfo;
@@ -147,8 +142,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
147 const struct net_device *out, 142 const struct net_device *out,
148 unsigned int hooknum, 143 unsigned int hooknum,
149 const struct ipt_target *target, 144 const struct ipt_target *target,
150 const void *targinfo, 145 const void *targinfo)
151 void *userinfo)
152{ 146{
153 struct ip_conntrack *ct; 147 struct ip_conntrack *ct;
154 enum ip_conntrack_info ctinfo; 148 enum ip_conntrack_info ctinfo;
@@ -174,7 +168,6 @@ static int ipt_snat_checkentry(const char *tablename,
174 const void *entry, 168 const void *entry,
175 const struct ipt_target *target, 169 const struct ipt_target *target,
176 void *targinfo, 170 void *targinfo,
177 unsigned int targinfosize,
178 unsigned int hook_mask) 171 unsigned int hook_mask)
179{ 172{
180 struct ip_nat_multi_range_compat *mr = targinfo; 173 struct ip_nat_multi_range_compat *mr = targinfo;
@@ -191,7 +184,6 @@ static int ipt_dnat_checkentry(const char *tablename,
191 const void *entry, 184 const void *entry,
192 const struct ipt_target *target, 185 const struct ipt_target *target,
193 void *targinfo, 186 void *targinfo,
194 unsigned int targinfosize,
195 unsigned int hook_mask) 187 unsigned int hook_mask)
196{ 188{
197 struct ip_nat_multi_range_compat *mr = targinfo; 189 struct ip_nat_multi_range_compat *mr = targinfo;
@@ -255,7 +247,7 @@ int ip_nat_rule_find(struct sk_buff **pskb,
255{ 247{
256 int ret; 248 int ret;
257 249
258 ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL); 250 ret = ipt_do_table(pskb, hooknum, in, out, &nat_table);
259 251
260 if (ret == NF_ACCEPT) { 252 if (ret == NF_ACCEPT) {
261 if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum))) 253 if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 17de077a7901..9c577db62047 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -30,9 +30,6 @@
30#include <net/checksum.h> 30#include <net/checksum.h>
31#include <linux/spinlock.h> 31#include <linux/spinlock.h>
32 32
33#define ASSERT_READ_LOCK(x)
34#define ASSERT_WRITE_LOCK(x)
35
36#include <linux/netfilter_ipv4/ip_nat.h> 33#include <linux/netfilter_ipv4/ip_nat.h>
37#include <linux/netfilter_ipv4/ip_nat_rule.h> 34#include <linux/netfilter_ipv4/ip_nat_rule.h>
38#include <linux/netfilter_ipv4/ip_nat_protocol.h> 35#include <linux/netfilter_ipv4/ip_nat_protocol.h>
@@ -40,7 +37,6 @@
40#include <linux/netfilter_ipv4/ip_nat_helper.h> 37#include <linux/netfilter_ipv4/ip_nat_helper.h>
41#include <linux/netfilter_ipv4/ip_tables.h> 38#include <linux/netfilter_ipv4/ip_tables.h>
42#include <linux/netfilter_ipv4/ip_conntrack_core.h> 39#include <linux/netfilter_ipv4/ip_conntrack_core.h>
43#include <linux/netfilter_ipv4/listhelp.h>
44 40
45#if 0 41#if 0
46#define DEBUGP printk 42#define DEBUGP printk
@@ -110,11 +106,6 @@ ip_nat_fn(unsigned int hooknum,
110 IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off 106 IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
111 & htons(IP_MF|IP_OFFSET))); 107 & htons(IP_MF|IP_OFFSET)));
112 108
113 /* If we had a hardware checksum before, it's now invalid */
114 if ((*pskb)->ip_summed == CHECKSUM_HW)
115 if (skb_checksum_help(*pskb, (out == NULL)))
116 return NF_DROP;
117
118 ct = ip_conntrack_get(*pskb, &ctinfo); 109 ct = ip_conntrack_get(*pskb, &ctinfo);
119 /* Can't track? It's not due to stress, or conntrack would 110 /* Can't track? It's not due to stress, or conntrack would
120 have dropped it. Hence it's the user's responsibilty to 111 have dropped it. Hence it's the user's responsibilty to
@@ -145,8 +136,8 @@ ip_nat_fn(unsigned int hooknum,
145 case IP_CT_RELATED: 136 case IP_CT_RELATED:
146 case IP_CT_RELATED+IP_CT_IS_REPLY: 137 case IP_CT_RELATED+IP_CT_IS_REPLY:
147 if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { 138 if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
148 if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype, 139 if (!ip_nat_icmp_reply_translation(ct, ctinfo,
149 CTINFO2DIR(ctinfo))) 140 hooknum, pskb))
150 return NF_DROP; 141 return NF_DROP;
151 else 142 else
152 return NF_ACCEPT; 143 return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 198ac36db861..7edad790478a 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -52,15 +52,15 @@ struct ipq_queue_entry {
52 52
53typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); 53typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
54 54
55static unsigned char copy_mode = IPQ_COPY_NONE; 55static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
56static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT; 56static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
57static DEFINE_RWLOCK(queue_lock); 57static DEFINE_RWLOCK(queue_lock);
58static int peer_pid; 58static int peer_pid __read_mostly;
59static unsigned int copy_range; 59static unsigned int copy_range __read_mostly;
60static unsigned int queue_total; 60static unsigned int queue_total;
61static unsigned int queue_dropped = 0; 61static unsigned int queue_dropped = 0;
62static unsigned int queue_user_dropped = 0; 62static unsigned int queue_user_dropped = 0;
63static struct sock *ipqnl; 63static struct sock *ipqnl __read_mostly;
64static LIST_HEAD(queue_list); 64static LIST_HEAD(queue_list);
65static DEFINE_MUTEX(ipqnl_mutex); 65static DEFINE_MUTEX(ipqnl_mutex);
66 66
@@ -208,9 +208,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
208 break; 208 break;
209 209
210 case IPQ_COPY_PACKET: 210 case IPQ_COPY_PACKET:
211 if (entry->skb->ip_summed == CHECKSUM_HW && 211 if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
212 (*errp = skb_checksum_help(entry->skb, 212 entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
213 entry->info->outdev == NULL))) { 213 (*errp = skb_checksum_help(entry->skb))) {
214 read_unlock_bh(&queue_lock); 214 read_unlock_bh(&queue_lock);
215 return NULL; 215 return NULL;
216 } 216 }
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 048514f15f2f..800067d69a9a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -180,8 +180,7 @@ ipt_error(struct sk_buff **pskb,
180 const struct net_device *out, 180 const struct net_device *out,
181 unsigned int hooknum, 181 unsigned int hooknum,
182 const struct xt_target *target, 182 const struct xt_target *target,
183 const void *targinfo, 183 const void *targinfo)
184 void *userinfo)
185{ 184{
186 if (net_ratelimit()) 185 if (net_ratelimit())
187 printk("ip_tables: error: `%s'\n", (char *)targinfo); 186 printk("ip_tables: error: `%s'\n", (char *)targinfo);
@@ -217,8 +216,7 @@ ipt_do_table(struct sk_buff **pskb,
217 unsigned int hook, 216 unsigned int hook,
218 const struct net_device *in, 217 const struct net_device *in,
219 const struct net_device *out, 218 const struct net_device *out,
220 struct ipt_table *table, 219 struct ipt_table *table)
221 void *userdata)
222{ 220{
223 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); 221 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
224 u_int16_t offset; 222 u_int16_t offset;
@@ -308,8 +306,7 @@ ipt_do_table(struct sk_buff **pskb,
308 in, out, 306 in, out,
309 hook, 307 hook,
310 t->u.kernel.target, 308 t->u.kernel.target,
311 t->data, 309 t->data);
312 userdata);
313 310
314#ifdef CONFIG_NETFILTER_DEBUG 311#ifdef CONFIG_NETFILTER_DEBUG
315 if (((struct ipt_entry *)table_base)->comefrom 312 if (((struct ipt_entry *)table_base)->comefrom
@@ -467,8 +464,7 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i)
467 return 1; 464 return 1;
468 465
469 if (m->u.kernel.match->destroy) 466 if (m->u.kernel.match->destroy)
470 m->u.kernel.match->destroy(m->u.kernel.match, m->data, 467 m->u.kernel.match->destroy(m->u.kernel.match, m->data);
471 m->u.match_size - sizeof(*m));
472 module_put(m->u.kernel.match->me); 468 module_put(m->u.kernel.match->me);
473 return 0; 469 return 0;
474} 470}
@@ -521,7 +517,6 @@ check_match(struct ipt_entry_match *m,
521 517
522 if (m->u.kernel.match->checkentry 518 if (m->u.kernel.match->checkentry
523 && !m->u.kernel.match->checkentry(name, ip, match, m->data, 519 && !m->u.kernel.match->checkentry(name, ip, match, m->data,
524 m->u.match_size - sizeof(*m),
525 hookmask)) { 520 hookmask)) {
526 duprintf("ip_tables: check failed for `%s'.\n", 521 duprintf("ip_tables: check failed for `%s'.\n",
527 m->u.kernel.match->name); 522 m->u.kernel.match->name);
@@ -578,12 +573,10 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size,
578 if (t->u.kernel.target == &ipt_standard_target) { 573 if (t->u.kernel.target == &ipt_standard_target) {
579 if (!standard_check(t, size)) { 574 if (!standard_check(t, size)) {
580 ret = -EINVAL; 575 ret = -EINVAL;
581 goto cleanup_matches; 576 goto err;
582 } 577 }
583 } else if (t->u.kernel.target->checkentry 578 } else if (t->u.kernel.target->checkentry
584 && !t->u.kernel.target->checkentry(name, e, target, t->data, 579 && !t->u.kernel.target->checkentry(name, e, target, t->data,
585 t->u.target_size
586 - sizeof(*t),
587 e->comefrom)) { 580 e->comefrom)) {
588 duprintf("ip_tables: check failed for `%s'.\n", 581 duprintf("ip_tables: check failed for `%s'.\n",
589 t->u.kernel.target->name); 582 t->u.kernel.target->name);
@@ -655,8 +648,7 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i)
655 IPT_MATCH_ITERATE(e, cleanup_match, NULL); 648 IPT_MATCH_ITERATE(e, cleanup_match, NULL);
656 t = ipt_get_target(e); 649 t = ipt_get_target(e);
657 if (t->u.kernel.target->destroy) 650 if (t->u.kernel.target->destroy)
658 t->u.kernel.target->destroy(t->u.kernel.target, t->data, 651 t->u.kernel.target->destroy(t->u.kernel.target, t->data);
659 t->u.target_size - sizeof(*t));
660 module_put(t->u.kernel.target->me); 652 module_put(t->u.kernel.target->me);
661 return 0; 653 return 0;
662} 654}
@@ -950,73 +942,28 @@ static short compat_calc_jump(u_int16_t offset)
950 return delta; 942 return delta;
951} 943}
952 944
953struct compat_ipt_standard_target 945static void compat_standard_from_user(void *dst, void *src)
954{ 946{
955 struct compat_xt_entry_target target; 947 int v = *(compat_int_t *)src;
956 compat_int_t verdict;
957};
958
959struct compat_ipt_standard
960{
961 struct compat_ipt_entry entry;
962 struct compat_ipt_standard_target target;
963};
964 948
965#define IPT_ST_LEN XT_ALIGN(sizeof(struct ipt_standard_target)) 949 if (v > 0)
966#define IPT_ST_COMPAT_LEN COMPAT_XT_ALIGN(sizeof(struct compat_ipt_standard_target)) 950 v += compat_calc_jump(v);
967#define IPT_ST_OFFSET (IPT_ST_LEN - IPT_ST_COMPAT_LEN) 951 memcpy(dst, &v, sizeof(v));
952}
968 953
969static int compat_ipt_standard_fn(void *target, 954static int compat_standard_to_user(void __user *dst, void *src)
970 void **dstptr, int *size, int convert)
971{ 955{
972 struct compat_ipt_standard_target compat_st, *pcompat_st; 956 compat_int_t cv = *(int *)src;
973 struct ipt_standard_target st, *pst;
974 int ret;
975 957
976 ret = 0; 958 if (cv > 0)
977 switch (convert) { 959 cv -= compat_calc_jump(cv);
978 case COMPAT_TO_USER: 960 return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
979 pst = target;
980 memcpy(&compat_st.target, &pst->target,
981 sizeof(compat_st.target));
982 compat_st.verdict = pst->verdict;
983 if (compat_st.verdict > 0)
984 compat_st.verdict -=
985 compat_calc_jump(compat_st.verdict);
986 compat_st.target.u.user.target_size = IPT_ST_COMPAT_LEN;
987 if (copy_to_user(*dstptr, &compat_st, IPT_ST_COMPAT_LEN))
988 ret = -EFAULT;
989 *size -= IPT_ST_OFFSET;
990 *dstptr += IPT_ST_COMPAT_LEN;
991 break;
992 case COMPAT_FROM_USER:
993 pcompat_st = target;
994 memcpy(&st.target, &pcompat_st->target, IPT_ST_COMPAT_LEN);
995 st.verdict = pcompat_st->verdict;
996 if (st.verdict > 0)
997 st.verdict += compat_calc_jump(st.verdict);
998 st.target.u.user.target_size = IPT_ST_LEN;
999 memcpy(*dstptr, &st, IPT_ST_LEN);
1000 *size += IPT_ST_OFFSET;
1001 *dstptr += IPT_ST_LEN;
1002 break;
1003 case COMPAT_CALC_SIZE:
1004 *size += IPT_ST_OFFSET;
1005 break;
1006 default:
1007 ret = -ENOPROTOOPT;
1008 break;
1009 }
1010 return ret;
1011} 961}
1012 962
1013static inline int 963static inline int
1014compat_calc_match(struct ipt_entry_match *m, int * size) 964compat_calc_match(struct ipt_entry_match *m, int * size)
1015{ 965{
1016 if (m->u.kernel.match->compat) 966 *size += xt_compat_match_offset(m->u.kernel.match);
1017 m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
1018 else
1019 xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE);
1020 return 0; 967 return 0;
1021} 968}
1022 969
@@ -1031,10 +978,7 @@ static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info,
1031 entry_offset = (void *)e - base; 978 entry_offset = (void *)e - base;
1032 IPT_MATCH_ITERATE(e, compat_calc_match, &off); 979 IPT_MATCH_ITERATE(e, compat_calc_match, &off);
1033 t = ipt_get_target(e); 980 t = ipt_get_target(e);
1034 if (t->u.kernel.target->compat) 981 off += xt_compat_target_offset(t->u.kernel.target);
1035 t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
1036 else
1037 xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE);
1038 newinfo->size -= off; 982 newinfo->size -= off;
1039 ret = compat_add_offset(entry_offset, off); 983 ret = compat_add_offset(entry_offset, off);
1040 if (ret) 984 if (ret)
@@ -1420,17 +1364,13 @@ struct compat_ipt_replace {
1420}; 1364};
1421 1365
1422static inline int compat_copy_match_to_user(struct ipt_entry_match *m, 1366static inline int compat_copy_match_to_user(struct ipt_entry_match *m,
1423 void __user **dstptr, compat_uint_t *size) 1367 void * __user *dstptr, compat_uint_t *size)
1424{ 1368{
1425 if (m->u.kernel.match->compat) 1369 return xt_compat_match_to_user(m, dstptr, size);
1426 return m->u.kernel.match->compat(m, dstptr, size,
1427 COMPAT_TO_USER);
1428 else
1429 return xt_compat_match(m, dstptr, size, COMPAT_TO_USER);
1430} 1370}
1431 1371
1432static int compat_copy_entry_to_user(struct ipt_entry *e, 1372static int compat_copy_entry_to_user(struct ipt_entry *e,
1433 void __user **dstptr, compat_uint_t *size) 1373 void * __user *dstptr, compat_uint_t *size)
1434{ 1374{
1435 struct ipt_entry_target __user *t; 1375 struct ipt_entry_target __user *t;
1436 struct compat_ipt_entry __user *ce; 1376 struct compat_ipt_entry __user *ce;
@@ -1450,11 +1390,7 @@ static int compat_copy_entry_to_user(struct ipt_entry *e,
1450 if (ret) 1390 if (ret)
1451 goto out; 1391 goto out;
1452 t = ipt_get_target(e); 1392 t = ipt_get_target(e);
1453 if (t->u.kernel.target->compat) 1393 ret = xt_compat_target_to_user(t, dstptr, size);
1454 ret = t->u.kernel.target->compat(t, dstptr, size,
1455 COMPAT_TO_USER);
1456 else
1457 ret = xt_compat_target(t, dstptr, size, COMPAT_TO_USER);
1458 if (ret) 1394 if (ret)
1459 goto out; 1395 goto out;
1460 ret = -EFAULT; 1396 ret = -EFAULT;
@@ -1486,11 +1422,7 @@ compat_check_calc_match(struct ipt_entry_match *m,
1486 return match ? PTR_ERR(match) : -ENOENT; 1422 return match ? PTR_ERR(match) : -ENOENT;
1487 } 1423 }
1488 m->u.kernel.match = match; 1424 m->u.kernel.match = match;
1489 1425 *size += xt_compat_match_offset(match);
1490 if (m->u.kernel.match->compat)
1491 m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
1492 else
1493 xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE);
1494 1426
1495 (*i)++; 1427 (*i)++;
1496 return 0; 1428 return 0;
@@ -1537,7 +1469,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
1537 ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip, 1469 ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip,
1538 e->comefrom, &off, &j); 1470 e->comefrom, &off, &j);
1539 if (ret != 0) 1471 if (ret != 0)
1540 goto out; 1472 goto cleanup_matches;
1541 1473
1542 t = ipt_get_target(e); 1474 t = ipt_get_target(e);
1543 target = try_then_request_module(xt_find_target(AF_INET, 1475 target = try_then_request_module(xt_find_target(AF_INET,
@@ -1547,14 +1479,11 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
1547 if (IS_ERR(target) || !target) { 1479 if (IS_ERR(target) || !target) {
1548 duprintf("check_entry: `%s' not found\n", t->u.user.name); 1480 duprintf("check_entry: `%s' not found\n", t->u.user.name);
1549 ret = target ? PTR_ERR(target) : -ENOENT; 1481 ret = target ? PTR_ERR(target) : -ENOENT;
1550 goto out; 1482 goto cleanup_matches;
1551 } 1483 }
1552 t->u.kernel.target = target; 1484 t->u.kernel.target = target;
1553 1485
1554 if (t->u.kernel.target->compat) 1486 off += xt_compat_target_offset(target);
1555 t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
1556 else
1557 xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE);
1558 *size += off; 1487 *size += off;
1559 ret = compat_add_offset(entry_offset, off); 1488 ret = compat_add_offset(entry_offset, off);
1560 if (ret) 1489 if (ret)
@@ -1574,14 +1503,17 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
1574 1503
1575 (*i)++; 1504 (*i)++;
1576 return 0; 1505 return 0;
1506
1577out: 1507out:
1508 module_put(t->u.kernel.target->me);
1509cleanup_matches:
1578 IPT_MATCH_ITERATE(e, cleanup_match, &j); 1510 IPT_MATCH_ITERATE(e, cleanup_match, &j);
1579 return ret; 1511 return ret;
1580} 1512}
1581 1513
1582static inline int compat_copy_match_from_user(struct ipt_entry_match *m, 1514static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
1583 void **dstptr, compat_uint_t *size, const char *name, 1515 void **dstptr, compat_uint_t *size, const char *name,
1584 const struct ipt_ip *ip, unsigned int hookmask) 1516 const struct ipt_ip *ip, unsigned int hookmask, int *i)
1585{ 1517{
1586 struct ipt_entry_match *dm; 1518 struct ipt_entry_match *dm;
1587 struct ipt_match *match; 1519 struct ipt_match *match;
@@ -1589,26 +1521,28 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
1589 1521
1590 dm = (struct ipt_entry_match *)*dstptr; 1522 dm = (struct ipt_entry_match *)*dstptr;
1591 match = m->u.kernel.match; 1523 match = m->u.kernel.match;
1592 if (match->compat) 1524 xt_compat_match_from_user(m, dstptr, size);
1593 match->compat(m, dstptr, size, COMPAT_FROM_USER);
1594 else
1595 xt_compat_match(m, dstptr, size, COMPAT_FROM_USER);
1596 1525
1597 ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm), 1526 ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm),
1598 name, hookmask, ip->proto, 1527 name, hookmask, ip->proto,
1599 ip->invflags & IPT_INV_PROTO); 1528 ip->invflags & IPT_INV_PROTO);
1600 if (ret) 1529 if (ret)
1601 return ret; 1530 goto err;
1602 1531
1603 if (m->u.kernel.match->checkentry 1532 if (m->u.kernel.match->checkentry
1604 && !m->u.kernel.match->checkentry(name, ip, match, dm->data, 1533 && !m->u.kernel.match->checkentry(name, ip, match, dm->data,
1605 dm->u.match_size - sizeof(*dm),
1606 hookmask)) { 1534 hookmask)) {
1607 duprintf("ip_tables: check failed for `%s'.\n", 1535 duprintf("ip_tables: check failed for `%s'.\n",
1608 m->u.kernel.match->name); 1536 m->u.kernel.match->name);
1609 return -EINVAL; 1537 ret = -EINVAL;
1538 goto err;
1610 } 1539 }
1540 (*i)++;
1611 return 0; 1541 return 0;
1542
1543err:
1544 module_put(m->u.kernel.match->me);
1545 return ret;
1612} 1546}
1613 1547
1614static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, 1548static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
@@ -1619,25 +1553,23 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
1619 struct ipt_target *target; 1553 struct ipt_target *target;
1620 struct ipt_entry *de; 1554 struct ipt_entry *de;
1621 unsigned int origsize; 1555 unsigned int origsize;
1622 int ret, h; 1556 int ret, h, j;
1623 1557
1624 ret = 0; 1558 ret = 0;
1625 origsize = *size; 1559 origsize = *size;
1626 de = (struct ipt_entry *)*dstptr; 1560 de = (struct ipt_entry *)*dstptr;
1627 memcpy(de, e, sizeof(struct ipt_entry)); 1561 memcpy(de, e, sizeof(struct ipt_entry));
1628 1562
1563 j = 0;
1629 *dstptr += sizeof(struct compat_ipt_entry); 1564 *dstptr += sizeof(struct compat_ipt_entry);
1630 ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size, 1565 ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size,
1631 name, &de->ip, de->comefrom); 1566 name, &de->ip, de->comefrom, &j);
1632 if (ret) 1567 if (ret)
1633 goto out; 1568 goto cleanup_matches;
1634 de->target_offset = e->target_offset - (origsize - *size); 1569 de->target_offset = e->target_offset - (origsize - *size);
1635 t = ipt_get_target(e); 1570 t = ipt_get_target(e);
1636 target = t->u.kernel.target; 1571 target = t->u.kernel.target;
1637 if (target->compat) 1572 xt_compat_target_from_user(t, dstptr, size);
1638 target->compat(t, dstptr, size, COMPAT_FROM_USER);
1639 else
1640 xt_compat_target(t, dstptr, size, COMPAT_FROM_USER);
1641 1573
1642 de->next_offset = e->next_offset - (origsize - *size); 1574 de->next_offset = e->next_offset - (origsize - *size);
1643 for (h = 0; h < NF_IP_NUMHOOKS; h++) { 1575 for (h = 0; h < NF_IP_NUMHOOKS; h++) {
@@ -1653,22 +1585,26 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
1653 name, e->comefrom, e->ip.proto, 1585 name, e->comefrom, e->ip.proto,
1654 e->ip.invflags & IPT_INV_PROTO); 1586 e->ip.invflags & IPT_INV_PROTO);
1655 if (ret) 1587 if (ret)
1656 goto out; 1588 goto err;
1657 1589
1658 ret = -EINVAL; 1590 ret = -EINVAL;
1659 if (t->u.kernel.target == &ipt_standard_target) { 1591 if (t->u.kernel.target == &ipt_standard_target) {
1660 if (!standard_check(t, *size)) 1592 if (!standard_check(t, *size))
1661 goto out; 1593 goto err;
1662 } else if (t->u.kernel.target->checkentry 1594 } else if (t->u.kernel.target->checkentry
1663 && !t->u.kernel.target->checkentry(name, de, target, 1595 && !t->u.kernel.target->checkentry(name, de, target,
1664 t->data, t->u.target_size - sizeof(*t), 1596 t->data, de->comefrom)) {
1665 de->comefrom)) {
1666 duprintf("ip_tables: compat: check failed for `%s'.\n", 1597 duprintf("ip_tables: compat: check failed for `%s'.\n",
1667 t->u.kernel.target->name); 1598 t->u.kernel.target->name);
1668 goto out; 1599 goto err;
1669 } 1600 }
1670 ret = 0; 1601 ret = 0;
1671out: 1602 return ret;
1603
1604err:
1605 module_put(t->u.kernel.target->me);
1606cleanup_matches:
1607 IPT_MATCH_ITERATE(e, cleanup_match, &j);
1672 return ret; 1608 return ret;
1673} 1609}
1674 1610
@@ -1989,6 +1925,8 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
1989 return ret; 1925 return ret;
1990} 1926}
1991 1927
1928static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
1929
1992static int 1930static int
1993compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) 1931compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1994{ 1932{
@@ -2002,8 +1940,7 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2002 ret = compat_get_entries(user, len); 1940 ret = compat_get_entries(user, len);
2003 break; 1941 break;
2004 default: 1942 default:
2005 duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd); 1943 ret = do_ipt_get_ctl(sk, cmd, user, len);
2006 ret = -EINVAL;
2007 } 1944 }
2008 return ret; 1945 return ret;
2009} 1946}
@@ -2185,7 +2122,6 @@ icmp_checkentry(const char *tablename,
2185 const void *info, 2122 const void *info,
2186 const struct xt_match *match, 2123 const struct xt_match *match,
2187 void *matchinfo, 2124 void *matchinfo,
2188 unsigned int matchsize,
2189 unsigned int hook_mask) 2125 unsigned int hook_mask)
2190{ 2126{
2191 const struct ipt_icmp *icmpinfo = matchinfo; 2127 const struct ipt_icmp *icmpinfo = matchinfo;
@@ -2200,7 +2136,9 @@ static struct ipt_target ipt_standard_target = {
2200 .targetsize = sizeof(int), 2136 .targetsize = sizeof(int),
2201 .family = AF_INET, 2137 .family = AF_INET,
2202#ifdef CONFIG_COMPAT 2138#ifdef CONFIG_COMPAT
2203 .compat = &compat_ipt_standard_fn, 2139 .compatsize = sizeof(compat_int_t),
2140 .compat_from_user = compat_standard_from_user,
2141 .compat_to_user = compat_standard_to_user,
2204#endif 2142#endif
2205}; 2143};
2206 2144
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index d994c5f5744c..41589665fc5d 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -302,8 +302,7 @@ target(struct sk_buff **pskb,
302 const struct net_device *out, 302 const struct net_device *out,
303 unsigned int hooknum, 303 unsigned int hooknum,
304 const struct xt_target *target, 304 const struct xt_target *target,
305 const void *targinfo, 305 const void *targinfo)
306 void *userinfo)
307{ 306{
308 const struct ipt_clusterip_tgt_info *cipinfo = targinfo; 307 const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
309 enum ip_conntrack_info ctinfo; 308 enum ip_conntrack_info ctinfo;
@@ -373,7 +372,6 @@ checkentry(const char *tablename,
373 const void *e_void, 372 const void *e_void,
374 const struct xt_target *target, 373 const struct xt_target *target,
375 void *targinfo, 374 void *targinfo,
376 unsigned int targinfosize,
377 unsigned int hook_mask) 375 unsigned int hook_mask)
378{ 376{
379 struct ipt_clusterip_tgt_info *cipinfo = targinfo; 377 struct ipt_clusterip_tgt_info *cipinfo = targinfo;
@@ -450,8 +448,7 @@ checkentry(const char *tablename,
450} 448}
451 449
452/* drop reference count of cluster config when rule is deleted */ 450/* drop reference count of cluster config when rule is deleted */
453static void destroy(const struct xt_target *target, void *targinfo, 451static void destroy(const struct xt_target *target, void *targinfo)
454 unsigned int targinfosize)
455{ 452{
456 struct ipt_clusterip_tgt_info *cipinfo = targinfo; 453 struct ipt_clusterip_tgt_info *cipinfo = targinfo;
457 454
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c
deleted file mode 100644
index c8e971288dfe..000000000000
--- a/net/ipv4/netfilter/ipt_DSCP.c
+++ /dev/null
@@ -1,96 +0,0 @@
1/* iptables module for setting the IPv4 DSCP field, Version 1.8
2 *
3 * (C) 2002 by Harald Welte <laforge@netfilter.org>
4 * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * See RFC2474 for a description of the DSCP field within the IP Header.
11 *
12 * ipt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
13*/
14
15#include <linux/module.h>
16#include <linux/skbuff.h>
17#include <linux/ip.h>
18#include <net/checksum.h>
19
20#include <linux/netfilter_ipv4/ip_tables.h>
21#include <linux/netfilter_ipv4/ipt_DSCP.h>
22
23MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
24MODULE_DESCRIPTION("iptables DSCP modification module");
25MODULE_LICENSE("GPL");
26
27static unsigned int
28target(struct sk_buff **pskb,
29 const struct net_device *in,
30 const struct net_device *out,
31 unsigned int hooknum,
32 const struct xt_target *target,
33 const void *targinfo,
34 void *userinfo)
35{
36 const struct ipt_DSCP_info *dinfo = targinfo;
37 u_int8_t sh_dscp = ((dinfo->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
38
39
40 if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) {
41 u_int16_t diffs[2];
42
43 if (!skb_make_writable(pskb, sizeof(struct iphdr)))
44 return NF_DROP;
45
46 diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
47 (*pskb)->nh.iph->tos = ((*pskb)->nh.iph->tos & ~IPT_DSCP_MASK)
48 | sh_dscp;
49 diffs[1] = htons((*pskb)->nh.iph->tos);
50 (*pskb)->nh.iph->check
51 = csum_fold(csum_partial((char *)diffs,
52 sizeof(diffs),
53 (*pskb)->nh.iph->check
54 ^ 0xFFFF));
55 }
56 return IPT_CONTINUE;
57}
58
59static int
60checkentry(const char *tablename,
61 const void *e_void,
62 const struct xt_target *target,
63 void *targinfo,
64 unsigned int targinfosize,
65 unsigned int hook_mask)
66{
67 const u_int8_t dscp = ((struct ipt_DSCP_info *)targinfo)->dscp;
68
69 if ((dscp > IPT_DSCP_MAX)) {
70 printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
71 return 0;
72 }
73 return 1;
74}
75
76static struct ipt_target ipt_dscp_reg = {
77 .name = "DSCP",
78 .target = target,
79 .targetsize = sizeof(struct ipt_DSCP_info),
80 .table = "mangle",
81 .checkentry = checkentry,
82 .me = THIS_MODULE,
83};
84
85static int __init ipt_dscp_init(void)
86{
87 return ipt_register_target(&ipt_dscp_reg);
88}
89
90static void __exit ipt_dscp_fini(void)
91{
92 ipt_unregister_target(&ipt_dscp_reg);
93}
94
95module_init(ipt_dscp_init);
96module_exit(ipt_dscp_fini);
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 4adf5c9d34f5..23f9c7ebe7eb 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -27,32 +27,28 @@ MODULE_DESCRIPTION("iptables ECN modification module");
27static inline int 27static inline int
28set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) 28set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
29{ 29{
30 if (((*pskb)->nh.iph->tos & IPT_ECN_IP_MASK) 30 struct iphdr *iph = (*pskb)->nh.iph;
31 != (einfo->ip_ect & IPT_ECN_IP_MASK)) { 31 u_int16_t oldtos;
32 u_int16_t diffs[2];
33 32
33 if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
34 if (!skb_make_writable(pskb, sizeof(struct iphdr))) 34 if (!skb_make_writable(pskb, sizeof(struct iphdr)))
35 return 0; 35 return 0;
36 36 iph = (*pskb)->nh.iph;
37 diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; 37 oldtos = iph->tos;
38 (*pskb)->nh.iph->tos &= ~IPT_ECN_IP_MASK; 38 iph->tos &= ~IPT_ECN_IP_MASK;
39 (*pskb)->nh.iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); 39 iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
40 diffs[1] = htons((*pskb)->nh.iph->tos); 40 iph->check = nf_csum_update(oldtos ^ 0xFFFF, iph->tos,
41 (*pskb)->nh.iph->check 41 iph->check);
42 = csum_fold(csum_partial((char *)diffs,
43 sizeof(diffs),
44 (*pskb)->nh.iph->check
45 ^0xFFFF));
46 } 42 }
47 return 1; 43 return 1;
48} 44}
49 45
50/* Return 0 if there was an error. */ 46/* Return 0 if there was an error. */
51static inline int 47static inline int
52set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) 48set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
53{ 49{
54 struct tcphdr _tcph, *tcph; 50 struct tcphdr _tcph, *tcph;
55 u_int16_t diffs[2]; 51 u_int16_t oldval;
56 52
57 /* Not enought header? */ 53 /* Not enought header? */
58 tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, 54 tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
@@ -70,22 +66,16 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
70 return 0; 66 return 0;
71 tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; 67 tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
72 68
73 if ((*pskb)->ip_summed == CHECKSUM_HW && 69 oldval = ((u_int16_t *)tcph)[6];
74 skb_checksum_help(*pskb, inward))
75 return 0;
76
77 diffs[0] = ((u_int16_t *)tcph)[6];
78 if (einfo->operation & IPT_ECN_OP_SET_ECE) 70 if (einfo->operation & IPT_ECN_OP_SET_ECE)
79 tcph->ece = einfo->proto.tcp.ece; 71 tcph->ece = einfo->proto.tcp.ece;
80 if (einfo->operation & IPT_ECN_OP_SET_CWR) 72 if (einfo->operation & IPT_ECN_OP_SET_CWR)
81 tcph->cwr = einfo->proto.tcp.cwr; 73 tcph->cwr = einfo->proto.tcp.cwr;
82 diffs[1] = ((u_int16_t *)tcph)[6];
83 diffs[0] = diffs[0] ^ 0xFFFF;
84 74
85 if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) 75 tcph->check = nf_proto_csum_update((*pskb),
86 tcph->check = csum_fold(csum_partial((char *)diffs, 76 oldval ^ 0xFFFF,
87 sizeof(diffs), 77 ((u_int16_t *)tcph)[6],
88 tcph->check^0xFFFF)); 78 tcph->check, 0);
89 return 1; 79 return 1;
90} 80}
91 81
@@ -95,8 +85,7 @@ target(struct sk_buff **pskb,
95 const struct net_device *out, 85 const struct net_device *out,
96 unsigned int hooknum, 86 unsigned int hooknum,
97 const struct xt_target *target, 87 const struct xt_target *target,
98 const void *targinfo, 88 const void *targinfo)
99 void *userinfo)
100{ 89{
101 const struct ipt_ECN_info *einfo = targinfo; 90 const struct ipt_ECN_info *einfo = targinfo;
102 91
@@ -106,7 +95,7 @@ target(struct sk_buff **pskb,
106 95
107 if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) 96 if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
108 && (*pskb)->nh.iph->protocol == IPPROTO_TCP) 97 && (*pskb)->nh.iph->protocol == IPPROTO_TCP)
109 if (!set_ect_tcp(pskb, einfo, (out == NULL))) 98 if (!set_ect_tcp(pskb, einfo))
110 return NF_DROP; 99 return NF_DROP;
111 100
112 return IPT_CONTINUE; 101 return IPT_CONTINUE;
@@ -117,7 +106,6 @@ checkentry(const char *tablename,
117 const void *e_void, 106 const void *e_void,
118 const struct xt_target *target, 107 const struct xt_target *target,
119 void *targinfo, 108 void *targinfo,
120 unsigned int targinfosize,
121 unsigned int hook_mask) 109 unsigned int hook_mask)
122{ 110{
123 const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo; 111 const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index b98f7b08b084..7dc820df8bc5 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -416,8 +416,7 @@ ipt_log_target(struct sk_buff **pskb,
416 const struct net_device *out, 416 const struct net_device *out,
417 unsigned int hooknum, 417 unsigned int hooknum,
418 const struct xt_target *target, 418 const struct xt_target *target,
419 const void *targinfo, 419 const void *targinfo)
420 void *userinfo)
421{ 420{
422 const struct ipt_log_info *loginfo = targinfo; 421 const struct ipt_log_info *loginfo = targinfo;
423 struct nf_loginfo li; 422 struct nf_loginfo li;
@@ -440,7 +439,6 @@ static int ipt_log_checkentry(const char *tablename,
440 const void *e, 439 const void *e,
441 const struct xt_target *target, 440 const struct xt_target *target,
442 void *targinfo, 441 void *targinfo,
443 unsigned int targinfosize,
444 unsigned int hook_mask) 442 unsigned int hook_mask)
445{ 443{
446 const struct ipt_log_info *loginfo = targinfo; 444 const struct ipt_log_info *loginfo = targinfo;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index ebd94f2abf0d..bc65168a3437 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -42,7 +42,6 @@ masquerade_check(const char *tablename,
42 const void *e, 42 const void *e,
43 const struct xt_target *target, 43 const struct xt_target *target,
44 void *targinfo, 44 void *targinfo,
45 unsigned int targinfosize,
46 unsigned int hook_mask) 45 unsigned int hook_mask)
47{ 46{
48 const struct ip_nat_multi_range_compat *mr = targinfo; 47 const struct ip_nat_multi_range_compat *mr = targinfo;
@@ -64,8 +63,7 @@ masquerade_target(struct sk_buff **pskb,
64 const struct net_device *out, 63 const struct net_device *out,
65 unsigned int hooknum, 64 unsigned int hooknum,
66 const struct xt_target *target, 65 const struct xt_target *target,
67 const void *targinfo, 66 const void *targinfo)
68 void *userinfo)
69{ 67{
70 struct ip_conntrack *ct; 68 struct ip_conntrack *ct;
71 enum ip_conntrack_info ctinfo; 69 enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 736c4b5a86a7..beb2914225ff 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -33,7 +33,6 @@ check(const char *tablename,
33 const void *e, 33 const void *e,
34 const struct xt_target *target, 34 const struct xt_target *target,
35 void *targinfo, 35 void *targinfo,
36 unsigned int targinfosize,
37 unsigned int hook_mask) 36 unsigned int hook_mask)
38{ 37{
39 const struct ip_nat_multi_range_compat *mr = targinfo; 38 const struct ip_nat_multi_range_compat *mr = targinfo;
@@ -55,8 +54,7 @@ target(struct sk_buff **pskb,
55 const struct net_device *out, 54 const struct net_device *out,
56 unsigned int hooknum, 55 unsigned int hooknum,
57 const struct xt_target *target, 56 const struct xt_target *target,
58 const void *targinfo, 57 const void *targinfo)
59 void *userinfo)
60{ 58{
61 struct ip_conntrack *ct; 59 struct ip_conntrack *ct;
62 enum ip_conntrack_info ctinfo; 60 enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index f290463232de..f03d43671c6d 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -36,7 +36,6 @@ redirect_check(const char *tablename,
36 const void *e, 36 const void *e,
37 const struct xt_target *target, 37 const struct xt_target *target,
38 void *targinfo, 38 void *targinfo,
39 unsigned int targinfosize,
40 unsigned int hook_mask) 39 unsigned int hook_mask)
41{ 40{
42 const struct ip_nat_multi_range_compat *mr = targinfo; 41 const struct ip_nat_multi_range_compat *mr = targinfo;
@@ -58,8 +57,7 @@ redirect_target(struct sk_buff **pskb,
58 const struct net_device *out, 57 const struct net_device *out,
59 unsigned int hooknum, 58 unsigned int hooknum,
60 const struct xt_target *target, 59 const struct xt_target *target,
61 const void *targinfo, 60 const void *targinfo)
62 void *userinfo)
63{ 61{
64 struct ip_conntrack *ct; 62 struct ip_conntrack *ct;
65 enum ip_conntrack_info ctinfo; 63 enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 269bc2067cb8..b81821edd893 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -90,6 +90,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb,
90 fl.proto = IPPROTO_TCP; 90 fl.proto = IPPROTO_TCP;
91 fl.fl_ip_sport = tcph->dest; 91 fl.fl_ip_sport = tcph->dest;
92 fl.fl_ip_dport = tcph->source; 92 fl.fl_ip_dport = tcph->source;
93 security_skb_classify_flow(skb, &fl);
93 94
94 xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); 95 xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
95 96
@@ -184,6 +185,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
184 tcph->urg_ptr = 0; 185 tcph->urg_ptr = 0;
185 186
186 /* Adjust TCP checksum */ 187 /* Adjust TCP checksum */
188 nskb->ip_summed = CHECKSUM_NONE;
187 tcph->check = 0; 189 tcph->check = 0;
188 tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr), 190 tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
189 nskb->nh.iph->saddr, 191 nskb->nh.iph->saddr,
@@ -226,8 +228,7 @@ static unsigned int reject(struct sk_buff **pskb,
226 const struct net_device *out, 228 const struct net_device *out,
227 unsigned int hooknum, 229 unsigned int hooknum,
228 const struct xt_target *target, 230 const struct xt_target *target,
229 const void *targinfo, 231 const void *targinfo)
230 void *userinfo)
231{ 232{
232 const struct ipt_reject_info *reject = targinfo; 233 const struct ipt_reject_info *reject = targinfo;
233 234
@@ -275,7 +276,6 @@ static int check(const char *tablename,
275 const void *e_void, 276 const void *e_void,
276 const struct xt_target *target, 277 const struct xt_target *target,
277 void *targinfo, 278 void *targinfo,
278 unsigned int targinfosize,
279 unsigned int hook_mask) 279 unsigned int hook_mask)
280{ 280{
281 const struct ipt_reject_info *rejinfo = targinfo; 281 const struct ipt_reject_info *rejinfo = targinfo;
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 7169b09b5a67..efbcb1198832 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -52,7 +52,6 @@ same_check(const char *tablename,
52 const void *e, 52 const void *e,
53 const struct xt_target *target, 53 const struct xt_target *target,
54 void *targinfo, 54 void *targinfo,
55 unsigned int targinfosize,
56 unsigned int hook_mask) 55 unsigned int hook_mask)
57{ 56{
58 unsigned int count, countess, rangeip, index = 0; 57 unsigned int count, countess, rangeip, index = 0;
@@ -116,8 +115,7 @@ same_check(const char *tablename,
116} 115}
117 116
118static void 117static void
119same_destroy(const struct xt_target *target, void *targinfo, 118same_destroy(const struct xt_target *target, void *targinfo)
120 unsigned int targinfosize)
121{ 119{
122 struct ipt_same_info *mr = targinfo; 120 struct ipt_same_info *mr = targinfo;
123 121
@@ -133,8 +131,7 @@ same_target(struct sk_buff **pskb,
133 const struct net_device *out, 131 const struct net_device *out,
134 unsigned int hooknum, 132 unsigned int hooknum,
135 const struct xt_target *target, 133 const struct xt_target *target,
136 const void *targinfo, 134 const void *targinfo)
137 void *userinfo)
138{ 135{
139 struct ip_conntrack *ct; 136 struct ip_conntrack *ct;
140 enum ip_conntrack_info ctinfo; 137 enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index ef2fe5b3f0d8..4246c4321e5b 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -21,26 +21,14 @@ MODULE_LICENSE("GPL");
21MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); 21MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
22MODULE_DESCRIPTION("iptables TCP MSS modification module"); 22MODULE_DESCRIPTION("iptables TCP MSS modification module");
23 23
24#if 0
25#define DEBUGP printk
26#else
27#define DEBUGP(format, args...)
28#endif
29
30static u_int16_t
31cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
32{
33 u_int32_t diffs[] = { oldvalinv, newval };
34 return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
35 oldcheck^0xFFFF));
36}
37
38static inline unsigned int 24static inline unsigned int
39optlen(const u_int8_t *opt, unsigned int offset) 25optlen(const u_int8_t *opt, unsigned int offset)
40{ 26{
41 /* Beware zero-length options: make finite progress */ 27 /* Beware zero-length options: make finite progress */
42 if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) return 1; 28 if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
43 else return opt[offset+1]; 29 return 1;
30 else
31 return opt[offset+1];
44} 32}
45 33
46static unsigned int 34static unsigned int
@@ -49,8 +37,7 @@ ipt_tcpmss_target(struct sk_buff **pskb,
49 const struct net_device *out, 37 const struct net_device *out,
50 unsigned int hooknum, 38 unsigned int hooknum,
51 const struct xt_target *target, 39 const struct xt_target *target,
52 const void *targinfo, 40 const void *targinfo)
53 void *userinfo)
54{ 41{
55 const struct ipt_tcpmss_info *tcpmssinfo = targinfo; 42 const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
56 struct tcphdr *tcph; 43 struct tcphdr *tcph;
@@ -62,13 +49,8 @@ ipt_tcpmss_target(struct sk_buff **pskb,
62 if (!skb_make_writable(pskb, (*pskb)->len)) 49 if (!skb_make_writable(pskb, (*pskb)->len))
63 return NF_DROP; 50 return NF_DROP;
64 51
65 if ((*pskb)->ip_summed == CHECKSUM_HW &&
66 skb_checksum_help(*pskb, out == NULL))
67 return NF_DROP;
68
69 iph = (*pskb)->nh.iph; 52 iph = (*pskb)->nh.iph;
70 tcplen = (*pskb)->len - iph->ihl*4; 53 tcplen = (*pskb)->len - iph->ihl*4;
71
72 tcph = (void *)iph + iph->ihl*4; 54 tcph = (void *)iph + iph->ihl*4;
73 55
74 /* Since it passed flags test in tcp match, we know it is is 56 /* Since it passed flags test in tcp match, we know it is is
@@ -84,54 +66,41 @@ ipt_tcpmss_target(struct sk_buff **pskb,
84 return NF_DROP; 66 return NF_DROP;
85 } 67 }
86 68
87 if(tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { 69 if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) {
88 if(!(*pskb)->dst) { 70 if (dst_mtu((*pskb)->dst) <= sizeof(struct iphdr) +
71 sizeof(struct tcphdr)) {
89 if (net_ratelimit()) 72 if (net_ratelimit())
90 printk(KERN_ERR 73 printk(KERN_ERR "ipt_tcpmss_target: "
91 "ipt_tcpmss_target: no dst?! can't determine path-MTU\n"); 74 "unknown or invalid path-MTU (%d)\n",
75 dst_mtu((*pskb)->dst));
92 return NF_DROP; /* or IPT_CONTINUE ?? */ 76 return NF_DROP; /* or IPT_CONTINUE ?? */
93 } 77 }
94 78
95 if(dst_mtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) { 79 newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) -
96 if (net_ratelimit()) 80 sizeof(struct tcphdr);
97 printk(KERN_ERR
98 "ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_mtu((*pskb)->dst));
99 return NF_DROP; /* or IPT_CONTINUE ?? */
100 }
101
102 newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr);
103 } else 81 } else
104 newmss = tcpmssinfo->mss; 82 newmss = tcpmssinfo->mss;
105 83
106 opt = (u_int8_t *)tcph; 84 opt = (u_int8_t *)tcph;
107 for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)){ 85 for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) {
108 if ((opt[i] == TCPOPT_MSS) && 86 if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS &&
109 ((tcph->doff*4 - i) >= TCPOLEN_MSS) && 87 opt[i+1] == TCPOLEN_MSS) {
110 (opt[i+1] == TCPOLEN_MSS)) {
111 u_int16_t oldmss; 88 u_int16_t oldmss;
112 89
113 oldmss = (opt[i+2] << 8) | opt[i+3]; 90 oldmss = (opt[i+2] << 8) | opt[i+3];
114 91
115 if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && 92 if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU &&
116 (oldmss <= newmss)) 93 oldmss <= newmss)
117 return IPT_CONTINUE; 94 return IPT_CONTINUE;
118 95
119 opt[i+2] = (newmss & 0xff00) >> 8; 96 opt[i+2] = (newmss & 0xff00) >> 8;
120 opt[i+3] = (newmss & 0x00ff); 97 opt[i+3] = (newmss & 0x00ff);
121 98
122 tcph->check = cheat_check(htons(oldmss)^0xFFFF, 99 tcph->check = nf_proto_csum_update(*pskb,
123 htons(newmss), 100 htons(oldmss)^0xFFFF,
124 tcph->check); 101 htons(newmss),
125 102 tcph->check, 0);
126 DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" 103 return IPT_CONTINUE;
127 "->%u.%u.%u.%u:%hu changed TCP MSS option"
128 " (from %u to %u)\n",
129 NIPQUAD((*pskb)->nh.iph->saddr),
130 ntohs(tcph->source),
131 NIPQUAD((*pskb)->nh.iph->daddr),
132 ntohs(tcph->dest),
133 oldmss, newmss);
134 goto retmodified;
135 } 104 }
136 } 105 }
137 106
@@ -143,13 +112,8 @@ ipt_tcpmss_target(struct sk_buff **pskb,
143 112
144 newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), 113 newskb = skb_copy_expand(*pskb, skb_headroom(*pskb),
145 TCPOLEN_MSS, GFP_ATOMIC); 114 TCPOLEN_MSS, GFP_ATOMIC);
146 if (!newskb) { 115 if (!newskb)
147 if (net_ratelimit())
148 printk(KERN_ERR "ipt_tcpmss_target:"
149 " unable to allocate larger skb\n");
150 return NF_DROP; 116 return NF_DROP;
151 }
152
153 kfree_skb(*pskb); 117 kfree_skb(*pskb);
154 *pskb = newskb; 118 *pskb = newskb;
155 iph = (*pskb)->nh.iph; 119 iph = (*pskb)->nh.iph;
@@ -161,36 +125,29 @@ ipt_tcpmss_target(struct sk_buff **pskb,
161 opt = (u_int8_t *)tcph + sizeof(struct tcphdr); 125 opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
162 memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); 126 memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
163 127
164 tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF, 128 tcph->check = nf_proto_csum_update(*pskb,
165 htons(tcplen + TCPOLEN_MSS), tcph->check); 129 htons(tcplen) ^ 0xFFFF,
166 tcplen += TCPOLEN_MSS; 130 htons(tcplen + TCPOLEN_MSS),
167 131 tcph->check, 1);
168 opt[0] = TCPOPT_MSS; 132 opt[0] = TCPOPT_MSS;
169 opt[1] = TCPOLEN_MSS; 133 opt[1] = TCPOLEN_MSS;
170 opt[2] = (newmss & 0xff00) >> 8; 134 opt[2] = (newmss & 0xff00) >> 8;
171 opt[3] = (newmss & 0x00ff); 135 opt[3] = (newmss & 0x00ff);
172 136
173 tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check); 137 tcph->check = nf_proto_csum_update(*pskb, ~0, *((u_int32_t *)opt),
138 tcph->check, 0);
174 139
175 oldval = ((u_int16_t *)tcph)[6]; 140 oldval = ((u_int16_t *)tcph)[6];
176 tcph->doff += TCPOLEN_MSS/4; 141 tcph->doff += TCPOLEN_MSS/4;
177 tcph->check = cheat_check(oldval ^ 0xFFFF, 142 tcph->check = nf_proto_csum_update(*pskb,
178 ((u_int16_t *)tcph)[6], tcph->check); 143 oldval ^ 0xFFFF,
144 ((u_int16_t *)tcph)[6],
145 tcph->check, 0);
179 146
180 newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS); 147 newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS);
181 iph->check = cheat_check(iph->tot_len ^ 0xFFFF, 148 iph->check = nf_csum_update(iph->tot_len ^ 0xFFFF,
182 newtotlen, iph->check); 149 newtotlen, iph->check);
183 iph->tot_len = newtotlen; 150 iph->tot_len = newtotlen;
184
185 DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
186 "->%u.%u.%u.%u:%hu added TCP MSS option (%u)\n",
187 NIPQUAD((*pskb)->nh.iph->saddr),
188 ntohs(tcph->source),
189 NIPQUAD((*pskb)->nh.iph->daddr),
190 ntohs(tcph->dest),
191 newmss);
192
193 retmodified:
194 return IPT_CONTINUE; 151 return IPT_CONTINUE;
195} 152}
196 153
@@ -200,9 +157,9 @@ static inline int find_syn_match(const struct ipt_entry_match *m)
200{ 157{
201 const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data; 158 const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data;
202 159
203 if (strcmp(m->u.kernel.match->name, "tcp") == 0 160 if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
204 && (tcpinfo->flg_cmp & TH_SYN) 161 tcpinfo->flg_cmp & TH_SYN &&
205 && !(tcpinfo->invflags & IPT_TCP_INV_FLAGS)) 162 !(tcpinfo->invflags & IPT_TCP_INV_FLAGS))
206 return 1; 163 return 1;
207 164
208 return 0; 165 return 0;
@@ -214,17 +171,17 @@ ipt_tcpmss_checkentry(const char *tablename,
214 const void *e_void, 171 const void *e_void,
215 const struct xt_target *target, 172 const struct xt_target *target,
216 void *targinfo, 173 void *targinfo,
217 unsigned int targinfosize,
218 unsigned int hook_mask) 174 unsigned int hook_mask)
219{ 175{
220 const struct ipt_tcpmss_info *tcpmssinfo = targinfo; 176 const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
221 const struct ipt_entry *e = e_void; 177 const struct ipt_entry *e = e_void;
222 178
223 if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && 179 if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU &&
224 ((hook_mask & ~((1 << NF_IP_FORWARD) 180 (hook_mask & ~((1 << NF_IP_FORWARD) |
225 | (1 << NF_IP_LOCAL_OUT) 181 (1 << NF_IP_LOCAL_OUT) |
226 | (1 << NF_IP_POST_ROUTING))) != 0)) { 182 (1 << NF_IP_POST_ROUTING))) != 0) {
227 printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); 183 printk("TCPMSS: path-MTU clamping only supported in "
184 "FORWARD, OUTPUT and POSTROUTING hooks\n");
228 return 0; 185 return 0;
229 } 186 }
230 187
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 1c7a5ca399b3..471a4c438b0a 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -26,27 +26,20 @@ target(struct sk_buff **pskb,
26 const struct net_device *out, 26 const struct net_device *out,
27 unsigned int hooknum, 27 unsigned int hooknum,
28 const struct xt_target *target, 28 const struct xt_target *target,
29 const void *targinfo, 29 const void *targinfo)
30 void *userinfo)
31{ 30{
32 const struct ipt_tos_target_info *tosinfo = targinfo; 31 const struct ipt_tos_target_info *tosinfo = targinfo;
32 struct iphdr *iph = (*pskb)->nh.iph;
33 u_int16_t oldtos;
33 34
34 if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { 35 if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
35 u_int16_t diffs[2];
36
37 if (!skb_make_writable(pskb, sizeof(struct iphdr))) 36 if (!skb_make_writable(pskb, sizeof(struct iphdr)))
38 return NF_DROP; 37 return NF_DROP;
39 38 iph = (*pskb)->nh.iph;
40 diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; 39 oldtos = iph->tos;
41 (*pskb)->nh.iph->tos 40 iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
42 = ((*pskb)->nh.iph->tos & IPTOS_PREC_MASK) 41 iph->check = nf_csum_update(oldtos ^ 0xFFFF, iph->tos,
43 | tosinfo->tos; 42 iph->check);
44 diffs[1] = htons((*pskb)->nh.iph->tos);
45 (*pskb)->nh.iph->check
46 = csum_fold(csum_partial((char *)diffs,
47 sizeof(diffs),
48 (*pskb)->nh.iph->check
49 ^0xFFFF));
50 } 43 }
51 return IPT_CONTINUE; 44 return IPT_CONTINUE;
52} 45}
@@ -56,7 +49,6 @@ checkentry(const char *tablename,
56 const void *e_void, 49 const void *e_void,
57 const struct xt_target *target, 50 const struct xt_target *target,
58 void *targinfo, 51 void *targinfo,
59 unsigned int targinfosize,
60 unsigned int hook_mask) 52 unsigned int hook_mask)
61{ 53{
62 const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos; 54 const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index f48892ae0be5..96e79cc6d0f2 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -23,11 +23,10 @@ static unsigned int
23ipt_ttl_target(struct sk_buff **pskb, 23ipt_ttl_target(struct sk_buff **pskb,
24 const struct net_device *in, const struct net_device *out, 24 const struct net_device *in, const struct net_device *out,
25 unsigned int hooknum, const struct xt_target *target, 25 unsigned int hooknum, const struct xt_target *target,
26 const void *targinfo, void *userinfo) 26 const void *targinfo)
27{ 27{
28 struct iphdr *iph; 28 struct iphdr *iph;
29 const struct ipt_TTL_info *info = targinfo; 29 const struct ipt_TTL_info *info = targinfo;
30 u_int16_t diffs[2];
31 int new_ttl; 30 int new_ttl;
32 31
33 if (!skb_make_writable(pskb, (*pskb)->len)) 32 if (!skb_make_writable(pskb, (*pskb)->len))
@@ -55,12 +54,10 @@ ipt_ttl_target(struct sk_buff **pskb,
55 } 54 }
56 55
57 if (new_ttl != iph->ttl) { 56 if (new_ttl != iph->ttl) {
58 diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF; 57 iph->check = nf_csum_update(ntohs((iph->ttl << 8)) ^ 0xFFFF,
58 ntohs(new_ttl << 8),
59 iph->check);
59 iph->ttl = new_ttl; 60 iph->ttl = new_ttl;
60 diffs[1] = htons(((unsigned)iph->ttl) << 8);
61 iph->check = csum_fold(csum_partial((char *)diffs,
62 sizeof(diffs),
63 iph->check^0xFFFF));
64 } 61 }
65 62
66 return IPT_CONTINUE; 63 return IPT_CONTINUE;
@@ -70,7 +67,6 @@ static int ipt_ttl_checkentry(const char *tablename,
70 const void *e, 67 const void *e,
71 const struct xt_target *target, 68 const struct xt_target *target,
72 void *targinfo, 69 void *targinfo,
73 unsigned int targinfosize,
74 unsigned int hook_mask) 70 unsigned int hook_mask)
75{ 71{
76 struct ipt_TTL_info *info = targinfo; 72 struct ipt_TTL_info *info = targinfo;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index d46fd677fa11..2b104ea54f48 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -308,7 +308,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb,
308 const struct net_device *out, 308 const struct net_device *out,
309 unsigned int hooknum, 309 unsigned int hooknum,
310 const struct xt_target *target, 310 const struct xt_target *target,
311 const void *targinfo, void *userinfo) 311 const void *targinfo)
312{ 312{
313 struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; 313 struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
314 314
@@ -346,7 +346,6 @@ static int ipt_ulog_checkentry(const char *tablename,
346 const void *e, 346 const void *e,
347 const struct xt_target *target, 347 const struct xt_target *target,
348 void *targinfo, 348 void *targinfo,
349 unsigned int targinfosize,
350 unsigned int hookmask) 349 unsigned int hookmask)
351{ 350{
352 struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; 351 struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 2927135873d7..1798f86bc534 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -74,7 +74,6 @@ checkentry(const char *tablename,
74 const void *ip_void, 74 const void *ip_void,
75 const struct xt_match *match, 75 const struct xt_match *match,
76 void *matchinfo, 76 void *matchinfo,
77 unsigned int matchinfosize,
78 unsigned int hook_mask) 77 unsigned int hook_mask)
79{ 78{
80 const struct ipt_ah *ahinfo = matchinfo; 79 const struct ipt_ah *ahinfo = matchinfo;
diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c
deleted file mode 100644
index 47177591aeb6..000000000000
--- a/net/ipv4/netfilter/ipt_dscp.c
+++ /dev/null
@@ -1,54 +0,0 @@
1/* IP tables module for matching the value of the IPv4 DSCP field
2 *
3 * ipt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
4 *
5 * (C) 2002 by Harald Welte <laforge@netfilter.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/module.h>
13#include <linux/skbuff.h>
14
15#include <linux/netfilter_ipv4/ipt_dscp.h>
16#include <linux/netfilter_ipv4/ip_tables.h>
17
18MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
19MODULE_DESCRIPTION("iptables DSCP matching module");
20MODULE_LICENSE("GPL");
21
22static int match(const struct sk_buff *skb,
23 const struct net_device *in, const struct net_device *out,
24 const struct xt_match *match, const void *matchinfo,
25 int offset, unsigned int protoff, int *hotdrop)
26{
27 const struct ipt_dscp_info *info = matchinfo;
28 const struct iphdr *iph = skb->nh.iph;
29
30 u_int8_t sh_dscp = ((info->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
31
32 return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert;
33}
34
35static struct ipt_match dscp_match = {
36 .name = "dscp",
37 .match = match,
38 .matchsize = sizeof(struct ipt_dscp_info),
39 .me = THIS_MODULE,
40};
41
42static int __init ipt_dscp_init(void)
43{
44 return ipt_register_match(&dscp_match);
45}
46
47static void __exit ipt_dscp_fini(void)
48{
49 ipt_unregister_match(&dscp_match);
50
51}
52
53module_init(ipt_dscp_init);
54module_exit(ipt_dscp_fini);
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index b28250414933..dafbdec0efc0 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -88,8 +88,7 @@ static int match(const struct sk_buff *skb,
88 88
89static int checkentry(const char *tablename, const void *ip_void, 89static int checkentry(const char *tablename, const void *ip_void,
90 const struct xt_match *match, 90 const struct xt_match *match,
91 void *matchinfo, unsigned int matchsize, 91 void *matchinfo, unsigned int hook_mask)
92 unsigned int hook_mask)
93{ 92{
94 const struct ipt_ecn_info *info = matchinfo; 93 const struct ipt_ecn_info *info = matchinfo;
95 const struct ipt_ip *ip = ip_void; 94 const struct ipt_ip *ip = ip_void;
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 3bd2368e1fc9..4f73a61aa3dd 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -478,7 +478,6 @@ hashlimit_checkentry(const char *tablename,
478 const void *inf, 478 const void *inf,
479 const struct xt_match *match, 479 const struct xt_match *match,
480 void *matchinfo, 480 void *matchinfo,
481 unsigned int matchsize,
482 unsigned int hook_mask) 481 unsigned int hook_mask)
483{ 482{
484 struct ipt_hashlimit_info *r = matchinfo; 483 struct ipt_hashlimit_info *r = matchinfo;
@@ -529,18 +528,46 @@ hashlimit_checkentry(const char *tablename,
529} 528}
530 529
531static void 530static void
532hashlimit_destroy(const struct xt_match *match, void *matchinfo, 531hashlimit_destroy(const struct xt_match *match, void *matchinfo)
533 unsigned int matchsize)
534{ 532{
535 struct ipt_hashlimit_info *r = matchinfo; 533 struct ipt_hashlimit_info *r = matchinfo;
536 534
537 htable_put(r->hinfo); 535 htable_put(r->hinfo);
538} 536}
539 537
538#ifdef CONFIG_COMPAT
539struct compat_ipt_hashlimit_info {
540 char name[IFNAMSIZ];
541 struct hashlimit_cfg cfg;
542 compat_uptr_t hinfo;
543 compat_uptr_t master;
544};
545
546static void compat_from_user(void *dst, void *src)
547{
548 int off = offsetof(struct compat_ipt_hashlimit_info, hinfo);
549
550 memcpy(dst, src, off);
551 memset(dst + off, 0, sizeof(struct compat_ipt_hashlimit_info) - off);
552}
553
554static int compat_to_user(void __user *dst, void *src)
555{
556 int off = offsetof(struct compat_ipt_hashlimit_info, hinfo);
557
558 return copy_to_user(dst, src, off) ? -EFAULT : 0;
559}
560#endif
561
540static struct ipt_match ipt_hashlimit = { 562static struct ipt_match ipt_hashlimit = {
541 .name = "hashlimit", 563 .name = "hashlimit",
542 .match = hashlimit_match, 564 .match = hashlimit_match,
543 .matchsize = sizeof(struct ipt_hashlimit_info), 565 .matchsize = sizeof(struct ipt_hashlimit_info),
566#ifdef CONFIG_COMPAT
567 .compatsize = sizeof(struct compat_ipt_hashlimit_info),
568 .compat_from_user = compat_from_user,
569 .compat_to_user = compat_to_user,
570#endif
544 .checkentry = hashlimit_checkentry, 571 .checkentry = hashlimit_checkentry,
545 .destroy = hashlimit_destroy, 572 .destroy = hashlimit_destroy,
546 .me = THIS_MODULE 573 .me = THIS_MODULE
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 5ac6ac023b5e..78c336f12a9e 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -56,7 +56,6 @@ checkentry(const char *tablename,
56 const void *ip, 56 const void *ip,
57 const struct xt_match *match, 57 const struct xt_match *match,
58 void *matchinfo, 58 void *matchinfo,
59 unsigned int matchsize,
60 unsigned int hook_mask) 59 unsigned int hook_mask)
61{ 60{
62 const struct ipt_owner_info *info = matchinfo; 61 const struct ipt_owner_info *info = matchinfo;
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 61a2139f9cfd..32ae8d7ac506 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -35,14 +35,20 @@ static unsigned int ip_list_tot = 100;
35static unsigned int ip_pkt_list_tot = 20; 35static unsigned int ip_pkt_list_tot = 20;
36static unsigned int ip_list_hash_size = 0; 36static unsigned int ip_list_hash_size = 0;
37static unsigned int ip_list_perms = 0644; 37static unsigned int ip_list_perms = 0644;
38static unsigned int ip_list_uid = 0;
39static unsigned int ip_list_gid = 0;
38module_param(ip_list_tot, uint, 0400); 40module_param(ip_list_tot, uint, 0400);
39module_param(ip_pkt_list_tot, uint, 0400); 41module_param(ip_pkt_list_tot, uint, 0400);
40module_param(ip_list_hash_size, uint, 0400); 42module_param(ip_list_hash_size, uint, 0400);
41module_param(ip_list_perms, uint, 0400); 43module_param(ip_list_perms, uint, 0400);
44module_param(ip_list_uid, uint, 0400);
45module_param(ip_list_gid, uint, 0400);
42MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); 46MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
43MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)"); 47MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)");
44MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); 48MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
45MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files"); 49MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files");
50MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files");
51MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files");
46 52
47 53
48struct recent_entry { 54struct recent_entry {
@@ -232,7 +238,7 @@ out:
232static int 238static int
233ipt_recent_checkentry(const char *tablename, const void *ip, 239ipt_recent_checkentry(const char *tablename, const void *ip,
234 const struct xt_match *match, void *matchinfo, 240 const struct xt_match *match, void *matchinfo,
235 unsigned int matchsize, unsigned int hook_mask) 241 unsigned int hook_mask)
236{ 242{
237 const struct ipt_recent_info *info = matchinfo; 243 const struct ipt_recent_info *info = matchinfo;
238 struct recent_table *t; 244 struct recent_table *t;
@@ -274,6 +280,8 @@ ipt_recent_checkentry(const char *tablename, const void *ip,
274 goto out; 280 goto out;
275 } 281 }
276 t->proc->proc_fops = &recent_fops; 282 t->proc->proc_fops = &recent_fops;
283 t->proc->uid = ip_list_uid;
284 t->proc->gid = ip_list_gid;
277 t->proc->data = t; 285 t->proc->data = t;
278#endif 286#endif
279 spin_lock_bh(&recent_lock); 287 spin_lock_bh(&recent_lock);
@@ -286,8 +294,7 @@ out:
286} 294}
287 295
288static void 296static void
289ipt_recent_destroy(const struct xt_match *match, void *matchinfo, 297ipt_recent_destroy(const struct xt_match *match, void *matchinfo)
290 unsigned int matchsize)
291{ 298{
292 const struct ipt_recent_info *info = matchinfo; 299 const struct ipt_recent_info *info = matchinfo;
293 struct recent_table *t; 300 struct recent_table *t;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 7f417484bfbf..e2e7dd8d7903 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -90,7 +90,7 @@ ipt_hook(unsigned int hook,
90 const struct net_device *out, 90 const struct net_device *out,
91 int (*okfn)(struct sk_buff *)) 91 int (*okfn)(struct sk_buff *))
92{ 92{
93 return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); 93 return ipt_do_table(pskb, hook, in, out, &packet_filter);
94} 94}
95 95
96static unsigned int 96static unsigned int
@@ -108,7 +108,7 @@ ipt_local_out_hook(unsigned int hook,
108 return NF_ACCEPT; 108 return NF_ACCEPT;
109 } 109 }
110 110
111 return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); 111 return ipt_do_table(pskb, hook, in, out, &packet_filter);
112} 112}
113 113
114static struct nf_hook_ops ipt_ops[] = { 114static struct nf_hook_ops ipt_ops[] = {
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 4e7998beda63..79336cb42527 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -119,7 +119,7 @@ ipt_route_hook(unsigned int hook,
119 const struct net_device *out, 119 const struct net_device *out,
120 int (*okfn)(struct sk_buff *)) 120 int (*okfn)(struct sk_buff *))
121{ 121{
122 return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); 122 return ipt_do_table(pskb, hook, in, out, &packet_mangler);
123} 123}
124 124
125static unsigned int 125static unsigned int
@@ -148,7 +148,7 @@ ipt_local_hook(unsigned int hook,
148 daddr = (*pskb)->nh.iph->daddr; 148 daddr = (*pskb)->nh.iph->daddr;
149 tos = (*pskb)->nh.iph->tos; 149 tos = (*pskb)->nh.iph->tos;
150 150
151 ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); 151 ret = ipt_do_table(pskb, hook, in, out, &packet_mangler);
152 /* Reroute for ANY change. */ 152 /* Reroute for ANY change. */
153 if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE 153 if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
154 && ((*pskb)->nh.iph->saddr != saddr 154 && ((*pskb)->nh.iph->saddr != saddr
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 7912cce1e1b8..bcbeb4aeacd9 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -95,7 +95,7 @@ ipt_hook(unsigned int hook,
95 const struct net_device *out, 95 const struct net_device *out,
96 int (*okfn)(struct sk_buff *)) 96 int (*okfn)(struct sk_buff *))
97{ 97{
98 return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL); 98 return ipt_do_table(pskb, hook, in, out, &packet_raw);
99} 99}
100 100
101/* 'raw' is the very first table. */ 101/* 'raw' is the very first table. */
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 663a73ee3f2f..790f00d500c3 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -25,7 +25,7 @@
25#include <net/netfilter/nf_conntrack_protocol.h> 25#include <net/netfilter/nf_conntrack_protocol.h>
26#include <net/netfilter/nf_conntrack_core.h> 26#include <net/netfilter/nf_conntrack_core.h>
27 27
28unsigned long nf_ct_icmp_timeout = 30*HZ; 28unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ;
29 29
30#if 0 30#if 0
31#define DEBUGP printk 31#define DEBUGP printk
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index d61e2a9d394d..9c6cbe3d9fb8 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -173,6 +173,8 @@ static const struct snmp_mib snmp4_udp_list[] = {
173 SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS), 173 SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS),
174 SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS), 174 SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS),
175 SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS), 175 SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS),
176 SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS),
177 SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS),
176 SNMP_MIB_SENTINEL 178 SNMP_MIB_SENTINEL
177}; 179};
178 180
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 62b2762a2420..0e935b4c8741 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -38,8 +38,7 @@
38 * as published by the Free Software Foundation; either version 38 * as published by the Free Software Foundation; either version
39 * 2 of the License, or (at your option) any later version. 39 * 2 of the License, or (at your option) any later version.
40 */ 40 */
41 41
42#include <linux/config.h>
43#include <linux/types.h> 42#include <linux/types.h>
44#include <asm/atomic.h> 43#include <asm/atomic.h>
45#include <asm/byteorder.h> 44#include <asm/byteorder.h>
@@ -484,6 +483,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
484 if (!inet->hdrincl) 483 if (!inet->hdrincl)
485 raw_probe_proto_opt(&fl, msg); 484 raw_probe_proto_opt(&fl, msg);
486 485
486 security_sk_classify_flow(sk, &fl);
487 err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); 487 err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
488 } 488 }
489 if (err) 489 if (err)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b873cbcdd0b8..20ffe8e88c0f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2639,51 +2639,54 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2639{ 2639{
2640 struct rtable *rt = (struct rtable*)skb->dst; 2640 struct rtable *rt = (struct rtable*)skb->dst;
2641 struct rtmsg *r; 2641 struct rtmsg *r;
2642 struct nlmsghdr *nlh; 2642 struct nlmsghdr *nlh;
2643 unsigned char *b = skb->tail;
2644 struct rta_cacheinfo ci; 2643 struct rta_cacheinfo ci;
2645#ifdef CONFIG_IP_MROUTE 2644
2646 struct rtattr *eptr; 2645 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
2647#endif 2646 if (nlh == NULL)
2648 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags); 2647 return -ENOBUFS;
2649 r = NLMSG_DATA(nlh); 2648
2649 r = nlmsg_data(nlh);
2650 r->rtm_family = AF_INET; 2650 r->rtm_family = AF_INET;
2651 r->rtm_dst_len = 32; 2651 r->rtm_dst_len = 32;
2652 r->rtm_src_len = 0; 2652 r->rtm_src_len = 0;
2653 r->rtm_tos = rt->fl.fl4_tos; 2653 r->rtm_tos = rt->fl.fl4_tos;
2654 r->rtm_table = RT_TABLE_MAIN; 2654 r->rtm_table = RT_TABLE_MAIN;
2655 NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
2655 r->rtm_type = rt->rt_type; 2656 r->rtm_type = rt->rt_type;
2656 r->rtm_scope = RT_SCOPE_UNIVERSE; 2657 r->rtm_scope = RT_SCOPE_UNIVERSE;
2657 r->rtm_protocol = RTPROT_UNSPEC; 2658 r->rtm_protocol = RTPROT_UNSPEC;
2658 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; 2659 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2659 if (rt->rt_flags & RTCF_NOTIFY) 2660 if (rt->rt_flags & RTCF_NOTIFY)
2660 r->rtm_flags |= RTM_F_NOTIFY; 2661 r->rtm_flags |= RTM_F_NOTIFY;
2661 RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst); 2662
2663 NLA_PUT_U32(skb, RTA_DST, rt->rt_dst);
2664
2662 if (rt->fl.fl4_src) { 2665 if (rt->fl.fl4_src) {
2663 r->rtm_src_len = 32; 2666 r->rtm_src_len = 32;
2664 RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src); 2667 NLA_PUT_U32(skb, RTA_SRC, rt->fl.fl4_src);
2665 } 2668 }
2666 if (rt->u.dst.dev) 2669 if (rt->u.dst.dev)
2667 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); 2670 NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex);
2668#ifdef CONFIG_NET_CLS_ROUTE 2671#ifdef CONFIG_NET_CLS_ROUTE
2669 if (rt->u.dst.tclassid) 2672 if (rt->u.dst.tclassid)
2670 RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid); 2673 NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
2671#endif 2674#endif
2672#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 2675#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
2673 if (rt->rt_multipath_alg != IP_MP_ALG_NONE) { 2676 if (rt->rt_multipath_alg != IP_MP_ALG_NONE)
2674 __u32 alg = rt->rt_multipath_alg; 2677 NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg);
2675
2676 RTA_PUT(skb, RTA_MP_ALGO, 4, &alg);
2677 }
2678#endif 2678#endif
2679 if (rt->fl.iif) 2679 if (rt->fl.iif)
2680 RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst); 2680 NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_spec_dst);
2681 else if (rt->rt_src != rt->fl.fl4_src) 2681 else if (rt->rt_src != rt->fl.fl4_src)
2682 RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src); 2682 NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_src);
2683
2683 if (rt->rt_dst != rt->rt_gateway) 2684 if (rt->rt_dst != rt->rt_gateway)
2684 RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway); 2685 NLA_PUT_U32(skb, RTA_GATEWAY, rt->rt_gateway);
2686
2685 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 2687 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2686 goto rtattr_failure; 2688 goto nla_put_failure;
2689
2687 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); 2690 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2688 ci.rta_used = rt->u.dst.__use; 2691 ci.rta_used = rt->u.dst.__use;
2689 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); 2692 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
@@ -2700,10 +2703,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2700 ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp; 2703 ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
2701 } 2704 }
2702 } 2705 }
2703#ifdef CONFIG_IP_MROUTE 2706
2704 eptr = (struct rtattr*)skb->tail;
2705#endif
2706 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2707 if (rt->fl.iif) { 2707 if (rt->fl.iif) {
2708#ifdef CONFIG_IP_MROUTE 2708#ifdef CONFIG_IP_MROUTE
2709 u32 dst = rt->rt_dst; 2709 u32 dst = rt->rt_dst;
@@ -2715,41 +2715,46 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2715 if (!nowait) { 2715 if (!nowait) {
2716 if (err == 0) 2716 if (err == 0)
2717 return 0; 2717 return 0;
2718 goto nlmsg_failure; 2718 goto nla_put_failure;
2719 } else { 2719 } else {
2720 if (err == -EMSGSIZE) 2720 if (err == -EMSGSIZE)
2721 goto nlmsg_failure; 2721 goto nla_put_failure;
2722 ((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err; 2722 ci.rta_error = err;
2723 } 2723 }
2724 } 2724 }
2725 } else 2725 } else
2726#endif 2726#endif
2727 RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); 2727 NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
2728 } 2728 }
2729 2729
2730 nlh->nlmsg_len = skb->tail - b; 2730 NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2731 return skb->len; 2731
2732 return nlmsg_end(skb, nlh);
2732 2733
2733nlmsg_failure: 2734nla_put_failure:
2734rtattr_failure: 2735 return nlmsg_cancel(skb, nlh);
2735 skb_trim(skb, b - skb->data);
2736 return -1;
2737} 2736}
2738 2737
2739int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2738int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2740{ 2739{
2741 struct rtattr **rta = arg; 2740 struct rtmsg *rtm;
2742 struct rtmsg *rtm = NLMSG_DATA(nlh); 2741 struct nlattr *tb[RTA_MAX+1];
2743 struct rtable *rt = NULL; 2742 struct rtable *rt = NULL;
2744 u32 dst = 0; 2743 u32 dst, src, iif;
2745 u32 src = 0; 2744 int err;
2746 int iif = 0;
2747 int err = -ENOBUFS;
2748 struct sk_buff *skb; 2745 struct sk_buff *skb;
2749 2746
2747 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2748 if (err < 0)
2749 goto errout;
2750
2751 rtm = nlmsg_data(nlh);
2752
2750 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2753 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2751 if (!skb) 2754 if (skb == NULL) {
2752 goto out; 2755 err = -ENOBUFS;
2756 goto errout;
2757 }
2753 2758
2754 /* Reserve room for dummy headers, this skb can pass 2759 /* Reserve room for dummy headers, this skb can pass
2755 through good chunk of routing engine. 2760 through good chunk of routing engine.
@@ -2760,62 +2765,61 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2760 skb->nh.iph->protocol = IPPROTO_ICMP; 2765 skb->nh.iph->protocol = IPPROTO_ICMP;
2761 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); 2766 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
2762 2767
2763 if (rta[RTA_SRC - 1]) 2768 src = tb[RTA_SRC] ? nla_get_u32(tb[RTA_SRC]) : 0;
2764 memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4); 2769 dst = tb[RTA_DST] ? nla_get_u32(tb[RTA_DST]) : 0;
2765 if (rta[RTA_DST - 1]) 2770 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
2766 memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4);
2767 if (rta[RTA_IIF - 1])
2768 memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int));
2769 2771
2770 if (iif) { 2772 if (iif) {
2771 struct net_device *dev = __dev_get_by_index(iif); 2773 struct net_device *dev;
2772 err = -ENODEV; 2774
2773 if (!dev) 2775 dev = __dev_get_by_index(iif);
2774 goto out_free; 2776 if (dev == NULL) {
2777 err = -ENODEV;
2778 goto errout_free;
2779 }
2780
2775 skb->protocol = htons(ETH_P_IP); 2781 skb->protocol = htons(ETH_P_IP);
2776 skb->dev = dev; 2782 skb->dev = dev;
2777 local_bh_disable(); 2783 local_bh_disable();
2778 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); 2784 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2779 local_bh_enable(); 2785 local_bh_enable();
2780 rt = (struct rtable*)skb->dst; 2786
2781 if (!err && rt->u.dst.error) 2787 rt = (struct rtable*) skb->dst;
2788 if (err == 0 && rt->u.dst.error)
2782 err = -rt->u.dst.error; 2789 err = -rt->u.dst.error;
2783 } else { 2790 } else {
2784 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst, 2791 struct flowi fl = {
2785 .saddr = src, 2792 .nl_u = {
2786 .tos = rtm->rtm_tos } } }; 2793 .ip4_u = {
2787 int oif = 0; 2794 .daddr = dst,
2788 if (rta[RTA_OIF - 1]) 2795 .saddr = src,
2789 memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); 2796 .tos = rtm->rtm_tos,
2790 fl.oif = oif; 2797 },
2798 },
2799 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
2800 };
2791 err = ip_route_output_key(&rt, &fl); 2801 err = ip_route_output_key(&rt, &fl);
2792 } 2802 }
2803
2793 if (err) 2804 if (err)
2794 goto out_free; 2805 goto errout_free;
2795 2806
2796 skb->dst = &rt->u.dst; 2807 skb->dst = &rt->u.dst;
2797 if (rtm->rtm_flags & RTM_F_NOTIFY) 2808 if (rtm->rtm_flags & RTM_F_NOTIFY)
2798 rt->rt_flags |= RTCF_NOTIFY; 2809 rt->rt_flags |= RTCF_NOTIFY;
2799 2810
2800 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
2801
2802 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 2811 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
2803 RTM_NEWROUTE, 0, 0); 2812 RTM_NEWROUTE, 0, 0);
2804 if (!err) 2813 if (err <= 0)
2805 goto out_free; 2814 goto errout_free;
2806 if (err < 0) {
2807 err = -EMSGSIZE;
2808 goto out_free;
2809 }
2810 2815
2811 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); 2816 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2812 if (err > 0) 2817errout:
2813 err = 0; 2818 return err;
2814out: return err;
2815 2819
2816out_free: 2820errout_free:
2817 kfree_skb(skb); 2821 kfree_skb(skb);
2818 goto out; 2822 goto errout;
2819} 2823}
2820 2824
2821int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) 2825int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -3143,13 +3147,9 @@ int __init ip_rt_init(void)
3143 } 3147 }
3144#endif 3148#endif
3145 3149
3146 ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache", 3150 ipv4_dst_ops.kmem_cachep =
3147 sizeof(struct rtable), 3151 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
3148 0, SLAB_HWCACHE_ALIGN, 3152 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
3149 NULL, NULL);
3150
3151 if (!ipv4_dst_ops.kmem_cachep)
3152 panic("IP: failed to allocate ip_dst_cache\n");
3153 3153
3154 rt_hash_table = (struct rt_hash_bucket *) 3154 rt_hash_table = (struct rt_hash_bucket *)
3155 alloc_large_system_hash("IP route cache", 3155 alloc_large_system_hash("IP route cache",
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index e20be3331f67..661e0a4bca72 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -214,6 +214,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
214 if (!req) 214 if (!req)
215 goto out; 215 goto out;
216 216
217 if (security_inet_conn_request(sk, skb, req)) {
218 reqsk_free(req);
219 goto out;
220 }
217 ireq = inet_rsk(req); 221 ireq = inet_rsk(req);
218 treq = tcp_rsk(req); 222 treq = tcp_rsk(req);
219 treq->rcv_isn = htonl(skb->h.th->seq) - 1; 223 treq->rcv_isn = htonl(skb->h.th->seq) - 1;
@@ -259,6 +263,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
259 .uli_u = { .ports = 263 .uli_u = { .ports =
260 { .sport = skb->h.th->dest, 264 { .sport = skb->h.th->dest,
261 .dport = skb->h.th->source } } }; 265 .dport = skb->h.th->source } } };
266 security_req_classify_flow(req, &fl);
262 if (ip_route_output_key(&rt, &fl)) { 267 if (ip_route_output_key(&rt, &fl)) {
263 reqsk_free(req); 268 reqsk_free(req);
264 goto out; 269 goto out;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 70cea9d08a38..19b2071ff319 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -17,6 +17,7 @@
17#include <net/ip.h> 17#include <net/ip.h>
18#include <net/route.h> 18#include <net/route.h>
19#include <net/tcp.h> 19#include <net/tcp.h>
20#include <net/cipso_ipv4.h>
20 21
21/* From af_inet.c */ 22/* From af_inet.c */
22extern int sysctl_ip_nonlocal_bind; 23extern int sysctl_ip_nonlocal_bind;
@@ -697,6 +698,40 @@ ctl_table ipv4_table[] = {
697 .mode = 0644, 698 .mode = 0644,
698 .proc_handler = &proc_dointvec 699 .proc_handler = &proc_dointvec
699 }, 700 },
701#ifdef CONFIG_NETLABEL
702 {
703 .ctl_name = NET_CIPSOV4_CACHE_ENABLE,
704 .procname = "cipso_cache_enable",
705 .data = &cipso_v4_cache_enabled,
706 .maxlen = sizeof(int),
707 .mode = 0644,
708 .proc_handler = &proc_dointvec,
709 },
710 {
711 .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE,
712 .procname = "cipso_cache_bucket_size",
713 .data = &cipso_v4_cache_bucketsize,
714 .maxlen = sizeof(int),
715 .mode = 0644,
716 .proc_handler = &proc_dointvec,
717 },
718 {
719 .ctl_name = NET_CIPSOV4_RBM_OPTFMT,
720 .procname = "cipso_rbm_optfmt",
721 .data = &cipso_v4_rbm_optfmt,
722 .maxlen = sizeof(int),
723 .mode = 0644,
724 .proc_handler = &proc_dointvec,
725 },
726 {
727 .ctl_name = NET_CIPSOV4_RBM_STRICTVALID,
728 .procname = "cipso_rbm_strictvalid",
729 .data = &cipso_v4_rbm_strictvalid,
730 .maxlen = sizeof(int),
731 .mode = 0644,
732 .proc_handler = &proc_dointvec,
733 },
734#endif /* CONFIG_NETLABEL */
700 { .ctl_name = 0 } 735 { .ctl_name = 0 }
701}; 736};
702 737
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 934396bb1376..66e9a729f6df 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -268,7 +268,7 @@
268#include <asm/uaccess.h> 268#include <asm/uaccess.h>
269#include <asm/ioctls.h> 269#include <asm/ioctls.h>
270 270
271int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; 271int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
272 272
273DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly; 273DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
274 274
@@ -568,7 +568,7 @@ new_segment:
568 skb->truesize += copy; 568 skb->truesize += copy;
569 sk->sk_wmem_queued += copy; 569 sk->sk_wmem_queued += copy;
570 sk->sk_forward_alloc -= copy; 570 sk->sk_forward_alloc -= copy;
571 skb->ip_summed = CHECKSUM_HW; 571 skb->ip_summed = CHECKSUM_PARTIAL;
572 tp->write_seq += copy; 572 tp->write_seq += copy;
573 TCP_SKB_CB(skb)->end_seq += copy; 573 TCP_SKB_CB(skb)->end_seq += copy;
574 skb_shinfo(skb)->gso_segs = 0; 574 skb_shinfo(skb)->gso_segs = 0;
@@ -723,7 +723,7 @@ new_segment:
723 * Check whether we can use HW checksum. 723 * Check whether we can use HW checksum.
724 */ 724 */
725 if (sk->sk_route_caps & NETIF_F_ALL_CSUM) 725 if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
726 skb->ip_summed = CHECKSUM_HW; 726 skb->ip_summed = CHECKSUM_PARTIAL;
727 727
728 skb_entail(sk, tp, skb); 728 skb_entail(sk, tp, skb);
729 copy = size_goal; 729 copy = size_goal;
@@ -955,8 +955,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
955 * receive buffer and there was a small segment 955 * receive buffer and there was a small segment
956 * in queue. 956 * in queue.
957 */ 957 */
958 (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && 958 (copied > 0 &&
959 !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) 959 ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
960 ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
961 !icsk->icsk_ack.pingpong)) &&
962 !atomic_read(&sk->sk_rmem_alloc)))
960 time_to_ack = 1; 963 time_to_ack = 1;
961 } 964 }
962 965
@@ -2205,7 +2208,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
2205 th->fin = th->psh = 0; 2208 th->fin = th->psh = 0;
2206 2209
2207 th->check = ~csum_fold(th->check + delta); 2210 th->check = ~csum_fold(th->check + delta);
2208 if (skb->ip_summed != CHECKSUM_HW) 2211 if (skb->ip_summed != CHECKSUM_PARTIAL)
2209 th->check = csum_fold(csum_partial(skb->h.raw, thlen, 2212 th->check = csum_fold(csum_partial(skb->h.raw, thlen,
2210 skb->csum)); 2213 skb->csum));
2211 2214
@@ -2219,7 +2222,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
2219 2222
2220 delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len); 2223 delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
2221 th->check = ~csum_fold(th->check + delta); 2224 th->check = ~csum_fold(th->check + delta);
2222 if (skb->ip_summed != CHECKSUM_HW) 2225 if (skb->ip_summed != CHECKSUM_PARTIAL)
2223 th->check = csum_fold(csum_partial(skb->h.raw, thlen, 2226 th->check = csum_fold(csum_partial(skb->h.raw, thlen,
2224 skb->csum)); 2227 skb->csum));
2225 2228
@@ -2254,9 +2257,7 @@ void __init tcp_init(void)
2254 tcp_hashinfo.bind_bucket_cachep = 2257 tcp_hashinfo.bind_bucket_cachep =
2255 kmem_cache_create("tcp_bind_bucket", 2258 kmem_cache_create("tcp_bind_bucket",
2256 sizeof(struct inet_bind_bucket), 0, 2259 sizeof(struct inet_bind_bucket), 0,
2257 SLAB_HWCACHE_ALIGN, NULL, NULL); 2260 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
2258 if (!tcp_hashinfo.bind_bucket_cachep)
2259 panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
2260 2261
2261 /* Size and allocate the main established and bind bucket 2262 /* Size and allocate the main established and bind bucket
2262 * hash tables. 2263 * hash tables.
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index b0134ab08379..5730333cd0ac 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -231,7 +231,7 @@ static struct tcp_congestion_ops bictcp = {
231 231
232static int __init bictcp_register(void) 232static int __init bictcp_register(void)
233{ 233{
234 BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); 234 BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
235 return tcp_register_congestion_control(&bictcp); 235 return tcp_register_congestion_control(&bictcp);
236} 236}
237 237
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 2be27980ca78..a60ef38d75c6 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -358,7 +358,7 @@ static struct tcp_congestion_ops cubictcp = {
358 358
359static int __init cubictcp_register(void) 359static int __init cubictcp_register(void)
360{ 360{
361 BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); 361 BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
362 362
363 /* Precompute a bunch of the scaling factors that are used per-packet 363 /* Precompute a bunch of the scaling factors that are used per-packet
364 * based on SRTT of 100ms 364 * based on SRTT of 100ms
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index fa3e1aad660c..c4fc811bf377 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -189,7 +189,7 @@ static struct tcp_congestion_ops tcp_highspeed = {
189 189
190static int __init hstcp_register(void) 190static int __init hstcp_register(void)
191{ 191{
192 BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); 192 BUILD_BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE);
193 return tcp_register_congestion_control(&tcp_highspeed); 193 return tcp_register_congestion_control(&tcp_highspeed);
194} 194}
195 195
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 6edfe5e4510e..682e7d5b6f2f 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -286,7 +286,7 @@ static struct tcp_congestion_ops htcp = {
286 286
287static int __init htcp_register(void) 287static int __init htcp_register(void)
288{ 288{
289 BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); 289 BUILD_BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
290 BUILD_BUG_ON(BETA_MIN >= BETA_MAX); 290 BUILD_BUG_ON(BETA_MIN >= BETA_MAX);
291 return tcp_register_congestion_control(&htcp); 291 return tcp_register_congestion_control(&htcp);
292} 292}
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 7406e0c5fb8e..59e691d26f64 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -170,7 +170,7 @@ static struct tcp_congestion_ops tcp_hybla = {
170 170
171static int __init hybla_register(void) 171static int __init hybla_register(void)
172{ 172{
173 BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE); 173 BUILD_BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE);
174 return tcp_register_congestion_control(&tcp_hybla); 174 return tcp_register_congestion_control(&tcp_hybla);
175} 175}
176 176
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 159fa3f1ba67..b3def0df14fb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -72,24 +72,24 @@
72#include <asm/unaligned.h> 72#include <asm/unaligned.h>
73#include <net/netdma.h> 73#include <net/netdma.h>
74 74
75int sysctl_tcp_timestamps = 1; 75int sysctl_tcp_timestamps __read_mostly = 1;
76int sysctl_tcp_window_scaling = 1; 76int sysctl_tcp_window_scaling __read_mostly = 1;
77int sysctl_tcp_sack = 1; 77int sysctl_tcp_sack __read_mostly = 1;
78int sysctl_tcp_fack = 1; 78int sysctl_tcp_fack __read_mostly = 1;
79int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; 79int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
80int sysctl_tcp_ecn; 80int sysctl_tcp_ecn __read_mostly;
81int sysctl_tcp_dsack = 1; 81int sysctl_tcp_dsack __read_mostly = 1;
82int sysctl_tcp_app_win = 31; 82int sysctl_tcp_app_win __read_mostly = 31;
83int sysctl_tcp_adv_win_scale = 2; 83int sysctl_tcp_adv_win_scale __read_mostly = 2;
84 84
85int sysctl_tcp_stdurg; 85int sysctl_tcp_stdurg __read_mostly;
86int sysctl_tcp_rfc1337; 86int sysctl_tcp_rfc1337 __read_mostly;
87int sysctl_tcp_max_orphans = NR_FILE; 87int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
88int sysctl_tcp_frto; 88int sysctl_tcp_frto __read_mostly;
89int sysctl_tcp_nometrics_save; 89int sysctl_tcp_nometrics_save __read_mostly;
90 90
91int sysctl_tcp_moderate_rcvbuf = 1; 91int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
92int sysctl_tcp_abc; 92int sysctl_tcp_abc __read_mostly;
93 93
94#define FLAG_DATA 0x01 /* Incoming frame contained data. */ 94#define FLAG_DATA 0x01 /* Incoming frame contained data. */
95#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ 95#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -127,7 +127,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
127 /* skb->len may jitter because of SACKs, even if peer 127 /* skb->len may jitter because of SACKs, even if peer
128 * sends good full-sized frames. 128 * sends good full-sized frames.
129 */ 129 */
130 len = skb->len; 130 len = skb_shinfo(skb)->gso_size ?: skb->len;
131 if (len >= icsk->icsk_ack.rcv_mss) { 131 if (len >= icsk->icsk_ack.rcv_mss) {
132 icsk->icsk_ack.rcv_mss = len; 132 icsk->icsk_ack.rcv_mss = len;
133 } else { 133 } else {
@@ -156,6 +156,8 @@ static void tcp_measure_rcv_mss(struct sock *sk,
156 return; 156 return;
157 } 157 }
158 } 158 }
159 if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
160 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
159 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; 161 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
160 } 162 }
161} 163}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4b04c3edd4a9..39b179856082 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -78,8 +78,8 @@
78#include <linux/proc_fs.h> 78#include <linux/proc_fs.h>
79#include <linux/seq_file.h> 79#include <linux/seq_file.h>
80 80
81int sysctl_tcp_tw_reuse; 81int sysctl_tcp_tw_reuse __read_mostly;
82int sysctl_tcp_low_latency; 82int sysctl_tcp_low_latency __read_mostly;
83 83
84/* Check TCP sequence numbers in ICMP packets. */ 84/* Check TCP sequence numbers in ICMP packets. */
85#define ICMP_MIN_LENGTH 8 85#define ICMP_MIN_LENGTH 8
@@ -484,7 +484,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
484 struct inet_sock *inet = inet_sk(sk); 484 struct inet_sock *inet = inet_sk(sk);
485 struct tcphdr *th = skb->h.th; 485 struct tcphdr *th = skb->h.th;
486 486
487 if (skb->ip_summed == CHECKSUM_HW) { 487 if (skb->ip_summed == CHECKSUM_PARTIAL) {
488 th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); 488 th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
489 skb->csum = offsetof(struct tcphdr, check); 489 skb->csum = offsetof(struct tcphdr, check);
490 } else { 490 } else {
@@ -509,7 +509,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
509 th->check = 0; 509 th->check = 0;
510 th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0); 510 th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
511 skb->csum = offsetof(struct tcphdr, check); 511 skb->csum = offsetof(struct tcphdr, check);
512 skb->ip_summed = CHECKSUM_HW; 512 skb->ip_summed = CHECKSUM_PARTIAL;
513 return 0; 513 return 0;
514} 514}
515 515
@@ -798,6 +798,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
798 798
799 tcp_openreq_init(req, &tmp_opt, skb); 799 tcp_openreq_init(req, &tmp_opt, skb);
800 800
801 if (security_inet_conn_request(sk, skb, req))
802 goto drop_and_free;
803
801 ireq = inet_rsk(req); 804 ireq = inet_rsk(req);
802 ireq->loc_addr = daddr; 805 ireq->loc_addr = daddr;
803 ireq->rmt_addr = saddr; 806 ireq->rmt_addr = saddr;
@@ -948,9 +951,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
948 if (req) 951 if (req)
949 return tcp_check_req(sk, skb, req, prev); 952 return tcp_check_req(sk, skb, req, prev);
950 953
951 nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, 954 nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
952 th->source, skb->nh.iph->daddr, 955 th->source, skb->nh.iph->daddr,
953 ntohs(th->dest), inet_iif(skb)); 956 th->dest, inet_iif(skb));
954 957
955 if (nsk) { 958 if (nsk) {
956 if (nsk->sk_state != TCP_TIME_WAIT) { 959 if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -970,7 +973,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
970 973
971static int tcp_v4_checksum_init(struct sk_buff *skb) 974static int tcp_v4_checksum_init(struct sk_buff *skb)
972{ 975{
973 if (skb->ip_summed == CHECKSUM_HW) { 976 if (skb->ip_summed == CHECKSUM_COMPLETE) {
974 if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, 977 if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
975 skb->nh.iph->daddr, skb->csum)) { 978 skb->nh.iph->daddr, skb->csum)) {
976 skb->ip_summed = CHECKSUM_UNNECESSARY; 979 skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1087,7 +1090,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
1087 TCP_SKB_CB(skb)->sacked = 0; 1090 TCP_SKB_CB(skb)->sacked = 0;
1088 1091
1089 sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, 1092 sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
1090 skb->nh.iph->daddr, ntohs(th->dest), 1093 skb->nh.iph->daddr, th->dest,
1091 inet_iif(skb)); 1094 inet_iif(skb));
1092 1095
1093 if (!sk) 1096 if (!sk)
@@ -1101,7 +1104,7 @@ process:
1101 goto discard_and_relse; 1104 goto discard_and_relse;
1102 nf_reset(skb); 1105 nf_reset(skb);
1103 1106
1104 if (sk_filter(sk, skb, 0)) 1107 if (sk_filter(sk, skb))
1105 goto discard_and_relse; 1108 goto discard_and_relse;
1106 1109
1107 skb->dev = NULL; 1110 skb->dev = NULL;
@@ -1165,7 +1168,7 @@ do_time_wait:
1165 case TCP_TW_SYN: { 1168 case TCP_TW_SYN: {
1166 struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, 1169 struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
1167 skb->nh.iph->daddr, 1170 skb->nh.iph->daddr,
1168 ntohs(th->dest), 1171 th->dest,
1169 inet_iif(skb)); 1172 inet_iif(skb));
1170 if (sk2) { 1173 if (sk2) {
1171 inet_twsk_deschedule((struct inet_timewait_sock *)sk, 1174 inet_twsk_deschedule((struct inet_timewait_sock *)sk,
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 48f28d617ce6..308fb7e071c5 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -35,7 +35,6 @@
35 * Version: $Id: tcp_lp.c,v 1.24 2006/09/05 20:22:53 hswong3i Exp $ 35 * Version: $Id: tcp_lp.c,v 1.24 2006/09/05 20:22:53 hswong3i Exp $
36 */ 36 */
37 37
38#include <linux/config.h>
39#include <linux/module.h> 38#include <linux/module.h>
40#include <net/tcp.h> 39#include <net/tcp.h>
41 40
@@ -328,7 +327,7 @@ static struct tcp_congestion_ops tcp_lp = {
328 327
329static int __init tcp_lp_register(void) 328static int __init tcp_lp_register(void)
330{ 329{
331 BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); 330 BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE);
332 return tcp_register_congestion_control(&tcp_lp); 331 return tcp_register_congestion_control(&tcp_lp);
333} 332}
334 333
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 624e2b2c7f53..0163d9826907 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -34,8 +34,8 @@
34#define SYNC_INIT 1 34#define SYNC_INIT 1
35#endif 35#endif
36 36
37int sysctl_tcp_syncookies = SYNC_INIT; 37int sysctl_tcp_syncookies __read_mostly = SYNC_INIT;
38int sysctl_tcp_abort_on_overflow; 38int sysctl_tcp_abort_on_overflow __read_mostly;
39 39
40struct inet_timewait_death_row tcp_death_row = { 40struct inet_timewait_death_row tcp_death_row = {
41 .sysctl_max_tw_buckets = NR_FILE * 2, 41 .sysctl_max_tw_buckets = NR_FILE * 2,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4f3ffe1b3b4..061edfae0c29 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -43,24 +43,24 @@
43#include <linux/smp_lock.h> 43#include <linux/smp_lock.h>
44 44
45/* People can turn this off for buggy TCP's found in printers etc. */ 45/* People can turn this off for buggy TCP's found in printers etc. */
46int sysctl_tcp_retrans_collapse = 1; 46int sysctl_tcp_retrans_collapse __read_mostly = 1;
47 47
48/* People can turn this on to work with those rare, broken TCPs that 48/* People can turn this on to work with those rare, broken TCPs that
49 * interpret the window field as a signed quantity. 49 * interpret the window field as a signed quantity.
50 */ 50 */
51int sysctl_tcp_workaround_signed_windows = 0; 51int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
52 52
53/* This limits the percentage of the congestion window which we 53/* This limits the percentage of the congestion window which we
54 * will allow a single TSO frame to consume. Building TSO frames 54 * will allow a single TSO frame to consume. Building TSO frames
55 * which are too large can cause TCP streams to be bursty. 55 * which are too large can cause TCP streams to be bursty.
56 */ 56 */
57int sysctl_tcp_tso_win_divisor = 3; 57int sysctl_tcp_tso_win_divisor __read_mostly = 3;
58 58
59int sysctl_tcp_mtu_probing = 0; 59int sysctl_tcp_mtu_probing __read_mostly = 0;
60int sysctl_tcp_base_mss = 512; 60int sysctl_tcp_base_mss __read_mostly = 512;
61 61
62/* By default, RFC2861 behavior. */ 62/* By default, RFC2861 behavior. */
63int sysctl_tcp_slow_start_after_idle = 1; 63int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
64 64
65static void update_send_head(struct sock *sk, struct tcp_sock *tp, 65static void update_send_head(struct sock *sk, struct tcp_sock *tp,
66 struct sk_buff *skb) 66 struct sk_buff *skb)
@@ -577,7 +577,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
577 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; 577 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
578 TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; 578 TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
579 579
580 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { 580 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
581 /* Copy and checksum data tail into the new buffer. */ 581 /* Copy and checksum data tail into the new buffer. */
582 buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize), 582 buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
583 nsize, 0); 583 nsize, 0);
@@ -586,7 +586,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
586 586
587 skb->csum = csum_block_sub(skb->csum, buff->csum, len); 587 skb->csum = csum_block_sub(skb->csum, buff->csum, len);
588 } else { 588 } else {
589 skb->ip_summed = CHECKSUM_HW; 589 skb->ip_summed = CHECKSUM_PARTIAL;
590 skb_split(skb, buff, len); 590 skb_split(skb, buff, len);
591 } 591 }
592 592
@@ -689,7 +689,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
689 __pskb_trim_head(skb, len - skb_headlen(skb)); 689 __pskb_trim_head(skb, len - skb_headlen(skb));
690 690
691 TCP_SKB_CB(skb)->seq += len; 691 TCP_SKB_CB(skb)->seq += len;
692 skb->ip_summed = CHECKSUM_HW; 692 skb->ip_summed = CHECKSUM_PARTIAL;
693 693
694 skb->truesize -= len; 694 skb->truesize -= len;
695 sk->sk_wmem_queued -= len; 695 sk->sk_wmem_queued -= len;
@@ -1062,7 +1062,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1062 /* This packet was never sent out yet, so no SACK bits. */ 1062 /* This packet was never sent out yet, so no SACK bits. */
1063 TCP_SKB_CB(buff)->sacked = 0; 1063 TCP_SKB_CB(buff)->sacked = 0;
1064 1064
1065 buff->ip_summed = skb->ip_summed = CHECKSUM_HW; 1065 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1066 skb_split(skb, buff, len); 1066 skb_split(skb, buff, len);
1067 1067
1068 /* Fix up tso_factor for both original and new SKB. */ 1068 /* Fix up tso_factor for both original and new SKB. */
@@ -1206,8 +1206,7 @@ static int tcp_mtu_probe(struct sock *sk)
1206 TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK; 1206 TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK;
1207 TCP_SKB_CB(nskb)->sacked = 0; 1207 TCP_SKB_CB(nskb)->sacked = 0;
1208 nskb->csum = 0; 1208 nskb->csum = 0;
1209 if (skb->ip_summed == CHECKSUM_HW) 1209 nskb->ip_summed = skb->ip_summed;
1210 nskb->ip_summed = CHECKSUM_HW;
1211 1210
1212 len = 0; 1211 len = 0;
1213 while (len < probe_size) { 1212 while (len < probe_size) {
@@ -1231,7 +1230,7 @@ static int tcp_mtu_probe(struct sock *sk)
1231 ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); 1230 ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
1232 if (!skb_shinfo(skb)->nr_frags) { 1231 if (!skb_shinfo(skb)->nr_frags) {
1233 skb_pull(skb, copy); 1232 skb_pull(skb, copy);
1234 if (skb->ip_summed != CHECKSUM_HW) 1233 if (skb->ip_summed != CHECKSUM_PARTIAL)
1235 skb->csum = csum_partial(skb->data, skb->len, 0); 1234 skb->csum = csum_partial(skb->data, skb->len, 0);
1236 } else { 1235 } else {
1237 __pskb_trim_head(skb, copy); 1236 __pskb_trim_head(skb, copy);
@@ -1572,10 +1571,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
1572 1571
1573 memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); 1572 memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
1574 1573
1575 if (next_skb->ip_summed == CHECKSUM_HW) 1574 skb->ip_summed = next_skb->ip_summed;
1576 skb->ip_summed = CHECKSUM_HW;
1577 1575
1578 if (skb->ip_summed != CHECKSUM_HW) 1576 if (skb->ip_summed != CHECKSUM_PARTIAL)
1579 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); 1577 skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
1580 1578
1581 /* Update sequence range on original skb. */ 1579 /* Update sequence range on original skb. */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 7c1bde3cd6cb..fb09ade5897b 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -23,14 +23,14 @@
23#include <linux/module.h> 23#include <linux/module.h>
24#include <net/tcp.h> 24#include <net/tcp.h>
25 25
26int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; 26int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
27int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; 27int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES;
28int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; 28int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME;
29int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; 29int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES;
30int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; 30int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
31int sysctl_tcp_retries1 = TCP_RETR1; 31int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
32int sysctl_tcp_retries2 = TCP_RETR2; 32int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
33int sysctl_tcp_orphan_retries; 33int sysctl_tcp_orphan_retries __read_mostly;
34 34
35static void tcp_write_timer(unsigned long); 35static void tcp_write_timer(unsigned long);
36static void tcp_delack_timer(unsigned long); 36static void tcp_delack_timer(unsigned long);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 490360b5b4bf..a3b7aa015a2f 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -370,7 +370,7 @@ static struct tcp_congestion_ops tcp_vegas = {
370 370
371static int __init tcp_vegas_register(void) 371static int __init tcp_vegas_register(void)
372{ 372{
373 BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); 373 BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
374 tcp_register_congestion_control(&tcp_vegas); 374 tcp_register_congestion_control(&tcp_vegas);
375 return 0; 375 return 0;
376} 376}
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 11b42a7135c1..ce57bf302f6c 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -9,7 +9,6 @@
9 * See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf 9 * See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
10 */ 10 */
11 11
12#include <linux/config.h>
13#include <linux/mm.h> 12#include <linux/mm.h>
14#include <linux/module.h> 13#include <linux/module.h>
15#include <linux/skbuff.h> 14#include <linux/skbuff.h>
@@ -213,7 +212,7 @@ static struct tcp_congestion_ops tcp_veno = {
213 212
214static int __init tcp_veno_register(void) 213static int __init tcp_veno_register(void)
215{ 214{
216 BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE); 215 BUILD_BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE);
217 tcp_register_congestion_control(&tcp_veno); 216 tcp_register_congestion_control(&tcp_veno);
218 return 0; 217 return 0;
219} 218}
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 5446312ffd2a..4f42a86c77f3 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -289,7 +289,7 @@ static struct tcp_congestion_ops tcp_westwood = {
289 289
290static int __init tcp_westwood_register(void) 290static int __init tcp_westwood_register(void)
291{ 291{
292 BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); 292 BUILD_BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE);
293 return tcp_register_congestion_control(&tcp_westwood); 293 return tcp_register_congestion_control(&tcp_westwood);
294} 294}
295 295
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f136cec96d95..77e265d7bb8f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -118,14 +118,33 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
118struct hlist_head udp_hash[UDP_HTABLE_SIZE]; 118struct hlist_head udp_hash[UDP_HTABLE_SIZE];
119DEFINE_RWLOCK(udp_hash_lock); 119DEFINE_RWLOCK(udp_hash_lock);
120 120
121/* Shared by v4/v6 udp. */ 121static int udp_port_rover;
122int udp_port_rover;
123 122
124static int udp_v4_get_port(struct sock *sk, unsigned short snum) 123static inline int udp_lport_inuse(u16 num)
124{
125 struct sock *sk;
126 struct hlist_node *node;
127
128 sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)])
129 if (inet_sk(sk)->num == num)
130 return 1;
131 return 0;
132}
133
134/**
135 * udp_get_port - common port lookup for IPv4 and IPv6
136 *
137 * @sk: socket struct in question
138 * @snum: port number to look up
139 * @saddr_comp: AF-dependent comparison of bound local IP addresses
140 */
141int udp_get_port(struct sock *sk, unsigned short snum,
142 int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2))
125{ 143{
126 struct hlist_node *node; 144 struct hlist_node *node;
145 struct hlist_head *head;
127 struct sock *sk2; 146 struct sock *sk2;
128 struct inet_sock *inet = inet_sk(sk); 147 int error = 1;
129 148
130 write_lock_bh(&udp_hash_lock); 149 write_lock_bh(&udp_hash_lock);
131 if (snum == 0) { 150 if (snum == 0) {
@@ -137,11 +156,10 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum)
137 best_size_so_far = 32767; 156 best_size_so_far = 32767;
138 best = result = udp_port_rover; 157 best = result = udp_port_rover;
139 for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { 158 for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
140 struct hlist_head *list;
141 int size; 159 int size;
142 160
143 list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; 161 head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
144 if (hlist_empty(list)) { 162 if (hlist_empty(head)) {
145 if (result > sysctl_local_port_range[1]) 163 if (result > sysctl_local_port_range[1])
146 result = sysctl_local_port_range[0] + 164 result = sysctl_local_port_range[0] +
147 ((result - sysctl_local_port_range[0]) & 165 ((result - sysctl_local_port_range[0]) &
@@ -149,12 +167,11 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum)
149 goto gotit; 167 goto gotit;
150 } 168 }
151 size = 0; 169 size = 0;
152 sk_for_each(sk2, node, list) 170 sk_for_each(sk2, node, head)
153 if (++size >= best_size_so_far) 171 if (++size < best_size_so_far) {
154 goto next; 172 best_size_so_far = size;
155 best_size_so_far = size; 173 best = result;
156 best = result; 174 }
157 next:;
158 } 175 }
159 result = best; 176 result = best;
160 for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { 177 for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
@@ -170,38 +187,44 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum)
170gotit: 187gotit:
171 udp_port_rover = snum = result; 188 udp_port_rover = snum = result;
172 } else { 189 } else {
173 sk_for_each(sk2, node, 190 head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
174 &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { 191
175 struct inet_sock *inet2 = inet_sk(sk2); 192 sk_for_each(sk2, node, head)
176 193 if (inet_sk(sk2)->num == snum &&
177 if (inet2->num == snum && 194 sk2 != sk &&
178 sk2 != sk && 195 (!sk2->sk_reuse || !sk->sk_reuse) &&
179 !ipv6_only_sock(sk2) && 196 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
180 (!sk2->sk_bound_dev_if || 197 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
181 !sk->sk_bound_dev_if || 198 (*saddr_cmp)(sk, sk2) )
182 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
183 (!inet2->rcv_saddr ||
184 !inet->rcv_saddr ||
185 inet2->rcv_saddr == inet->rcv_saddr) &&
186 (!sk2->sk_reuse || !sk->sk_reuse))
187 goto fail; 199 goto fail;
188 }
189 } 200 }
190 inet->num = snum; 201 inet_sk(sk)->num = snum;
191 if (sk_unhashed(sk)) { 202 if (sk_unhashed(sk)) {
192 struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; 203 head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
193 204 sk_add_node(sk, head);
194 sk_add_node(sk, h);
195 sock_prot_inc_use(sk->sk_prot); 205 sock_prot_inc_use(sk->sk_prot);
196 } 206 }
197 write_unlock_bh(&udp_hash_lock); 207 error = 0;
198 return 0;
199
200fail: 208fail:
201 write_unlock_bh(&udp_hash_lock); 209 write_unlock_bh(&udp_hash_lock);
202 return 1; 210 return error;
211}
212
213static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
214{
215 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
216
217 return ( !ipv6_only_sock(sk2) &&
218 (!inet1->rcv_saddr || !inet2->rcv_saddr ||
219 inet1->rcv_saddr == inet2->rcv_saddr ));
203} 220}
204 221
222static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
223{
224 return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
225}
226
227
205static void udp_v4_hash(struct sock *sk) 228static void udp_v4_hash(struct sock *sk)
206{ 229{
207 BUG(); 230 BUG();
@@ -429,7 +452,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
429 /* 452 /*
430 * Only one fragment on the socket. 453 * Only one fragment on the socket.
431 */ 454 */
432 if (skb->ip_summed == CHECKSUM_HW) { 455 if (skb->ip_summed == CHECKSUM_PARTIAL) {
433 skb->csum = offsetof(struct udphdr, check); 456 skb->csum = offsetof(struct udphdr, check);
434 uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, 457 uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
435 up->len, IPPROTO_UDP, 0); 458 up->len, IPPROTO_UDP, 0);
@@ -448,7 +471,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up)
448 * fragments on the socket so that all csums of sk_buffs 471 * fragments on the socket so that all csums of sk_buffs
449 * should be together. 472 * should be together.
450 */ 473 */
451 if (skb->ip_summed == CHECKSUM_HW) { 474 if (skb->ip_summed == CHECKSUM_PARTIAL) {
452 int offset = (unsigned char *)uh - skb->data; 475 int offset = (unsigned char *)uh - skb->data;
453 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); 476 skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
454 477
@@ -603,6 +626,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
603 .uli_u = { .ports = 626 .uli_u = { .ports =
604 { .sport = inet->sport, 627 { .sport = inet->sport,
605 .dport = dport } } }; 628 .dport = dport } } };
629 security_sk_classify_flow(sk, &fl);
606 err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); 630 err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
607 if (err) 631 if (err)
608 goto out; 632 goto out;
@@ -661,6 +685,16 @@ out:
661 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); 685 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
662 return len; 686 return len;
663 } 687 }
688 /*
689 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
690 * ENOBUFS might not be good (it's not tunable per se), but otherwise
691 * we don't have a good statistic (IpOutDiscards but it can be too many
692 * things). We could add another new stat but at least for now that
693 * seems like overkill.
694 */
695 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
696 UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS);
697 }
664 return err; 698 return err;
665 699
666do_confirm: 700do_confirm:
@@ -980,6 +1014,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
980static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) 1014static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
981{ 1015{
982 struct udp_sock *up = udp_sk(sk); 1016 struct udp_sock *up = udp_sk(sk);
1017 int rc;
983 1018
984 /* 1019 /*
985 * Charge it to the socket, dropping if the queue is full. 1020 * Charge it to the socket, dropping if the queue is full.
@@ -1026,7 +1061,10 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1026 skb->ip_summed = CHECKSUM_UNNECESSARY; 1061 skb->ip_summed = CHECKSUM_UNNECESSARY;
1027 } 1062 }
1028 1063
1029 if (sock_queue_rcv_skb(sk,skb)<0) { 1064 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
1065 /* Note that an ENOMEM error is charged twice */
1066 if (rc == -ENOMEM)
1067 UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS);
1030 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 1068 UDP_INC_STATS_BH(UDP_MIB_INERRORS);
1031 kfree_skb(skb); 1069 kfree_skb(skb);
1032 return -1; 1070 return -1;
@@ -1087,7 +1125,7 @@ static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
1087{ 1125{
1088 if (uh->check == 0) { 1126 if (uh->check == 0) {
1089 skb->ip_summed = CHECKSUM_UNNECESSARY; 1127 skb->ip_summed = CHECKSUM_UNNECESSARY;
1090 } else if (skb->ip_summed == CHECKSUM_HW) { 1128 } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
1091 if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) 1129 if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
1092 skb->ip_summed = CHECKSUM_UNNECESSARY; 1130 skb->ip_summed = CHECKSUM_UNNECESSARY;
1093 } 1131 }
@@ -1581,7 +1619,7 @@ EXPORT_SYMBOL(udp_disconnect);
1581EXPORT_SYMBOL(udp_hash); 1619EXPORT_SYMBOL(udp_hash);
1582EXPORT_SYMBOL(udp_hash_lock); 1620EXPORT_SYMBOL(udp_hash_lock);
1583EXPORT_SYMBOL(udp_ioctl); 1621EXPORT_SYMBOL(udp_ioctl);
1584EXPORT_SYMBOL(udp_port_rover); 1622EXPORT_SYMBOL(udp_get_port);
1585EXPORT_SYMBOL(udp_prot); 1623EXPORT_SYMBOL(udp_prot);
1586EXPORT_SYMBOL(udp_sendmsg); 1624EXPORT_SYMBOL(udp_sendmsg);
1587EXPORT_SYMBOL(udp_poll); 1625EXPORT_SYMBOL(udp_poll);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 817ed84511a6..040e8475f295 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -106,7 +106,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
106 if (x->mode->input(x, skb)) 106 if (x->mode->input(x, skb))
107 goto drop; 107 goto drop;
108 108
109 if (x->props.mode) { 109 if (x->props.mode == XFRM_MODE_TUNNEL) {
110 decaps = 1; 110 decaps = 1;
111 break; 111 break;
112 } 112 }
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index a9e6b3dd19c9..92676b7e4034 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -21,9 +21,8 @@
21 * On exit, skb->h will be set to the start of the payload to be processed 21 * On exit, skb->h will be set to the start of the payload to be processed
22 * by x->type->output and skb->nh will be set to the top IP header. 22 * by x->type->output and skb->nh will be set to the top IP header.
23 */ 23 */
24static int xfrm4_transport_output(struct sk_buff *skb) 24static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
25{ 25{
26 struct xfrm_state *x;
27 struct iphdr *iph; 26 struct iphdr *iph;
28 int ihl; 27 int ihl;
29 28
@@ -33,7 +32,6 @@ static int xfrm4_transport_output(struct sk_buff *skb)
33 ihl = iph->ihl * 4; 32 ihl = iph->ihl * 4;
34 skb->h.raw += ihl; 33 skb->h.raw += ihl;
35 34
36 x = skb->dst->xfrm;
37 skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl); 35 skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl);
38 return 0; 36 return 0;
39} 37}
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 13cafbe56ce3..e23c21d31a53 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -33,10 +33,9 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
33 * On exit, skb->h will be set to the start of the payload to be processed 33 * On exit, skb->h will be set to the start of the payload to be processed
34 * by x->type->output and skb->nh will be set to the top IP header. 34 * by x->type->output and skb->nh will be set to the top IP header.
35 */ 35 */
36static int xfrm4_tunnel_output(struct sk_buff *skb) 36static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
37{ 37{
38 struct dst_entry *dst = skb->dst; 38 struct dst_entry *dst = skb->dst;
39 struct xfrm_state *x = dst->xfrm;
40 struct iphdr *iph, *top_iph; 39 struct iphdr *iph, *top_iph;
41 int flags; 40 int flags;
42 41
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index d16f863cf687..04403fb01a58 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -48,13 +48,13 @@ static int xfrm4_output_one(struct sk_buff *skb)
48 struct xfrm_state *x = dst->xfrm; 48 struct xfrm_state *x = dst->xfrm;
49 int err; 49 int err;
50 50
51 if (skb->ip_summed == CHECKSUM_HW) { 51 if (skb->ip_summed == CHECKSUM_PARTIAL) {
52 err = skb_checksum_help(skb, 0); 52 err = skb_checksum_help(skb);
53 if (err) 53 if (err)
54 goto error_nolock; 54 goto error_nolock;
55 } 55 }
56 56
57 if (x->props.mode) { 57 if (x->props.mode == XFRM_MODE_TUNNEL) {
58 err = xfrm4_tunnel_check_size(skb); 58 err = xfrm4_tunnel_check_size(skb);
59 if (err) 59 if (err)
60 goto error_nolock; 60 goto error_nolock;
@@ -66,7 +66,7 @@ static int xfrm4_output_one(struct sk_buff *skb)
66 if (err) 66 if (err)
67 goto error; 67 goto error;
68 68
69 err = x->mode->output(skb); 69 err = x->mode->output(x, skb);
70 if (err) 70 if (err)
71 goto error; 71 goto error;
72 72
@@ -85,7 +85,7 @@ static int xfrm4_output_one(struct sk_buff *skb)
85 } 85 }
86 dst = skb->dst; 86 dst = skb->dst;
87 x = dst->xfrm; 87 x = dst->xfrm;
88 } while (x && !x->props.mode); 88 } while (x && (x->props.mode != XFRM_MODE_TUNNEL));
89 89
90 IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; 90 IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
91 err = 0; 91 err = 0;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 8f50eae47d03..eabcd27b1767 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -21,6 +21,25 @@ static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
21 return __ip_route_output_key((struct rtable**)dst, fl); 21 return __ip_route_output_key((struct rtable**)dst, fl);
22} 22}
23 23
24static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
25{
26 struct rtable *rt;
27 struct flowi fl_tunnel = {
28 .nl_u = {
29 .ip4_u = {
30 .daddr = daddr->a4,
31 },
32 },
33 };
34
35 if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) {
36 saddr->a4 = rt->rt_src;
37 dst_release(&rt->u.dst);
38 return 0;
39 }
40 return -EHOSTUNREACH;
41}
42
24static struct dst_entry * 43static struct dst_entry *
25__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) 44__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
26{ 45{
@@ -33,7 +52,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
33 xdst->u.rt.fl.fl4_dst == fl->fl4_dst && 52 xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
34 xdst->u.rt.fl.fl4_src == fl->fl4_src && 53 xdst->u.rt.fl.fl4_src == fl->fl4_src &&
35 xdst->u.rt.fl.fl4_tos == fl->fl4_tos && 54 xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
36 xfrm_bundle_ok(xdst, fl, AF_INET)) { 55 xfrm_bundle_ok(xdst, fl, AF_INET, 0)) {
37 dst_clone(dst); 56 dst_clone(dst);
38 break; 57 break;
39 } 58 }
@@ -93,10 +112,11 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
93 112
94 xdst = (struct xfrm_dst *)dst1; 113 xdst = (struct xfrm_dst *)dst1;
95 xdst->route = &rt->u.dst; 114 xdst->route = &rt->u.dst;
115 xdst->genid = xfrm[i]->genid;
96 116
97 dst1->next = dst_prev; 117 dst1->next = dst_prev;
98 dst_prev = dst1; 118 dst_prev = dst1;
99 if (xfrm[i]->props.mode) { 119 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
100 remote = xfrm[i]->id.daddr.a4; 120 remote = xfrm[i]->id.daddr.a4;
101 local = xfrm[i]->props.saddr.a4; 121 local = xfrm[i]->props.saddr.a4;
102 tunnel = 1; 122 tunnel = 1;
@@ -135,6 +155,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
135 dst_prev->flags |= DST_HOST; 155 dst_prev->flags |= DST_HOST;
136 dst_prev->lastuse = jiffies; 156 dst_prev->lastuse = jiffies;
137 dst_prev->header_len = header_len; 157 dst_prev->header_len = header_len;
158 dst_prev->nfheader_len = 0;
138 dst_prev->trailer_len = trailer_len; 159 dst_prev->trailer_len = trailer_len;
139 memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); 160 memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
140 161
@@ -296,6 +317,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
296 .family = AF_INET, 317 .family = AF_INET,
297 .dst_ops = &xfrm4_dst_ops, 318 .dst_ops = &xfrm4_dst_ops,
298 .dst_lookup = xfrm4_dst_lookup, 319 .dst_lookup = xfrm4_dst_lookup,
320 .get_saddr = xfrm4_get_saddr,
299 .find_bundle = __xfrm4_find_bundle, 321 .find_bundle = __xfrm4_find_bundle,
300 .bundle_create = __xfrm4_bundle_create, 322 .bundle_create = __xfrm4_bundle_create,
301 .decode_session = _decode_session4, 323 .decode_session = _decode_session4,
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 81e1751c966e..fe2034494d08 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -42,99 +42,15 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
42 x->props.saddr = tmpl->saddr; 42 x->props.saddr = tmpl->saddr;
43 if (x->props.saddr.a4 == 0) 43 if (x->props.saddr.a4 == 0)
44 x->props.saddr.a4 = saddr->a4; 44 x->props.saddr.a4 = saddr->a4;
45 if (tmpl->mode && x->props.saddr.a4 == 0) {
46 struct rtable *rt;
47 struct flowi fl_tunnel = {
48 .nl_u = {
49 .ip4_u = {
50 .daddr = x->id.daddr.a4,
51 }
52 }
53 };
54 if (!xfrm_dst_lookup((struct xfrm_dst **)&rt,
55 &fl_tunnel, AF_INET)) {
56 x->props.saddr.a4 = rt->rt_src;
57 dst_release(&rt->u.dst);
58 }
59 }
60 x->props.mode = tmpl->mode; 45 x->props.mode = tmpl->mode;
61 x->props.reqid = tmpl->reqid; 46 x->props.reqid = tmpl->reqid;
62 x->props.family = AF_INET; 47 x->props.family = AF_INET;
63} 48}
64 49
65static struct xfrm_state *
66__xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
67{
68 unsigned h = __xfrm4_spi_hash(daddr, spi, proto);
69 struct xfrm_state *x;
70
71 list_for_each_entry(x, xfrm4_state_afinfo.state_byspi+h, byspi) {
72 if (x->props.family == AF_INET &&
73 spi == x->id.spi &&
74 daddr->a4 == x->id.daddr.a4 &&
75 proto == x->id.proto) {
76 xfrm_state_hold(x);
77 return x;
78 }
79 }
80 return NULL;
81}
82
83static struct xfrm_state *
84__xfrm4_find_acq(u8 mode, u32 reqid, u8 proto,
85 xfrm_address_t *daddr, xfrm_address_t *saddr,
86 int create)
87{
88 struct xfrm_state *x, *x0;
89 unsigned h = __xfrm4_dst_hash(daddr);
90
91 x0 = NULL;
92
93 list_for_each_entry(x, xfrm4_state_afinfo.state_bydst+h, bydst) {
94 if (x->props.family == AF_INET &&
95 daddr->a4 == x->id.daddr.a4 &&
96 mode == x->props.mode &&
97 proto == x->id.proto &&
98 saddr->a4 == x->props.saddr.a4 &&
99 reqid == x->props.reqid &&
100 x->km.state == XFRM_STATE_ACQ &&
101 !x->id.spi) {
102 x0 = x;
103 break;
104 }
105 }
106 if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) {
107 x0->sel.daddr.a4 = daddr->a4;
108 x0->sel.saddr.a4 = saddr->a4;
109 x0->sel.prefixlen_d = 32;
110 x0->sel.prefixlen_s = 32;
111 x0->props.saddr.a4 = saddr->a4;
112 x0->km.state = XFRM_STATE_ACQ;
113 x0->id.daddr.a4 = daddr->a4;
114 x0->id.proto = proto;
115 x0->props.family = AF_INET;
116 x0->props.mode = mode;
117 x0->props.reqid = reqid;
118 x0->props.family = AF_INET;
119 x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
120 xfrm_state_hold(x0);
121 x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
122 add_timer(&x0->timer);
123 xfrm_state_hold(x0);
124 list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h);
125 wake_up(&km_waitq);
126 }
127 if (x0)
128 xfrm_state_hold(x0);
129 return x0;
130}
131
132static struct xfrm_state_afinfo xfrm4_state_afinfo = { 50static struct xfrm_state_afinfo xfrm4_state_afinfo = {
133 .family = AF_INET, 51 .family = AF_INET,
134 .init_flags = xfrm4_init_flags, 52 .init_flags = xfrm4_init_flags,
135 .init_tempsel = __xfrm4_init_tempsel, 53 .init_tempsel = __xfrm4_init_tempsel,
136 .state_lookup = __xfrm4_state_lookup,
137 .find_acq = __xfrm4_find_acq,
138}; 54};
139 55
140void __init xfrm4_state_init(void) 56void __init xfrm4_state_init(void)
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index f8ceaa127c83..f110af5b1319 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -28,7 +28,7 @@ static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb)
28 28
29static int ipip_init_state(struct xfrm_state *x) 29static int ipip_init_state(struct xfrm_state *x)
30{ 30{
31 if (!x->props.mode) 31 if (x->props.mode != XFRM_MODE_TUNNEL)
32 return -EINVAL; 32 return -EINVAL;
33 33
34 if (x->encap) 34 if (x->encap)