author    David S. Miller <davem@davemloft.net>  2009-06-11 23:00:44 -0400
committer David S. Miller <davem@davemloft.net>  2009-06-11 23:00:44 -0400
commit    adf76cfe24dab32a54e2dd1f51534cea8277f32a (patch)
tree      6935c74a4b7237bd5f95918b3145ac57e0769fca /net
parent    17d0cdfa8f3c09a110061c67421d662b3e149d0a (diff)
parent    24992eacd8a9f4af286bdaaab627b6802ceb8bce (diff)

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6
Diffstat (limited to 'net')
-rw-r--r--  net/bridge/netfilter/ebtables.c                  |  18
-rw-r--r--  net/ipv4/netfilter/arp_tables.c                  | 109
-rw-r--r--  net/ipv4/netfilter/ip_queue.c                    |   2
-rw-r--r--  net/ipv4/netfilter/ip_tables.c                   | 172
-rw-r--r--  net/ipv4/netfilter/ipt_MASQUERADE.c              |  12
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_proto_icmp.c     |  17
-rw-r--r--  net/ipv6/netfilter/ip6_queue.c                   |   2
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c                  | 170
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c   |  17
-rw-r--r--  net/netfilter/Kconfig                            |  13
-rw-r--r--  net/netfilter/Makefile                           |   1
-rw-r--r--  net/netfilter/nf_conntrack_core.c                |  30
-rw-r--r--  net/netfilter/nf_conntrack_ecache.c              |  83
-rw-r--r--  net/netfilter/nf_conntrack_ftp.c                 |   2
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c             | 263
-rw-r--r--  net/netfilter/nf_conntrack_proto_dccp.c          |  31
-rw-r--r--  net/netfilter/nf_conntrack_proto_gre.c           |   3
-rw-r--r--  net/netfilter/nf_conntrack_proto_sctp.c          |  27
-rw-r--r--  net/netfilter/nf_conntrack_proto_tcp.c           | 140
-rw-r--r--  net/netfilter/nf_queue.c                         |   4
-rw-r--r--  net/netfilter/nfnetlink.c                        |  28
-rw-r--r--  net/netfilter/x_tables.c                         |  42
-rw-r--r--  net/netfilter/xt_NFQUEUE.c                       |  93
-rw-r--r--  net/netfilter/xt_osf.c                           | 428
-rw-r--r--  net/netfilter/xt_socket.c                        |  63
25 files changed, 1189 insertions(+), 581 deletions(-)
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 820252aee81f..37928d5f2840 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -142,6 +142,12 @@ static inline int ebt_basic_match(struct ebt_entry *e, struct ethhdr *h,
142 return 0; 142 return 0;
143} 143}
144 144
145static inline __pure
146struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry)
147{
148 return (void *)entry + entry->next_offset;
149}
150
145/* Do some firewalling */ 151/* Do some firewalling */
146unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, 152unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
147 const struct net_device *in, const struct net_device *out, 153 const struct net_device *in, const struct net_device *out,
@@ -164,7 +170,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
164 mtpar.in = tgpar.in = in; 170 mtpar.in = tgpar.in = in;
165 mtpar.out = tgpar.out = out; 171 mtpar.out = tgpar.out = out;
166 mtpar.hotdrop = &hotdrop; 172 mtpar.hotdrop = &hotdrop;
167 tgpar.hooknum = hook; 173 mtpar.hooknum = tgpar.hooknum = hook;
168 174
169 read_lock_bh(&table->lock); 175 read_lock_bh(&table->lock);
170 private = table->private; 176 private = table->private;
@@ -249,8 +255,7 @@ letsreturn:
249 /* jump to a udc */ 255 /* jump to a udc */
250 cs[sp].n = i + 1; 256 cs[sp].n = i + 1;
251 cs[sp].chaininfo = chaininfo; 257 cs[sp].chaininfo = chaininfo;
252 cs[sp].e = (struct ebt_entry *) 258 cs[sp].e = ebt_next_entry(point);
253 (((char *)point) + point->next_offset);
254 i = 0; 259 i = 0;
255 chaininfo = (struct ebt_entries *) (base + verdict); 260 chaininfo = (struct ebt_entries *) (base + verdict);
256#ifdef CONFIG_NETFILTER_DEBUG 261#ifdef CONFIG_NETFILTER_DEBUG
@@ -266,8 +271,7 @@ letsreturn:
266 sp++; 271 sp++;
267 continue; 272 continue;
268letscontinue: 273letscontinue:
269 point = (struct ebt_entry *) 274 point = ebt_next_entry(point);
270 (((char *)point) + point->next_offset);
271 i++; 275 i++;
272 } 276 }
273 277
@@ -787,7 +791,7 @@ static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s
787 /* this can't be 0, so the loop test is correct */ 791 /* this can't be 0, so the loop test is correct */
788 cl_s[i].cs.n = pos + 1; 792 cl_s[i].cs.n = pos + 1;
789 pos = 0; 793 pos = 0;
790 cl_s[i].cs.e = ((void *)e + e->next_offset); 794 cl_s[i].cs.e = ebt_next_entry(e);
791 e = (struct ebt_entry *)(hlp2->data); 795 e = (struct ebt_entry *)(hlp2->data);
792 nentries = hlp2->nentries; 796 nentries = hlp2->nentries;
793 cl_s[i].from = chain_nr; 797 cl_s[i].from = chain_nr;
@@ -797,7 +801,7 @@ static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s
797 continue; 801 continue;
798 } 802 }
799letscontinue: 803letscontinue:
800 e = (void *)e + e->next_offset; 804 e = ebt_next_entry(e);
801 pos++; 805 pos++;
802 } 806 }
803 return 0; 807 return 0;
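
The ebtables hunk above and the arp_tables/ip_tables/ip6_tables hunks below all add the same tiny helper: a __pure *_next_entry() that hides the "(void *)entry + entry->next_offset" pointer arithmetic used to step through the packed, variable-length rule blob, so the do_table loops can simply do "e = next_entry(e); continue;" on a failed match. Below is a minimal, self-contained userspace sketch of that traversal idiom; struct entry and next_entry() are illustrative stand-ins, not the kernel definitions.

#include <stdio.h>

/* Toy stand-in for ebt_entry/ipt_entry: each rule records its own
 * total size in next_offset, so the table is one packed blob. */
struct entry {
        unsigned int next_offset;   /* size of this entry in bytes */
        unsigned int verdict;       /* stands in for matches + target */
};

/* The idiom wrapped by ebt_next_entry()/arpt_next_entry()/
 * ipt_next_entry(): advance next_offset bytes to the next rule. */
static struct entry *next_entry(struct entry *e)
{
        return (struct entry *)((char *)e + e->next_offset);
}

int main(void)
{
        struct entry table[3];
        struct entry *e;
        int i;

        /* Pack three fixed-size entries back to back. */
        for (i = 0; i < 3; i++) {
                table[i].next_offset = sizeof(table[i]);
                table[i].verdict = i;
        }

        /* Walk them the way the do_table loops now do. */
        for (e = table, i = 0; i < 3; i++, e = next_entry(e))
                printf("entry %d: verdict %u\n", i, e->verdict);

        return 0;
}
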
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 831fe1879dc0..7505dff4ffdf 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -231,6 +231,12 @@ static inline struct arpt_entry *get_entry(void *base, unsigned int offset)
231 return (struct arpt_entry *)(base + offset); 231 return (struct arpt_entry *)(base + offset);
232} 232}
233 233
234static inline __pure
235struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
236{
237 return (void *)entry + entry->next_offset;
238}
239
234unsigned int arpt_do_table(struct sk_buff *skb, 240unsigned int arpt_do_table(struct sk_buff *skb,
235 unsigned int hook, 241 unsigned int hook,
236 const struct net_device *in, 242 const struct net_device *in,
@@ -267,67 +273,64 @@ unsigned int arpt_do_table(struct sk_buff *skb,
267 273
268 arp = arp_hdr(skb); 274 arp = arp_hdr(skb);
269 do { 275 do {
270 if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { 276 struct arpt_entry_target *t;
271 struct arpt_entry_target *t; 277 int hdr_len;
272 int hdr_len;
273
274 hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
275 (2 * skb->dev->addr_len);
276 278
277 ADD_COUNTER(e->counters, hdr_len, 1); 279 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
280 e = arpt_next_entry(e);
281 continue;
282 }
278 283
279 t = arpt_get_target(e); 284 hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
285 (2 * skb->dev->addr_len);
286 ADD_COUNTER(e->counters, hdr_len, 1);
280 287
281 /* Standard target? */ 288 t = arpt_get_target(e);
282 if (!t->u.kernel.target->target) {
283 int v;
284 289
285 v = ((struct arpt_standard_target *)t)->verdict; 290 /* Standard target? */
286 if (v < 0) { 291 if (!t->u.kernel.target->target) {
287 /* Pop from stack? */ 292 int v;
288 if (v != ARPT_RETURN) {
289 verdict = (unsigned)(-v) - 1;
290 break;
291 }
292 e = back;
293 back = get_entry(table_base,
294 back->comefrom);
295 continue;
296 }
297 if (table_base + v
298 != (void *)e + e->next_offset) {
299 /* Save old back ptr in next entry */
300 struct arpt_entry *next
301 = (void *)e + e->next_offset;
302 next->comefrom =
303 (void *)back - table_base;
304
305 /* set back pointer to next entry */
306 back = next;
307 }
308 293
309 e = get_entry(table_base, v); 294 v = ((struct arpt_standard_target *)t)->verdict;
310 } else { 295 if (v < 0) {
311 /* Targets which reenter must return 296 /* Pop from stack? */
312 * abs. verdicts 297 if (v != ARPT_RETURN) {
313 */ 298 verdict = (unsigned)(-v) - 1;
314 tgpar.target = t->u.kernel.target;
315 tgpar.targinfo = t->data;
316 verdict = t->u.kernel.target->target(skb,
317 &tgpar);
318
319 /* Target might have changed stuff. */
320 arp = arp_hdr(skb);
321
322 if (verdict == ARPT_CONTINUE)
323 e = (void *)e + e->next_offset;
324 else
325 /* Verdict */
326 break; 299 break;
300 }
301 e = back;
302 back = get_entry(table_base, back->comefrom);
303 continue;
327 } 304 }
328 } else { 305 if (table_base + v
329 e = (void *)e + e->next_offset; 306 != arpt_next_entry(e)) {
307 /* Save old back ptr in next entry */
308 struct arpt_entry *next = arpt_next_entry(e);
309 next->comefrom = (void *)back - table_base;
310
311 /* set back pointer to next entry */
312 back = next;
313 }
314
315 e = get_entry(table_base, v);
316 continue;
330 } 317 }
318
319 /* Targets which reenter must return
320 * abs. verdicts
321 */
322 tgpar.target = t->u.kernel.target;
323 tgpar.targinfo = t->data;
324 verdict = t->u.kernel.target->target(skb, &tgpar);
325
326 /* Target might have changed stuff. */
327 arp = arp_hdr(skb);
328
329 if (verdict == ARPT_CONTINUE)
330 e = arpt_next_entry(e);
331 else
332 /* Verdict */
333 break;
331 } while (!hotdrop); 334 } while (!hotdrop);
332 xt_info_rdunlock_bh(); 335 xt_info_rdunlock_bh();
333 336
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 5f22c91c6e15..c156db215987 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -596,7 +596,7 @@ static int __init ip_queue_init(void)
596#ifdef CONFIG_SYSCTL 596#ifdef CONFIG_SYSCTL
597 ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table); 597 ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
598#endif 598#endif
599 status = nf_register_queue_handler(PF_INET, &nfqh); 599 status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);
600 if (status < 0) { 600 if (status < 0) {
601 printk(KERN_ERR "ip_queue: failed to register queue handler\n"); 601 printk(KERN_ERR "ip_queue: failed to register queue handler\n");
602 goto cleanup_sysctl; 602 goto cleanup_sysctl;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2ec8d7290c40..fdefae6b5dfc 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -238,8 +238,8 @@ static struct nf_loginfo trace_loginfo = {
238/* Mildly perf critical (only if packet tracing is on) */ 238/* Mildly perf critical (only if packet tracing is on) */
239static inline int 239static inline int
240get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, 240get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
241 char *hookname, char **chainname, 241 const char *hookname, const char **chainname,
242 char **comment, unsigned int *rulenum) 242 const char **comment, unsigned int *rulenum)
243{ 243{
244 struct ipt_standard_target *t = (void *)ipt_get_target(s); 244 struct ipt_standard_target *t = (void *)ipt_get_target(s);
245 245
@@ -257,8 +257,8 @@ get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
257 && unconditional(&s->ip)) { 257 && unconditional(&s->ip)) {
258 /* Tail of chains: STANDARD target (return/policy) */ 258 /* Tail of chains: STANDARD target (return/policy) */
259 *comment = *chainname == hookname 259 *comment = *chainname == hookname
260 ? (char *)comments[NF_IP_TRACE_COMMENT_POLICY] 260 ? comments[NF_IP_TRACE_COMMENT_POLICY]
261 : (char *)comments[NF_IP_TRACE_COMMENT_RETURN]; 261 : comments[NF_IP_TRACE_COMMENT_RETURN];
262 } 262 }
263 return 1; 263 return 1;
264 } else 264 } else
@@ -277,14 +277,14 @@ static void trace_packet(struct sk_buff *skb,
277{ 277{
278 void *table_base; 278 void *table_base;
279 const struct ipt_entry *root; 279 const struct ipt_entry *root;
280 char *hookname, *chainname, *comment; 280 const char *hookname, *chainname, *comment;
281 unsigned int rulenum = 0; 281 unsigned int rulenum = 0;
282 282
283 table_base = (void *)private->entries[smp_processor_id()]; 283 table_base = private->entries[smp_processor_id()];
284 root = get_entry(table_base, private->hook_entry[hook]); 284 root = get_entry(table_base, private->hook_entry[hook]);
285 285
286 hookname = chainname = (char *)hooknames[hook]; 286 hookname = chainname = hooknames[hook];
287 comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE]; 287 comment = comments[NF_IP_TRACE_COMMENT_RULE];
288 288
289 IPT_ENTRY_ITERATE(root, 289 IPT_ENTRY_ITERATE(root,
290 private->size - private->hook_entry[hook], 290 private->size - private->hook_entry[hook],
@@ -297,6 +297,12 @@ static void trace_packet(struct sk_buff *skb,
297} 297}
298#endif 298#endif
299 299
300static inline __pure
301struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
302{
303 return (void *)entry + entry->next_offset;
304}
305
300/* Returns one of the generic firewall policies, like NF_ACCEPT. */ 306/* Returns one of the generic firewall policies, like NF_ACCEPT. */
301unsigned int 307unsigned int
302ipt_do_table(struct sk_buff *skb, 308ipt_do_table(struct sk_buff *skb,
@@ -305,6 +311,8 @@ ipt_do_table(struct sk_buff *skb,
305 const struct net_device *out, 311 const struct net_device *out,
306 struct xt_table *table) 312 struct xt_table *table)
307{ 313{
314#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom
315
308 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); 316 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
309 const struct iphdr *ip; 317 const struct iphdr *ip;
310 u_int16_t datalen; 318 u_int16_t datalen;
@@ -335,7 +343,7 @@ ipt_do_table(struct sk_buff *skb,
335 mtpar.in = tgpar.in = in; 343 mtpar.in = tgpar.in = in;
336 mtpar.out = tgpar.out = out; 344 mtpar.out = tgpar.out = out;
337 mtpar.family = tgpar.family = NFPROTO_IPV4; 345 mtpar.family = tgpar.family = NFPROTO_IPV4;
338 tgpar.hooknum = hook; 346 mtpar.hooknum = tgpar.hooknum = hook;
339 347
340 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 348 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
341 xt_info_rdlock_bh(); 349 xt_info_rdlock_bh();
@@ -348,92 +356,84 @@ ipt_do_table(struct sk_buff *skb,
348 back = get_entry(table_base, private->underflow[hook]); 356 back = get_entry(table_base, private->underflow[hook]);
349 357
350 do { 358 do {
359 struct ipt_entry_target *t;
360
351 IP_NF_ASSERT(e); 361 IP_NF_ASSERT(e);
352 IP_NF_ASSERT(back); 362 IP_NF_ASSERT(back);
353 if (ip_packet_match(ip, indev, outdev, 363 if (!ip_packet_match(ip, indev, outdev,
354 &e->ip, mtpar.fragoff)) { 364 &e->ip, mtpar.fragoff) ||
355 struct ipt_entry_target *t; 365 IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) {
356 366 e = ipt_next_entry(e);
357 if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) 367 continue;
358 goto no_match; 368 }
359 369
360 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); 370 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
361 371
362 t = ipt_get_target(e); 372 t = ipt_get_target(e);
363 IP_NF_ASSERT(t->u.kernel.target); 373 IP_NF_ASSERT(t->u.kernel.target);
364 374
365#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 375#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
366 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 376 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
367 /* The packet is traced: log it */ 377 /* The packet is traced: log it */
368 if (unlikely(skb->nf_trace)) 378 if (unlikely(skb->nf_trace))
369 trace_packet(skb, hook, in, out, 379 trace_packet(skb, hook, in, out,
370 table->name, private, e); 380 table->name, private, e);
371#endif 381#endif
372 /* Standard target? */ 382 /* Standard target? */
373 if (!t->u.kernel.target->target) { 383 if (!t->u.kernel.target->target) {
374 int v; 384 int v;
375 385
376 v = ((struct ipt_standard_target *)t)->verdict; 386 v = ((struct ipt_standard_target *)t)->verdict;
377 if (v < 0) { 387 if (v < 0) {
378 /* Pop from stack? */ 388 /* Pop from stack? */
379 if (v != IPT_RETURN) { 389 if (v != IPT_RETURN) {
380 verdict = (unsigned)(-v) - 1; 390 verdict = (unsigned)(-v) - 1;
381 break; 391 break;
382 }
383 e = back;
384 back = get_entry(table_base,
385 back->comefrom);
386 continue;
387 }
388 if (table_base + v != (void *)e + e->next_offset
389 && !(e->ip.flags & IPT_F_GOTO)) {
390 /* Save old back ptr in next entry */
391 struct ipt_entry *next
392 = (void *)e + e->next_offset;
393 next->comefrom
394 = (void *)back - table_base;
395 /* set back pointer to next entry */
396 back = next;
397 } 392 }
393 e = back;
394 back = get_entry(table_base, back->comefrom);
395 continue;
396 }
397 if (table_base + v != ipt_next_entry(e)
398 && !(e->ip.flags & IPT_F_GOTO)) {
399 /* Save old back ptr in next entry */
400 struct ipt_entry *next = ipt_next_entry(e);
401 next->comefrom = (void *)back - table_base;
402 /* set back pointer to next entry */
403 back = next;
404 }
405
406 e = get_entry(table_base, v);
407 continue;
408 }
409
410 /* Targets which reenter must return
411 abs. verdicts */
412 tgpar.target = t->u.kernel.target;
413 tgpar.targinfo = t->data;
414
398 415
399 e = get_entry(table_base, v);
400 } else {
401 /* Targets which reenter must return
402 abs. verdicts */
403 tgpar.target = t->u.kernel.target;
404 tgpar.targinfo = t->data;
405#ifdef CONFIG_NETFILTER_DEBUG 416#ifdef CONFIG_NETFILTER_DEBUG
406 ((struct ipt_entry *)table_base)->comefrom 417 tb_comefrom = 0xeeeeeeec;
407 = 0xeeeeeeec;
408#endif 418#endif
409 verdict = t->u.kernel.target->target(skb, 419 verdict = t->u.kernel.target->target(skb, &tgpar);
410 &tgpar);
411#ifdef CONFIG_NETFILTER_DEBUG 420#ifdef CONFIG_NETFILTER_DEBUG
412 if (((struct ipt_entry *)table_base)->comefrom 421 if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) {
413 != 0xeeeeeeec 422 printk("Target %s reentered!\n",
414 && verdict == IPT_CONTINUE) { 423 t->u.kernel.target->name);
415 printk("Target %s reentered!\n", 424 verdict = NF_DROP;
416 t->u.kernel.target->name); 425 }
417 verdict = NF_DROP; 426 tb_comefrom = 0x57acc001;
418 }
419 ((struct ipt_entry *)table_base)->comefrom
420 = 0x57acc001;
421#endif 427#endif
422 /* Target might have changed stuff. */ 428 /* Target might have changed stuff. */
423 ip = ip_hdr(skb); 429 ip = ip_hdr(skb);
424 datalen = skb->len - ip->ihl * 4; 430 datalen = skb->len - ip->ihl * 4;
425
426 if (verdict == IPT_CONTINUE)
427 e = (void *)e + e->next_offset;
428 else
429 /* Verdict */
430 break;
431 }
432 } else {
433 431
434 no_match: 432 if (verdict == IPT_CONTINUE)
435 e = (void *)e + e->next_offset; 433 e = ipt_next_entry(e);
436 } 434 else
435 /* Verdict */
436 break;
437 } while (!hotdrop); 437 } while (!hotdrop);
438 xt_info_rdunlock_bh(); 438 xt_info_rdunlock_bh();
439 439
@@ -444,6 +444,8 @@ ipt_do_table(struct sk_buff *skb,
444 return NF_DROP; 444 return NF_DROP;
445 else return verdict; 445 else return verdict;
446#endif 446#endif
447
448#undef tb_comefrom
447} 449}
448 450
449/* Figures out from what hook each rule can be called: returns 0 if 451/* Figures out from what hook each rule can be called: returns 0 if
@@ -2158,7 +2160,7 @@ static bool icmp_checkentry(const struct xt_mtchk_param *par)
2158static struct xt_target ipt_standard_target __read_mostly = { 2160static struct xt_target ipt_standard_target __read_mostly = {
2159 .name = IPT_STANDARD_TARGET, 2161 .name = IPT_STANDARD_TARGET,
2160 .targetsize = sizeof(int), 2162 .targetsize = sizeof(int),
2161 .family = AF_INET, 2163 .family = NFPROTO_IPV4,
2162#ifdef CONFIG_COMPAT 2164#ifdef CONFIG_COMPAT
2163 .compatsize = sizeof(compat_int_t), 2165 .compatsize = sizeof(compat_int_t),
2164 .compat_from_user = compat_standard_from_user, 2166 .compat_from_user = compat_standard_from_user,
@@ -2170,7 +2172,7 @@ static struct xt_target ipt_error_target __read_mostly = {
2170 .name = IPT_ERROR_TARGET, 2172 .name = IPT_ERROR_TARGET,
2171 .target = ipt_error, 2173 .target = ipt_error,
2172 .targetsize = IPT_FUNCTION_MAXNAMELEN, 2174 .targetsize = IPT_FUNCTION_MAXNAMELEN,
2173 .family = AF_INET, 2175 .family = NFPROTO_IPV4,
2174}; 2176};
2175 2177
2176static struct nf_sockopt_ops ipt_sockopts = { 2178static struct nf_sockopt_ops ipt_sockopts = {
@@ -2196,17 +2198,17 @@ static struct xt_match icmp_matchstruct __read_mostly = {
2196 .matchsize = sizeof(struct ipt_icmp), 2198 .matchsize = sizeof(struct ipt_icmp),
2197 .checkentry = icmp_checkentry, 2199 .checkentry = icmp_checkentry,
2198 .proto = IPPROTO_ICMP, 2200 .proto = IPPROTO_ICMP,
2199 .family = AF_INET, 2201 .family = NFPROTO_IPV4,
2200}; 2202};
2201 2203
2202static int __net_init ip_tables_net_init(struct net *net) 2204static int __net_init ip_tables_net_init(struct net *net)
2203{ 2205{
2204 return xt_proto_init(net, AF_INET); 2206 return xt_proto_init(net, NFPROTO_IPV4);
2205} 2207}
2206 2208
2207static void __net_exit ip_tables_net_exit(struct net *net) 2209static void __net_exit ip_tables_net_exit(struct net *net)
2208{ 2210{
2209 xt_proto_fini(net, AF_INET); 2211 xt_proto_fini(net, NFPROTO_IPV4);
2210} 2212}
2211 2213
2212static struct pernet_operations ip_tables_net_ops = { 2214static struct pernet_operations ip_tables_net_ops = {
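
The tb_comefrom macro that ip_tables.c (and ip6_tables.c further down) now defines at the top of the do_table function is just a function-local shorthand for the repeated "((struct ipt_entry *)table_base)->comefrom" cast, #undef'd again before the function ends so it cannot leak into other code. A small stand-alone sketch of the same define/undef pattern, with illustrative names only:

#include <stdio.h>

struct entry {
        unsigned int comefrom;
};

/* Function-local macro in the spirit of tb_comefrom: it abbreviates
 * a cast through an opaque table base pointer and is undefined at
 * the end of the function. */
static void poison_comefrom(void *table_base)
{
#define tb_comefrom ((struct entry *)table_base)->comefrom

        tb_comefrom = 0xeeeeeeec;          /* poison before running the target */
        printf("comefrom = %#x\n", tb_comefrom);
        tb_comefrom = 0x57acc001;          /* mark it "static" afterwards */

#undef tb_comefrom
}

int main(void)
{
        struct entry e = { .comefrom = 0 };

        poison_comefrom(&e);
        return 0;
}
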
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index c0992c75bdac..dada0863946d 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -27,9 +27,6 @@ MODULE_LICENSE("GPL");
27MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 27MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
28MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); 28MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
29 29
30/* Lock protects masq region inside conntrack */
31static DEFINE_RWLOCK(masq_lock);
32
33/* FIXME: Multiple targets. --RR */ 30/* FIXME: Multiple targets. --RR */
34static bool masquerade_tg_check(const struct xt_tgchk_param *par) 31static bool masquerade_tg_check(const struct xt_tgchk_param *par)
35{ 32{
@@ -79,9 +76,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par)
79 return NF_DROP; 76 return NF_DROP;
80 } 77 }
81 78
82 write_lock_bh(&masq_lock);
83 nat->masq_index = par->out->ifindex; 79 nat->masq_index = par->out->ifindex;
84 write_unlock_bh(&masq_lock);
85 80
86 /* Transfer from original range. */ 81 /* Transfer from original range. */
87 newrange = ((struct nf_nat_range) 82 newrange = ((struct nf_nat_range)
@@ -97,16 +92,11 @@ static int
97device_cmp(struct nf_conn *i, void *ifindex) 92device_cmp(struct nf_conn *i, void *ifindex)
98{ 93{
99 const struct nf_conn_nat *nat = nfct_nat(i); 94 const struct nf_conn_nat *nat = nfct_nat(i);
100 int ret;
101 95
102 if (!nat) 96 if (!nat)
103 return 0; 97 return 0;
104 98
105 read_lock_bh(&masq_lock); 99 return nat->masq_index == (int)(long)ifindex;
106 ret = (nat->masq_index == (int)(long)ifindex);
107 read_unlock_bh(&masq_lock);
108
109 return ret;
110} 100}
111 101
112static int masq_device_event(struct notifier_block *this, 102static int masq_device_event(struct notifier_block *this,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 23b2c2ee869a..d71ba7677344 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -82,18 +82,10 @@ static int icmp_packet(struct nf_conn *ct,
82 u_int8_t pf, 82 u_int8_t pf,
83 unsigned int hooknum) 83 unsigned int hooknum)
84{ 84{
85 /* Try to delete connection immediately after all replies: 85 /* Do not immediately delete the connection after the first
86 won't actually vanish as we still have skb, and del_timer 86 successful reply to avoid excessive conntrackd traffic
87 means this will only run once even if count hits zero twice 87 and also to handle correctly ICMP echo reply duplicates. */
88 (theoretically possible with SMP) */ 88 nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
89 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
90 if (atomic_dec_and_test(&ct->proto.icmp.count))
91 nf_ct_kill_acct(ct, ctinfo, skb);
92 } else {
93 atomic_inc(&ct->proto.icmp.count);
94 nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
95 nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
96 }
97 89
98 return NF_ACCEPT; 90 return NF_ACCEPT;
99} 91}
@@ -117,7 +109,6 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
117 nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple); 109 nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
118 return false; 110 return false;
119 } 111 }
120 atomic_set(&ct->proto.icmp.count, 0);
121 return true; 112 return true;
122} 113}
123 114
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index b693f841aeb4..1cf3f0c6a959 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -598,7 +598,7 @@ static int __init ip6_queue_init(void)
598#ifdef CONFIG_SYSCTL 598#ifdef CONFIG_SYSCTL
599 ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table); 599 ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table);
600#endif 600#endif
601 status = nf_register_queue_handler(PF_INET6, &nfqh); 601 status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh);
602 if (status < 0) { 602 if (status < 0) {
603 printk(KERN_ERR "ip6_queue: failed to register queue handler\n"); 603 printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
604 goto cleanup_sysctl; 604 goto cleanup_sysctl;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 219e165aea10..ced1f2c0cb65 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -270,8 +270,8 @@ static struct nf_loginfo trace_loginfo = {
270/* Mildly perf critical (only if packet tracing is on) */ 270/* Mildly perf critical (only if packet tracing is on) */
271static inline int 271static inline int
272get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e, 272get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e,
273 char *hookname, char **chainname, 273 const char *hookname, const char **chainname,
274 char **comment, unsigned int *rulenum) 274 const char **comment, unsigned int *rulenum)
275{ 275{
276 struct ip6t_standard_target *t = (void *)ip6t_get_target(s); 276 struct ip6t_standard_target *t = (void *)ip6t_get_target(s);
277 277
@@ -289,8 +289,8 @@ get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e,
289 && unconditional(&s->ipv6)) { 289 && unconditional(&s->ipv6)) {
290 /* Tail of chains: STANDARD target (return/policy) */ 290 /* Tail of chains: STANDARD target (return/policy) */
291 *comment = *chainname == hookname 291 *comment = *chainname == hookname
292 ? (char *)comments[NF_IP6_TRACE_COMMENT_POLICY] 292 ? comments[NF_IP6_TRACE_COMMENT_POLICY]
293 : (char *)comments[NF_IP6_TRACE_COMMENT_RETURN]; 293 : comments[NF_IP6_TRACE_COMMENT_RETURN];
294 } 294 }
295 return 1; 295 return 1;
296 } else 296 } else
@@ -309,14 +309,14 @@ static void trace_packet(struct sk_buff *skb,
309{ 309{
310 void *table_base; 310 void *table_base;
311 const struct ip6t_entry *root; 311 const struct ip6t_entry *root;
312 char *hookname, *chainname, *comment; 312 const char *hookname, *chainname, *comment;
313 unsigned int rulenum = 0; 313 unsigned int rulenum = 0;
314 314
315 table_base = (void *)private->entries[smp_processor_id()]; 315 table_base = private->entries[smp_processor_id()];
316 root = get_entry(table_base, private->hook_entry[hook]); 316 root = get_entry(table_base, private->hook_entry[hook]);
317 317
318 hookname = chainname = (char *)hooknames[hook]; 318 hookname = chainname = hooknames[hook];
319 comment = (char *)comments[NF_IP6_TRACE_COMMENT_RULE]; 319 comment = comments[NF_IP6_TRACE_COMMENT_RULE];
320 320
321 IP6T_ENTRY_ITERATE(root, 321 IP6T_ENTRY_ITERATE(root,
322 private->size - private->hook_entry[hook], 322 private->size - private->hook_entry[hook],
@@ -329,6 +329,12 @@ static void trace_packet(struct sk_buff *skb,
329} 329}
330#endif 330#endif
331 331
332static inline __pure struct ip6t_entry *
333ip6t_next_entry(const struct ip6t_entry *entry)
334{
335 return (void *)entry + entry->next_offset;
336}
337
332/* Returns one of the generic firewall policies, like NF_ACCEPT. */ 338/* Returns one of the generic firewall policies, like NF_ACCEPT. */
333unsigned int 339unsigned int
334ip6t_do_table(struct sk_buff *skb, 340ip6t_do_table(struct sk_buff *skb,
@@ -337,6 +343,8 @@ ip6t_do_table(struct sk_buff *skb,
337 const struct net_device *out, 343 const struct net_device *out,
338 struct xt_table *table) 344 struct xt_table *table)
339{ 345{
346#define tb_comefrom ((struct ip6t_entry *)table_base)->comefrom
347
340 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); 348 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
341 bool hotdrop = false; 349 bool hotdrop = false;
342 /* Initializing verdict to NF_DROP keeps gcc happy. */ 350 /* Initializing verdict to NF_DROP keeps gcc happy. */
@@ -361,7 +369,7 @@ ip6t_do_table(struct sk_buff *skb,
361 mtpar.in = tgpar.in = in; 369 mtpar.in = tgpar.in = in;
362 mtpar.out = tgpar.out = out; 370 mtpar.out = tgpar.out = out;
363 mtpar.family = tgpar.family = NFPROTO_IPV6; 371 mtpar.family = tgpar.family = NFPROTO_IPV6;
364 tgpar.hooknum = hook; 372 mtpar.hooknum = tgpar.hooknum = hook;
365 373
366 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 374 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
367 375
@@ -375,96 +383,86 @@ ip6t_do_table(struct sk_buff *skb,
375 back = get_entry(table_base, private->underflow[hook]); 383 back = get_entry(table_base, private->underflow[hook]);
376 384
377 do { 385 do {
386 struct ip6t_entry_target *t;
387
378 IP_NF_ASSERT(e); 388 IP_NF_ASSERT(e);
379 IP_NF_ASSERT(back); 389 IP_NF_ASSERT(back);
380 if (ip6_packet_match(skb, indev, outdev, &e->ipv6, 390 if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
381 &mtpar.thoff, &mtpar.fragoff, &hotdrop)) { 391 &mtpar.thoff, &mtpar.fragoff, &hotdrop) ||
382 struct ip6t_entry_target *t; 392 IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) {
383 393 e = ip6t_next_entry(e);
384 if (IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) 394 continue;
385 goto no_match; 395 }
386 396
387 ADD_COUNTER(e->counters, 397 ADD_COUNTER(e->counters,
388 ntohs(ipv6_hdr(skb)->payload_len) + 398 ntohs(ipv6_hdr(skb)->payload_len) +
389 sizeof(struct ipv6hdr), 1); 399 sizeof(struct ipv6hdr), 1);
390 400
391 t = ip6t_get_target(e); 401 t = ip6t_get_target(e);
392 IP_NF_ASSERT(t->u.kernel.target); 402 IP_NF_ASSERT(t->u.kernel.target);
393 403
394#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 404#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
395 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 405 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
396 /* The packet is traced: log it */ 406 /* The packet is traced: log it */
397 if (unlikely(skb->nf_trace)) 407 if (unlikely(skb->nf_trace))
398 trace_packet(skb, hook, in, out, 408 trace_packet(skb, hook, in, out,
399 table->name, private, e); 409 table->name, private, e);
400#endif 410#endif
401 /* Standard target? */ 411 /* Standard target? */
402 if (!t->u.kernel.target->target) { 412 if (!t->u.kernel.target->target) {
403 int v; 413 int v;
404 414
405 v = ((struct ip6t_standard_target *)t)->verdict; 415 v = ((struct ip6t_standard_target *)t)->verdict;
406 if (v < 0) { 416 if (v < 0) {
407 /* Pop from stack? */ 417 /* Pop from stack? */
408 if (v != IP6T_RETURN) { 418 if (v != IP6T_RETURN) {
409 verdict = (unsigned)(-v) - 1; 419 verdict = (unsigned)(-v) - 1;
410 break; 420 break;
411 }
412 e = back;
413 back = get_entry(table_base,
414 back->comefrom);
415 continue;
416 }
417 if (table_base + v != (void *)e + e->next_offset
418 && !(e->ipv6.flags & IP6T_F_GOTO)) {
419 /* Save old back ptr in next entry */
420 struct ip6t_entry *next
421 = (void *)e + e->next_offset;
422 next->comefrom
423 = (void *)back - table_base;
424 /* set back pointer to next entry */
425 back = next;
426 } 421 }
422 e = back;
423 back = get_entry(table_base, back->comefrom);
424 continue;
425 }
426 if (table_base + v != ip6t_next_entry(e)
427 && !(e->ipv6.flags & IP6T_F_GOTO)) {
428 /* Save old back ptr in next entry */
429 struct ip6t_entry *next = ip6t_next_entry(e);
430 next->comefrom = (void *)back - table_base;
431 /* set back pointer to next entry */
432 back = next;
433 }
427 434
428 e = get_entry(table_base, v); 435 e = get_entry(table_base, v);
429 } else { 436 continue;
430 /* Targets which reenter must return 437 }
431 abs. verdicts */
432 tgpar.target = t->u.kernel.target;
433 tgpar.targinfo = t->data;
434 438
435#ifdef CONFIG_NETFILTER_DEBUG 439 /* Targets which reenter must return
436 ((struct ip6t_entry *)table_base)->comefrom 440 abs. verdicts */
437 = 0xeeeeeeec; 441 tgpar.target = t->u.kernel.target;
438#endif 442 tgpar.targinfo = t->data;
439 verdict = t->u.kernel.target->target(skb,
440 &tgpar);
441 443
442#ifdef CONFIG_NETFILTER_DEBUG 444#ifdef CONFIG_NETFILTER_DEBUG
443 if (((struct ip6t_entry *)table_base)->comefrom 445 tb_comefrom = 0xeeeeeeec;
444 != 0xeeeeeeec
445 && verdict == IP6T_CONTINUE) {
446 printk("Target %s reentered!\n",
447 t->u.kernel.target->name);
448 verdict = NF_DROP;
449 }
450 ((struct ip6t_entry *)table_base)->comefrom
451 = 0x57acc001;
452#endif 446#endif
453 if (verdict == IP6T_CONTINUE) 447 verdict = t->u.kernel.target->target(skb, &tgpar);
454 e = (void *)e + e->next_offset;
455 else
456 /* Verdict */
457 break;
458 }
459 } else {
460 448
461 no_match: 449#ifdef CONFIG_NETFILTER_DEBUG
462 e = (void *)e + e->next_offset; 450 if (tb_comefrom != 0xeeeeeeec && verdict == IP6T_CONTINUE) {
451 printk("Target %s reentered!\n",
452 t->u.kernel.target->name);
453 verdict = NF_DROP;
463 } 454 }
455 tb_comefrom = 0x57acc001;
456#endif
457 if (verdict == IP6T_CONTINUE)
458 e = ip6t_next_entry(e);
459 else
460 /* Verdict */
461 break;
464 } while (!hotdrop); 462 } while (!hotdrop);
465 463
466#ifdef CONFIG_NETFILTER_DEBUG 464#ifdef CONFIG_NETFILTER_DEBUG
467 ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; 465 tb_comefrom = NETFILTER_LINK_POISON;
468#endif 466#endif
469 xt_info_rdunlock_bh(); 467 xt_info_rdunlock_bh();
470 468
@@ -475,6 +473,8 @@ ip6t_do_table(struct sk_buff *skb,
475 return NF_DROP; 473 return NF_DROP;
476 else return verdict; 474 else return verdict;
477#endif 475#endif
476
477#undef tb_comefrom
478} 478}
479 479
480/* Figures out from what hook each rule can be called: returns 0 if 480/* Figures out from what hook each rule can be called: returns 0 if
@@ -2191,7 +2191,7 @@ static bool icmp6_checkentry(const struct xt_mtchk_param *par)
2191static struct xt_target ip6t_standard_target __read_mostly = { 2191static struct xt_target ip6t_standard_target __read_mostly = {
2192 .name = IP6T_STANDARD_TARGET, 2192 .name = IP6T_STANDARD_TARGET,
2193 .targetsize = sizeof(int), 2193 .targetsize = sizeof(int),
2194 .family = AF_INET6, 2194 .family = NFPROTO_IPV6,
2195#ifdef CONFIG_COMPAT 2195#ifdef CONFIG_COMPAT
2196 .compatsize = sizeof(compat_int_t), 2196 .compatsize = sizeof(compat_int_t),
2197 .compat_from_user = compat_standard_from_user, 2197 .compat_from_user = compat_standard_from_user,
@@ -2203,7 +2203,7 @@ static struct xt_target ip6t_error_target __read_mostly = {
2203 .name = IP6T_ERROR_TARGET, 2203 .name = IP6T_ERROR_TARGET,
2204 .target = ip6t_error, 2204 .target = ip6t_error,
2205 .targetsize = IP6T_FUNCTION_MAXNAMELEN, 2205 .targetsize = IP6T_FUNCTION_MAXNAMELEN,
2206 .family = AF_INET6, 2206 .family = NFPROTO_IPV6,
2207}; 2207};
2208 2208
2209static struct nf_sockopt_ops ip6t_sockopts = { 2209static struct nf_sockopt_ops ip6t_sockopts = {
@@ -2229,17 +2229,17 @@ static struct xt_match icmp6_matchstruct __read_mostly = {
2229 .matchsize = sizeof(struct ip6t_icmp), 2229 .matchsize = sizeof(struct ip6t_icmp),
2230 .checkentry = icmp6_checkentry, 2230 .checkentry = icmp6_checkentry,
2231 .proto = IPPROTO_ICMPV6, 2231 .proto = IPPROTO_ICMPV6,
2232 .family = AF_INET6, 2232 .family = NFPROTO_IPV6,
2233}; 2233};
2234 2234
2235static int __net_init ip6_tables_net_init(struct net *net) 2235static int __net_init ip6_tables_net_init(struct net *net)
2236{ 2236{
2237 return xt_proto_init(net, AF_INET6); 2237 return xt_proto_init(net, NFPROTO_IPV6);
2238} 2238}
2239 2239
2240static void __net_exit ip6_tables_net_exit(struct net *net) 2240static void __net_exit ip6_tables_net_exit(struct net *net)
2241{ 2241{
2242 xt_proto_fini(net, AF_INET6); 2242 xt_proto_fini(net, NFPROTO_IPV6);
2243} 2243}
2244 2244
2245static struct pernet_operations ip6_tables_net_ops = { 2245static struct pernet_operations ip6_tables_net_ops = {
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 9903227bf37c..642dcb127bab 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -95,18 +95,10 @@ static int icmpv6_packet(struct nf_conn *ct,
95 u_int8_t pf, 95 u_int8_t pf,
96 unsigned int hooknum) 96 unsigned int hooknum)
97{ 97{
98 /* Try to delete connection immediately after all replies: 98 /* Do not immediately delete the connection after the first
99 won't actually vanish as we still have skb, and del_timer 99 successful reply to avoid excessive conntrackd traffic
100 means this will only run once even if count hits zero twice 100 and also to handle correctly ICMP echo reply duplicates. */
101 (theoretically possible with SMP) */ 101 nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
102 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
103 if (atomic_dec_and_test(&ct->proto.icmp.count))
104 nf_ct_kill_acct(ct, ctinfo, skb);
105 } else {
106 atomic_inc(&ct->proto.icmp.count);
107 nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
108 nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout);
109 }
110 102
111 return NF_ACCEPT; 103 return NF_ACCEPT;
112} 104}
@@ -132,7 +124,6 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
132 type + 128); 124 type + 128);
133 return false; 125 return false;
134 } 126 }
135 atomic_set(&ct->proto.icmp.count, 0);
136 return true; 127 return true;
137} 128}
138 129
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index cb3ad741ebf8..79ba47f042c0 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -917,6 +917,19 @@ config NETFILTER_XT_MATCH_U32
917 917
918 Details and examples are in the kernel module source. 918 Details and examples are in the kernel module source.
919 919
920config NETFILTER_XT_MATCH_OSF
921 tristate '"osf" Passive OS fingerprint match'
922 depends on NETFILTER_ADVANCED && NETFILTER_NETLINK
923 help
924 This option selects the Passive OS Fingerprinting match module
925 that allows to passively match the remote operating system by
926 analyzing incoming TCP SYN packets.
927
928 Rules and loading software can be downloaded from
929 http://www.ioremap.net/projects/osf
930
931 To compile it as a module, choose M here. If unsure, say N.
932
920endif # NETFILTER_XTABLES 933endif # NETFILTER_XTABLES
921 934
922endmenu 935endmenu
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 6282060fbda9..49f62ee4e9ff 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
77obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o 77obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
78obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o 78obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o
79obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o 79obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
80obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
80obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o 81obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
81obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o 82obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
82obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o 83obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8020db6274b8..edf95695e0aa 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -398,11 +398,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
398 help = nfct_help(ct); 398 help = nfct_help(ct);
399 if (help && help->helper) 399 if (help && help->helper)
400 nf_conntrack_event_cache(IPCT_HELPER, ct); 400 nf_conntrack_event_cache(IPCT_HELPER, ct);
401#ifdef CONFIG_NF_NAT_NEEDED 401
402 if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
403 test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
404 nf_conntrack_event_cache(IPCT_NATINFO, ct);
405#endif
406 nf_conntrack_event_cache(master_ct(ct) ? 402 nf_conntrack_event_cache(master_ct(ct) ?
407 IPCT_RELATED : IPCT_NEW, ct); 403 IPCT_RELATED : IPCT_NEW, ct);
408 return NF_ACCEPT; 404 return NF_ACCEPT;
@@ -523,6 +519,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
523 return ERR_PTR(-ENOMEM); 519 return ERR_PTR(-ENOMEM);
524 } 520 }
525 521
522 spin_lock_init(&ct->lock);
526 atomic_set(&ct->ct_general.use, 1); 523 atomic_set(&ct->ct_general.use, 1);
527 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; 524 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
528 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; 525 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
@@ -807,8 +804,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
807 unsigned long extra_jiffies, 804 unsigned long extra_jiffies,
808 int do_acct) 805 int do_acct)
809{ 806{
810 int event = 0;
811
812 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); 807 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
813 NF_CT_ASSERT(skb); 808 NF_CT_ASSERT(skb);
814 809
@@ -821,7 +816,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
821 /* If not in hash table, timer will not be active yet */ 816 /* If not in hash table, timer will not be active yet */
822 if (!nf_ct_is_confirmed(ct)) { 817 if (!nf_ct_is_confirmed(ct)) {
823 ct->timeout.expires = extra_jiffies; 818 ct->timeout.expires = extra_jiffies;
824 event = IPCT_REFRESH;
825 } else { 819 } else {
826 unsigned long newtime = jiffies + extra_jiffies; 820 unsigned long newtime = jiffies + extra_jiffies;
827 821
@@ -832,7 +826,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
832 && del_timer(&ct->timeout)) { 826 && del_timer(&ct->timeout)) {
833 ct->timeout.expires = newtime; 827 ct->timeout.expires = newtime;
834 add_timer(&ct->timeout); 828 add_timer(&ct->timeout);
835 event = IPCT_REFRESH;
836 } 829 }
837 } 830 }
838 831
@@ -849,10 +842,6 @@ acct:
849 } 842 }
850 843
851 spin_unlock_bh(&nf_conntrack_lock); 844 spin_unlock_bh(&nf_conntrack_lock);
852
853 /* must be unlocked when calling event cache */
854 if (event)
855 nf_conntrack_event_cache(event, ct);
856} 845}
857EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); 846EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
858 847
@@ -1001,7 +990,7 @@ struct __nf_ct_flush_report {
1001 int report; 990 int report;
1002}; 991};
1003 992
1004static int kill_all(struct nf_conn *i, void *data) 993static int kill_report(struct nf_conn *i, void *data)
1005{ 994{
1006 struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; 995 struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
1007 996
@@ -1013,6 +1002,11 @@ static int kill_all(struct nf_conn *i, void *data)
1013 return 1; 1002 return 1;
1014} 1003}
1015 1004
1005static int kill_all(struct nf_conn *i, void *data)
1006{
1007 return 1;
1008}
1009
1016void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size) 1010void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
1017{ 1011{
1018 if (vmalloced) 1012 if (vmalloced)
@@ -1023,15 +1017,15 @@ void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
1023} 1017}
1024EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); 1018EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
1025 1019
1026void nf_conntrack_flush(struct net *net, u32 pid, int report) 1020void nf_conntrack_flush_report(struct net *net, u32 pid, int report)
1027{ 1021{
1028 struct __nf_ct_flush_report fr = { 1022 struct __nf_ct_flush_report fr = {
1029 .pid = pid, 1023 .pid = pid,
1030 .report = report, 1024 .report = report,
1031 }; 1025 };
1032 nf_ct_iterate_cleanup(net, kill_all, &fr); 1026 nf_ct_iterate_cleanup(net, kill_report, &fr);
1033} 1027}
1034EXPORT_SYMBOL_GPL(nf_conntrack_flush); 1028EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
1035 1029
1036static void nf_conntrack_cleanup_init_net(void) 1030static void nf_conntrack_cleanup_init_net(void)
1037{ 1031{
@@ -1045,7 +1039,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
1045 nf_ct_event_cache_flush(net); 1039 nf_ct_event_cache_flush(net);
1046 nf_conntrack_ecache_fini(net); 1040 nf_conntrack_ecache_fini(net);
1047 i_see_dead_people: 1041 i_see_dead_people:
1048 nf_conntrack_flush(net, 0, 0); 1042 nf_ct_iterate_cleanup(net, kill_all, NULL);
1049 if (atomic_read(&net->ct.count) != 0) { 1043 if (atomic_read(&net->ct.count) != 0) {
1050 schedule(); 1044 schedule();
1051 goto i_see_dead_people; 1045 goto i_see_dead_people;
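
In nf_conntrack_core.c the old kill_all() is renamed to kill_report() (it delivers a destroy event for every entry it flushes on behalf of a ctnetlink request), and a new, silent kill_all() is added for namespace teardown, where no listener is left to notify. Both are just predicates handed to nf_ct_iterate_cleanup(). A hedged userspace sketch of that iterate-with-callback shape, with made-up structure names:

#include <stdio.h>

struct conn { int id; };

struct flush_report {
        unsigned int pid;
        int report;
};

/* Reported flush: announce each entry before deleting it. */
static int kill_report(struct conn *c, void *data)
{
        struct flush_report *fr = data;

        printf("destroy conn %d, reported to pid %u\n", c->id, fr->pid);
        return 1;               /* nonzero: delete this entry */
}

/* Silent flush used at cleanup time: delete everything, no events. */
static int kill_all(struct conn *c, void *data)
{
        (void)c; (void)data;
        return 1;
}

/* Stand-in for nf_ct_iterate_cleanup(): delete what the callback
 * approves of. */
static void iterate_cleanup(struct conn *tbl, int n,
                            int (*iter)(struct conn *, void *), void *data)
{
        int i;

        for (i = 0; i < n; i++)
                if (iter(&tbl[i], data))
                        tbl[i].id = 0;  /* real code unlinks and puts the ct */
}

int main(void)
{
        struct conn table[3] = { { 1 }, { 2 }, { 3 } };
        struct flush_report fr = { .pid = 4242, .report = 1 };

        iterate_cleanup(table, 3, kill_report, &fr);
        iterate_cleanup(table, 3, kill_all, NULL);
        return 0;
}
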
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index dee4190209cc..5516b3e64b43 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -16,24 +16,32 @@
16#include <linux/stddef.h> 16#include <linux/stddef.h>
17#include <linux/err.h> 17#include <linux/err.h>
18#include <linux/percpu.h> 18#include <linux/percpu.h>
19#include <linux/notifier.h>
20#include <linux/kernel.h> 19#include <linux/kernel.h>
21#include <linux/netdevice.h> 20#include <linux/netdevice.h>
22 21
23#include <net/netfilter/nf_conntrack.h> 22#include <net/netfilter/nf_conntrack.h>
24#include <net/netfilter/nf_conntrack_core.h> 23#include <net/netfilter/nf_conntrack_core.h>
25 24
26ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain); 25static DEFINE_MUTEX(nf_ct_ecache_mutex);
27EXPORT_SYMBOL_GPL(nf_conntrack_chain);
28 26
29ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain); 27struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly;
30EXPORT_SYMBOL_GPL(nf_ct_expect_chain); 28EXPORT_SYMBOL_GPL(nf_conntrack_event_cb);
29
30struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly;
31EXPORT_SYMBOL_GPL(nf_expect_event_cb);
31 32
32/* deliver cached events and clear cache entry - must be called with locally 33/* deliver cached events and clear cache entry - must be called with locally
33 * disabled softirqs */ 34 * disabled softirqs */
34static inline void 35static inline void
35__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) 36__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
36{ 37{
38 struct nf_ct_event_notifier *notify;
39
40 rcu_read_lock();
41 notify = rcu_dereference(nf_conntrack_event_cb);
42 if (notify == NULL)
43 goto out_unlock;
44
37 if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct) 45 if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
38 && ecache->events) { 46 && ecache->events) {
39 struct nf_ct_event item = { 47 struct nf_ct_event item = {
@@ -42,14 +50,15 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
42 .report = 0 50 .report = 0
43 }; 51 };
44 52
45 atomic_notifier_call_chain(&nf_conntrack_chain, 53 notify->fcn(ecache->events, &item);
46 ecache->events,
47 &item);
48 } 54 }
49 55
50 ecache->events = 0; 56 ecache->events = 0;
51 nf_ct_put(ecache->ct); 57 nf_ct_put(ecache->ct);
52 ecache->ct = NULL; 58 ecache->ct = NULL;
59
60out_unlock:
61 rcu_read_unlock();
53} 62}
54 63
55/* Deliver all cached events for a particular conntrack. This is called 64/* Deliver all cached events for a particular conntrack. This is called
@@ -111,26 +120,68 @@ void nf_conntrack_ecache_fini(struct net *net)
111 free_percpu(net->ct.ecache); 120 free_percpu(net->ct.ecache);
112} 121}
113 122
114int nf_conntrack_register_notifier(struct notifier_block *nb) 123int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
115{ 124{
116 return atomic_notifier_chain_register(&nf_conntrack_chain, nb); 125 int ret = 0;
126 struct nf_ct_event_notifier *notify;
127
128 mutex_lock(&nf_ct_ecache_mutex);
129 notify = rcu_dereference(nf_conntrack_event_cb);
130 if (notify != NULL) {
131 ret = -EBUSY;
132 goto out_unlock;
133 }
134 rcu_assign_pointer(nf_conntrack_event_cb, new);
135 mutex_unlock(&nf_ct_ecache_mutex);
136 return ret;
137
138out_unlock:
139 mutex_unlock(&nf_ct_ecache_mutex);
140 return ret;
117} 141}
118EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); 142EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
119 143
120int nf_conntrack_unregister_notifier(struct notifier_block *nb) 144void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
121{ 145{
122 return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb); 146 struct nf_ct_event_notifier *notify;
147
148 mutex_lock(&nf_ct_ecache_mutex);
149 notify = rcu_dereference(nf_conntrack_event_cb);
150 BUG_ON(notify != new);
151 rcu_assign_pointer(nf_conntrack_event_cb, NULL);
152 mutex_unlock(&nf_ct_ecache_mutex);
123} 153}
124EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); 154EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
125 155
126int nf_ct_expect_register_notifier(struct notifier_block *nb) 156int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new)
127{ 157{
128 return atomic_notifier_chain_register(&nf_ct_expect_chain, nb); 158 int ret = 0;
159 struct nf_exp_event_notifier *notify;
160
161 mutex_lock(&nf_ct_ecache_mutex);
162 notify = rcu_dereference(nf_expect_event_cb);
163 if (notify != NULL) {
164 ret = -EBUSY;
165 goto out_unlock;
166 }
167 rcu_assign_pointer(nf_expect_event_cb, new);
168 mutex_unlock(&nf_ct_ecache_mutex);
169 return ret;
170
171out_unlock:
172 mutex_unlock(&nf_ct_ecache_mutex);
173 return ret;
129} 174}
130EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); 175EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
131 176
132int nf_ct_expect_unregister_notifier(struct notifier_block *nb) 177void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
133{ 178{
134 return atomic_notifier_chain_unregister(&nf_ct_expect_chain, nb); 179 struct nf_exp_event_notifier *notify;
180
181 mutex_lock(&nf_ct_ecache_mutex);
182 notify = rcu_dereference(nf_expect_event_cb);
183 BUG_ON(notify != new);
184 rcu_assign_pointer(nf_expect_event_cb, NULL);
185 mutex_unlock(&nf_ct_ecache_mutex);
135} 186}
136EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); 187EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
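
The ecache rewrite above drops the atomic notifier chains in favour of a single callback slot per event type: registration is serialized by nf_ct_ecache_mutex and fails with -EBUSY if a notifier is already installed, while the delivery path reads the pointer under rcu_read_lock(). For a consumer such as ctnetlink this means filling in a structure with a .fcn callback instead of a notifier_block. A hedged, kernel-style sketch of the consumer side; the my_* names are illustrative, only the registration API and the .fcn/.ct fields come from the patch:

#include <linux/module.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_ecache.h>

/* Called directly (no notifier chain) with the cached event bits and
 * the conntrack that generated them. */
static int my_ct_event(unsigned int events, struct nf_ct_event *item)
{
        struct nf_conn *ct = item->ct;

        if (events & IPCT_DESTROY)
                pr_debug("conntrack %p destroyed\n", ct);
        return 0;
}

static struct nf_ct_event_notifier my_notifier = {
        .fcn    = my_ct_event,
};

static int __init my_init(void)
{
        /* Only one notifier fits in the slot now; a second
         * registration returns -EBUSY instead of chaining. */
        return nf_conntrack_register_notifier(&my_notifier);
}

static void __exit my_exit(void)
{
        nf_conntrack_unregister_notifier(&my_notifier);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");
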
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 00fecc385f9b..5509dd1f14cf 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -338,11 +338,9 @@ static void update_nl_seq(struct nf_conn *ct, u32 nl_seq,
338 338
339 if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { 339 if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
340 info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; 340 info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
341 nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
342 } else if (oldest != NUM_SEQ_TO_REMEMBER && 341 } else if (oldest != NUM_SEQ_TO_REMEMBER &&
343 after(nl_seq, info->seq_aft_nl[dir][oldest])) { 342 after(nl_seq, info->seq_aft_nl[dir][oldest])) {
344 info->seq_aft_nl[dir][oldest] = nl_seq; 343 info->seq_aft_nl[dir][oldest] = nl_seq;
345 nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
346 } 344 }
347} 345}
348 346
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c523f0b8cee5..4e503ada5728 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -27,7 +27,6 @@
27#include <linux/netlink.h> 27#include <linux/netlink.h>
28#include <linux/spinlock.h> 28#include <linux/spinlock.h>
29#include <linux/interrupt.h> 29#include <linux/interrupt.h>
30#include <linux/notifier.h>
31 30
32#include <linux/netfilter.h> 31#include <linux/netfilter.h>
33#include <net/netlink.h> 32#include <net/netlink.h>
@@ -144,7 +143,7 @@ nla_put_failure:
144} 143}
145 144
146static inline int 145static inline int
147ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct) 146ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
148{ 147{
149 struct nf_conntrack_l4proto *l4proto; 148 struct nf_conntrack_l4proto *l4proto;
150 struct nlattr *nest_proto; 149 struct nlattr *nest_proto;
@@ -346,23 +345,21 @@ nla_put_failure:
346 return -1; 345 return -1;
347} 346}
348 347
349#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
350
351static int 348static int
352ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, 349ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
353 int event, int nowait, 350 int event, struct nf_conn *ct)
354 const struct nf_conn *ct)
355{ 351{
356 struct nlmsghdr *nlh; 352 struct nlmsghdr *nlh;
357 struct nfgenmsg *nfmsg; 353 struct nfgenmsg *nfmsg;
358 struct nlattr *nest_parms; 354 struct nlattr *nest_parms;
359 unsigned char *b = skb_tail_pointer(skb); 355 unsigned int flags = pid ? NLM_F_MULTI : 0;
360 356
361 event |= NFNL_SUBSYS_CTNETLINK << 8; 357 event |= NFNL_SUBSYS_CTNETLINK << 8;
362 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); 358 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
363 nfmsg = NLMSG_DATA(nlh); 359 if (nlh == NULL)
360 goto nlmsg_failure;
364 361
365 nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; 362 nfmsg = nlmsg_data(nlh);
366 nfmsg->nfgen_family = nf_ct_l3num(ct); 363 nfmsg->nfgen_family = nf_ct_l3num(ct);
367 nfmsg->version = NFNETLINK_V0; 364 nfmsg->version = NFNETLINK_V0;
368 nfmsg->res_id = 0; 365 nfmsg->res_id = 0;
@@ -370,14 +367,14 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
370 nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); 367 nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
371 if (!nest_parms) 368 if (!nest_parms)
372 goto nla_put_failure; 369 goto nla_put_failure;
373 if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) 370 if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
374 goto nla_put_failure; 371 goto nla_put_failure;
375 nla_nest_end(skb, nest_parms); 372 nla_nest_end(skb, nest_parms);
376 373
377 nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); 374 nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
378 if (!nest_parms) 375 if (!nest_parms)
379 goto nla_put_failure; 376 goto nla_put_failure;
380 if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) 377 if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
381 goto nla_put_failure; 378 goto nla_put_failure;
382 nla_nest_end(skb, nest_parms); 379 nla_nest_end(skb, nest_parms);
383 380
@@ -395,104 +392,81 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
395 ctnetlink_dump_nat_seq_adj(skb, ct) < 0) 392 ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
396 goto nla_put_failure; 393 goto nla_put_failure;
397 394
398 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 395 nlmsg_end(skb, nlh);
399 return skb->len; 396 return skb->len;
400 397
401nlmsg_failure: 398nlmsg_failure:
402nla_put_failure: 399nla_put_failure:
403 nlmsg_trim(skb, b); 400 nlmsg_cancel(skb, nlh);
404 return -1; 401 return -1;
405} 402}
406 403
407#ifdef CONFIG_NF_CONNTRACK_EVENTS 404#ifdef CONFIG_NF_CONNTRACK_EVENTS
408/* 405static inline size_t
409 * The general structure of a ctnetlink event is 406ctnetlink_proto_size(const struct nf_conn *ct)
410 *
411 * CTA_TUPLE_ORIG
412 * <l3/l4-proto-attributes>
413 * CTA_TUPLE_REPLY
414 * <l3/l4-proto-attributes>
415 * CTA_ID
416 * ...
417 * CTA_PROTOINFO
418 * <l4-proto-attributes>
419 * CTA_TUPLE_MASTER
420 * <l3/l4-proto-attributes>
421 *
422 * Therefore the formular is
423 *
424 * size = sizeof(headers) + sizeof(generic_nlas) + 3 * sizeof(tuple_nlas)
425 * + sizeof(protoinfo_nlas)
426 */
427static struct sk_buff *
428ctnetlink_alloc_skb(const struct nf_conntrack_tuple *tuple, gfp_t gfp)
429{ 407{
430 struct nf_conntrack_l3proto *l3proto; 408 struct nf_conntrack_l3proto *l3proto;
431 struct nf_conntrack_l4proto *l4proto; 409 struct nf_conntrack_l4proto *l4proto;
432 int len; 410 size_t len = 0;
433 411
434#define NLA_TYPE_SIZE(type) nla_total_size(sizeof(type)) 412 rcu_read_lock();
435 413 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
436 /* proto independant part */ 414 len += l3proto->nla_size;
437 len = NLMSG_SPACE(sizeof(struct nfgenmsg)) 415
438 + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */ 416 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
439 + 3 * nla_total_size(0) /* CTA_TUPLE_IP */ 417 len += l4proto->nla_size;
440 + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */ 418 rcu_read_unlock();
441 + 3 * NLA_TYPE_SIZE(u_int8_t) /* CTA_PROTO_NUM */ 419
442 + NLA_TYPE_SIZE(u_int32_t) /* CTA_ID */ 420 return len;
443 + NLA_TYPE_SIZE(u_int32_t) /* CTA_STATUS */ 421}
422
423static inline size_t
424ctnetlink_nlmsg_size(const struct nf_conn *ct)
425{
426 return NLMSG_ALIGN(sizeof(struct nfgenmsg))
427 + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */
428 + 3 * nla_total_size(0) /* CTA_TUPLE_IP */
429 + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */
430 + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */
431 + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
432 + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
444#ifdef CONFIG_NF_CT_ACCT 433#ifdef CONFIG_NF_CT_ACCT
445 + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */ 434 + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
446 + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_PACKETS */ 435 + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */
447 + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_BYTES */ 436 + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */
448#endif 437#endif
449 + NLA_TYPE_SIZE(u_int32_t) /* CTA_TIMEOUT */ 438 + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
450 + nla_total_size(0) /* CTA_PROTOINFO */ 439 + nla_total_size(0) /* CTA_PROTOINFO */
451 + nla_total_size(0) /* CTA_HELP */ 440 + nla_total_size(0) /* CTA_HELP */
452 + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ 441 + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
453#ifdef CONFIG_NF_CONNTRACK_SECMARK 442#ifdef CONFIG_NF_CONNTRACK_SECMARK
454 + NLA_TYPE_SIZE(u_int32_t) /* CTA_SECMARK */ 443 + nla_total_size(sizeof(u_int32_t)) /* CTA_SECMARK */
455#endif 444#endif
456#ifdef CONFIG_NF_NAT_NEEDED 445#ifdef CONFIG_NF_NAT_NEEDED
457 + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ 446 + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
458 + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_POS */ 447 + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
459 + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_BEFORE */
460 + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_AFTER */
461#endif 448#endif
462#ifdef CONFIG_NF_CONNTRACK_MARK 449#ifdef CONFIG_NF_CONNTRACK_MARK
463 + NLA_TYPE_SIZE(u_int32_t) /* CTA_MARK */ 450 + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
464#endif 451#endif
465 ; 452 + ctnetlink_proto_size(ct)
466 453 ;
467#undef NLA_TYPE_SIZE
468
469 rcu_read_lock();
470 l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
471 len += l3proto->nla_size;
472
473 l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum);
474 len += l4proto->nla_size;
475 rcu_read_unlock();
476
477 return alloc_skb(len, gfp);
478} 454}
479 455
480static int ctnetlink_conntrack_event(struct notifier_block *this, 456static int
481 unsigned long events, void *ptr) 457ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
482{ 458{
483 struct nlmsghdr *nlh; 459 struct nlmsghdr *nlh;
484 struct nfgenmsg *nfmsg; 460 struct nfgenmsg *nfmsg;
485 struct nlattr *nest_parms; 461 struct nlattr *nest_parms;
486 struct nf_ct_event *item = (struct nf_ct_event *)ptr;
487 struct nf_conn *ct = item->ct; 462 struct nf_conn *ct = item->ct;
488 struct sk_buff *skb; 463 struct sk_buff *skb;
489 unsigned int type; 464 unsigned int type;
490 sk_buff_data_t b;
491 unsigned int flags = 0, group; 465 unsigned int flags = 0, group;
492 466
493 /* ignore our fake conntrack entry */ 467 /* ignore our fake conntrack entry */
494 if (ct == &nf_conntrack_untracked) 468 if (ct == &nf_conntrack_untracked)
495 return NOTIFY_DONE; 469 return 0;
496 470
497 if (events & IPCT_DESTROY) { 471 if (events & IPCT_DESTROY) {
498 type = IPCTNL_MSG_CT_DELETE; 472 type = IPCTNL_MSG_CT_DELETE;
@@ -501,26 +475,25 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
501 type = IPCTNL_MSG_CT_NEW; 475 type = IPCTNL_MSG_CT_NEW;
502 flags = NLM_F_CREATE|NLM_F_EXCL; 476 flags = NLM_F_CREATE|NLM_F_EXCL;
503 group = NFNLGRP_CONNTRACK_NEW; 477 group = NFNLGRP_CONNTRACK_NEW;
504 } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) { 478 } else if (events) {
505 type = IPCTNL_MSG_CT_NEW; 479 type = IPCTNL_MSG_CT_NEW;
506 group = NFNLGRP_CONNTRACK_UPDATE; 480 group = NFNLGRP_CONNTRACK_UPDATE;
507 } else 481 } else
508 return NOTIFY_DONE; 482 return 0;
509 483
510 if (!item->report && !nfnetlink_has_listeners(group)) 484 if (!item->report && !nfnetlink_has_listeners(group))
511 return NOTIFY_DONE; 485 return 0;
512 486
513 skb = ctnetlink_alloc_skb(tuple(ct, IP_CT_DIR_ORIGINAL), GFP_ATOMIC); 487 skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC);
514 if (!skb) 488 if (skb == NULL)
515 goto errout; 489 goto errout;
516 490
517 b = skb->tail;
518
519 type |= NFNL_SUBSYS_CTNETLINK << 8; 491 type |= NFNL_SUBSYS_CTNETLINK << 8;
520 nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg)); 492 nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags);
521 nfmsg = NLMSG_DATA(nlh); 493 if (nlh == NULL)
494 goto nlmsg_failure;
522 495
523 nlh->nlmsg_flags = flags; 496 nfmsg = nlmsg_data(nlh);
524 nfmsg->nfgen_family = nf_ct_l3num(ct); 497 nfmsg->nfgen_family = nf_ct_l3num(ct);
525 nfmsg->version = NFNETLINK_V0; 498 nfmsg->version = NFNETLINK_V0;
526 nfmsg->res_id = 0; 499 nfmsg->res_id = 0;
@@ -529,14 +502,14 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
529 nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); 502 nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
530 if (!nest_parms) 503 if (!nest_parms)
531 goto nla_put_failure; 504 goto nla_put_failure;
532 if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) 505 if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
533 goto nla_put_failure; 506 goto nla_put_failure;
534 nla_nest_end(skb, nest_parms); 507 nla_nest_end(skb, nest_parms);
535 508
536 nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); 509 nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
537 if (!nest_parms) 510 if (!nest_parms)
538 goto nla_put_failure; 511 goto nla_put_failure;
539 if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) 512 if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
540 goto nla_put_failure; 513 goto nla_put_failure;
541 nla_nest_end(skb, nest_parms); 514 nla_nest_end(skb, nest_parms);
542 515
@@ -584,17 +557,18 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
584#endif 557#endif
585 rcu_read_unlock(); 558 rcu_read_unlock();
586 559
587 nlh->nlmsg_len = skb->tail - b; 560 nlmsg_end(skb, nlh);
588 nfnetlink_send(skb, item->pid, group, item->report); 561 nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
589 return NOTIFY_DONE; 562 return 0;
590 563
591nla_put_failure: 564nla_put_failure:
592 rcu_read_unlock(); 565 rcu_read_unlock();
566 nlmsg_cancel(skb, nlh);
593nlmsg_failure: 567nlmsg_failure:
594 kfree_skb(skb); 568 kfree_skb(skb);
595errout: 569errout:
596 nfnetlink_set_err(0, group, -ENOBUFS); 570 nfnetlink_set_err(0, group, -ENOBUFS);
597 return NOTIFY_DONE; 571 return 0;
598} 572}
599#endif /* CONFIG_NF_CONNTRACK_EVENTS */ 573#endif /* CONFIG_NF_CONNTRACK_EVENTS */
600 574
@@ -611,7 +585,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
611 struct nf_conn *ct, *last; 585 struct nf_conn *ct, *last;
612 struct nf_conntrack_tuple_hash *h; 586 struct nf_conntrack_tuple_hash *h;
613 struct hlist_nulls_node *n; 587 struct hlist_nulls_node *n;
614 struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); 588 struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
615 u_int8_t l3proto = nfmsg->nfgen_family; 589 u_int8_t l3proto = nfmsg->nfgen_family;
616 590
617 rcu_read_lock(); 591 rcu_read_lock();
@@ -637,8 +611,7 @@ restart:
637 } 611 }
638 if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, 612 if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
639 cb->nlh->nlmsg_seq, 613 cb->nlh->nlmsg_seq,
640 IPCTNL_MSG_CT_NEW, 614 IPCTNL_MSG_CT_NEW, ct) < 0) {
641 1, ct) < 0) {
642 cb->args[1] = (unsigned long)ct; 615 cb->args[1] = (unsigned long)ct;
643 goto out; 616 goto out;
644 } 617 }
@@ -792,7 +765,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
792 struct nf_conntrack_tuple_hash *h; 765 struct nf_conntrack_tuple_hash *h;
793 struct nf_conntrack_tuple tuple; 766 struct nf_conntrack_tuple tuple;
794 struct nf_conn *ct; 767 struct nf_conn *ct;
795 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); 768 struct nfgenmsg *nfmsg = nlmsg_data(nlh);
796 u_int8_t u3 = nfmsg->nfgen_family; 769 u_int8_t u3 = nfmsg->nfgen_family;
797 int err = 0; 770 int err = 0;
798 771
@@ -802,9 +775,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
802 err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); 775 err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
803 else { 776 else {
804 /* Flush the whole table */ 777 /* Flush the whole table */
805 nf_conntrack_flush(&init_net, 778 nf_conntrack_flush_report(&init_net,
806 NETLINK_CB(skb).pid, 779 NETLINK_CB(skb).pid,
807 nlmsg_report(nlh)); 780 nlmsg_report(nlh));
808 return 0; 781 return 0;
809 } 782 }
810 783
@@ -847,7 +820,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
847 struct nf_conntrack_tuple tuple; 820 struct nf_conntrack_tuple tuple;
848 struct nf_conn *ct; 821 struct nf_conn *ct;
849 struct sk_buff *skb2 = NULL; 822 struct sk_buff *skb2 = NULL;
850 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); 823 struct nfgenmsg *nfmsg = nlmsg_data(nlh);
851 u_int8_t u3 = nfmsg->nfgen_family; 824 u_int8_t u3 = nfmsg->nfgen_family;
852 int err = 0; 825 int err = 0;
853 826
@@ -872,15 +845,15 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
872 ct = nf_ct_tuplehash_to_ctrack(h); 845 ct = nf_ct_tuplehash_to_ctrack(h);
873 846
874 err = -ENOMEM; 847 err = -ENOMEM;
875 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 848 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
876 if (!skb2) { 849 if (skb2 == NULL) {
877 nf_ct_put(ct); 850 nf_ct_put(ct);
878 return -ENOMEM; 851 return -ENOMEM;
879 } 852 }
880 853
881 rcu_read_lock(); 854 rcu_read_lock();
882 err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 855 err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
883 IPCTNL_MSG_CT_NEW, 1, ct); 856 IPCTNL_MSG_CT_NEW, ct);
884 rcu_read_unlock(); 857 rcu_read_unlock();
885 nf_ct_put(ct); 858 nf_ct_put(ct);
886 if (err <= 0) 859 if (err <= 0)
@@ -1325,7 +1298,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1325{ 1298{
1326 struct nf_conntrack_tuple otuple, rtuple; 1299 struct nf_conntrack_tuple otuple, rtuple;
1327 struct nf_conntrack_tuple_hash *h = NULL; 1300 struct nf_conntrack_tuple_hash *h = NULL;
1328 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); 1301 struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1329 u_int8_t u3 = nfmsg->nfgen_family; 1302 u_int8_t u3 = nfmsg->nfgen_family;
1330 int err = 0; 1303 int err = 0;
1331 1304
@@ -1503,19 +1476,18 @@ nla_put_failure:
1503 1476
1504static int 1477static int
1505ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, 1478ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
1506 int event, 1479 int event, const struct nf_conntrack_expect *exp)
1507 int nowait,
1508 const struct nf_conntrack_expect *exp)
1509{ 1480{
1510 struct nlmsghdr *nlh; 1481 struct nlmsghdr *nlh;
1511 struct nfgenmsg *nfmsg; 1482 struct nfgenmsg *nfmsg;
1512 unsigned char *b = skb_tail_pointer(skb); 1483 unsigned int flags = pid ? NLM_F_MULTI : 0;
1513 1484
1514 event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; 1485 event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
1515 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); 1486 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
1516 nfmsg = NLMSG_DATA(nlh); 1487 if (nlh == NULL)
1488 goto nlmsg_failure;
1517 1489
1518 nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; 1490 nfmsg = nlmsg_data(nlh);
1519 nfmsg->nfgen_family = exp->tuple.src.l3num; 1491 nfmsg->nfgen_family = exp->tuple.src.l3num;
1520 nfmsg->version = NFNETLINK_V0; 1492 nfmsg->version = NFNETLINK_V0;
1521 nfmsg->res_id = 0; 1493 nfmsg->res_id = 0;
@@ -1523,49 +1495,46 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
1523 if (ctnetlink_exp_dump_expect(skb, exp) < 0) 1495 if (ctnetlink_exp_dump_expect(skb, exp) < 0)
1524 goto nla_put_failure; 1496 goto nla_put_failure;
1525 1497
1526 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 1498 nlmsg_end(skb, nlh);
1527 return skb->len; 1499 return skb->len;
1528 1500
1529nlmsg_failure: 1501nlmsg_failure:
1530nla_put_failure: 1502nla_put_failure:
1531 nlmsg_trim(skb, b); 1503 nlmsg_cancel(skb, nlh);
1532 return -1; 1504 return -1;
1533} 1505}
1534 1506
1535#ifdef CONFIG_NF_CONNTRACK_EVENTS 1507#ifdef CONFIG_NF_CONNTRACK_EVENTS
1536static int ctnetlink_expect_event(struct notifier_block *this, 1508static int
1537 unsigned long events, void *ptr) 1509ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
1538{ 1510{
1539 struct nlmsghdr *nlh; 1511 struct nlmsghdr *nlh;
1540 struct nfgenmsg *nfmsg; 1512 struct nfgenmsg *nfmsg;
1541 struct nf_exp_event *item = (struct nf_exp_event *)ptr;
1542 struct nf_conntrack_expect *exp = item->exp; 1513 struct nf_conntrack_expect *exp = item->exp;
1543 struct sk_buff *skb; 1514 struct sk_buff *skb;
1544 unsigned int type; 1515 unsigned int type;
1545 sk_buff_data_t b;
1546 int flags = 0; 1516 int flags = 0;
1547 1517
1548 if (events & IPEXP_NEW) { 1518 if (events & IPEXP_NEW) {
1549 type = IPCTNL_MSG_EXP_NEW; 1519 type = IPCTNL_MSG_EXP_NEW;
1550 flags = NLM_F_CREATE|NLM_F_EXCL; 1520 flags = NLM_F_CREATE|NLM_F_EXCL;
1551 } else 1521 } else
1552 return NOTIFY_DONE; 1522 return 0;
1553 1523
1554 if (!item->report && 1524 if (!item->report &&
1555 !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) 1525 !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
1556 return NOTIFY_DONE; 1526 return 0;
1557 1527
1558 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); 1528 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1559 if (!skb) 1529 if (skb == NULL)
1560 goto errout; 1530 goto errout;
1561 1531
1562 b = skb->tail;
1563
1564 type |= NFNL_SUBSYS_CTNETLINK_EXP << 8; 1532 type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
1565 nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg)); 1533 nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags);
1566 nfmsg = NLMSG_DATA(nlh); 1534 if (nlh == NULL)
1535 goto nlmsg_failure;
1567 1536
1568 nlh->nlmsg_flags = flags; 1537 nfmsg = nlmsg_data(nlh);
1569 nfmsg->nfgen_family = exp->tuple.src.l3num; 1538 nfmsg->nfgen_family = exp->tuple.src.l3num;
1570 nfmsg->version = NFNETLINK_V0; 1539 nfmsg->version = NFNETLINK_V0;
1571 nfmsg->res_id = 0; 1540 nfmsg->res_id = 0;
@@ -1575,17 +1544,19 @@ static int ctnetlink_expect_event(struct notifier_block *this,
1575 goto nla_put_failure; 1544 goto nla_put_failure;
1576 rcu_read_unlock(); 1545 rcu_read_unlock();
1577 1546
1578 nlh->nlmsg_len = skb->tail - b; 1547 nlmsg_end(skb, nlh);
1579 nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report); 1548 nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW,
1580 return NOTIFY_DONE; 1549 item->report, GFP_ATOMIC);
1550 return 0;
1581 1551
1582nla_put_failure: 1552nla_put_failure:
1583 rcu_read_unlock(); 1553 rcu_read_unlock();
1554 nlmsg_cancel(skb, nlh);
1584nlmsg_failure: 1555nlmsg_failure:
1585 kfree_skb(skb); 1556 kfree_skb(skb);
1586errout: 1557errout:
1587 nfnetlink_set_err(0, 0, -ENOBUFS); 1558 nfnetlink_set_err(0, 0, -ENOBUFS);
1588 return NOTIFY_DONE; 1559 return 0;
1589} 1560}
1590#endif 1561#endif
1591static int ctnetlink_exp_done(struct netlink_callback *cb) 1562static int ctnetlink_exp_done(struct netlink_callback *cb)
@@ -1600,7 +1571,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
1600{ 1571{
1601 struct net *net = &init_net; 1572 struct net *net = &init_net;
1602 struct nf_conntrack_expect *exp, *last; 1573 struct nf_conntrack_expect *exp, *last;
1603 struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); 1574 struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
1604 struct hlist_node *n; 1575 struct hlist_node *n;
1605 u_int8_t l3proto = nfmsg->nfgen_family; 1576 u_int8_t l3proto = nfmsg->nfgen_family;
1606 1577
@@ -1617,10 +1588,11 @@ restart:
1617 continue; 1588 continue;
1618 cb->args[1] = 0; 1589 cb->args[1] = 0;
1619 } 1590 }
1620 if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid, 1591 if (ctnetlink_exp_fill_info(skb,
1592 NETLINK_CB(cb->skb).pid,
1621 cb->nlh->nlmsg_seq, 1593 cb->nlh->nlmsg_seq,
1622 IPCTNL_MSG_EXP_NEW, 1594 IPCTNL_MSG_EXP_NEW,
1623 1, exp) < 0) { 1595 exp) < 0) {
1624 if (!atomic_inc_not_zero(&exp->use)) 1596 if (!atomic_inc_not_zero(&exp->use))
1625 continue; 1597 continue;
1626 cb->args[1] = (unsigned long)exp; 1598 cb->args[1] = (unsigned long)exp;
@@ -1652,7 +1624,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
1652 struct nf_conntrack_tuple tuple; 1624 struct nf_conntrack_tuple tuple;
1653 struct nf_conntrack_expect *exp; 1625 struct nf_conntrack_expect *exp;
1654 struct sk_buff *skb2; 1626 struct sk_buff *skb2;
1655 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); 1627 struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1656 u_int8_t u3 = nfmsg->nfgen_family; 1628 u_int8_t u3 = nfmsg->nfgen_family;
1657 int err = 0; 1629 int err = 0;
1658 1630
@@ -1683,14 +1655,13 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
1683 } 1655 }
1684 1656
1685 err = -ENOMEM; 1657 err = -ENOMEM;
1686 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1658 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1687 if (!skb2) 1659 if (skb2 == NULL)
1688 goto out; 1660 goto out;
1689 1661
1690 rcu_read_lock(); 1662 rcu_read_lock();
1691 err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, 1663 err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
1692 nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, 1664 nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp);
1693 1, exp);
1694 rcu_read_unlock(); 1665 rcu_read_unlock();
1695 if (err <= 0) 1666 if (err <= 0)
1696 goto free; 1667 goto free;
@@ -1713,7 +1684,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1713 struct nf_conntrack_expect *exp; 1684 struct nf_conntrack_expect *exp;
1714 struct nf_conntrack_tuple tuple; 1685 struct nf_conntrack_tuple tuple;
1715 struct nf_conntrack_helper *h; 1686 struct nf_conntrack_helper *h;
1716 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); 1687 struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1717 struct hlist_node *n, *next; 1688 struct hlist_node *n, *next;
1718 u_int8_t u3 = nfmsg->nfgen_family; 1689 u_int8_t u3 = nfmsg->nfgen_family;
1719 unsigned int i; 1690 unsigned int i;
@@ -1854,7 +1825,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
1854{ 1825{
1855 struct nf_conntrack_tuple tuple; 1826 struct nf_conntrack_tuple tuple;
1856 struct nf_conntrack_expect *exp; 1827 struct nf_conntrack_expect *exp;
1857 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); 1828 struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1858 u_int8_t u3 = nfmsg->nfgen_family; 1829 u_int8_t u3 = nfmsg->nfgen_family;
1859 int err = 0; 1830 int err = 0;
1860 1831
@@ -1891,12 +1862,12 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
1891} 1862}
1892 1863
1893#ifdef CONFIG_NF_CONNTRACK_EVENTS 1864#ifdef CONFIG_NF_CONNTRACK_EVENTS
1894static struct notifier_block ctnl_notifier = { 1865static struct nf_ct_event_notifier ctnl_notifier = {
1895 .notifier_call = ctnetlink_conntrack_event, 1866 .fcn = ctnetlink_conntrack_event,
1896}; 1867};
1897 1868
1898static struct notifier_block ctnl_notifier_exp = { 1869static struct nf_exp_event_notifier ctnl_notifier_exp = {
1899 .notifier_call = ctnetlink_expect_event, 1870 .fcn = ctnetlink_expect_event,
1900}; 1871};
1901#endif 1872#endif
1902 1873
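
The ctnetlink conversion above replaces the old NLMSG_PUT()/skb->tail bookkeeping with nlmsg_put()/nlmsg_end()/nlmsg_cancel() and sizes the event skb up front: ctnetlink_nlmsg_size() sums nla_total_size() terms for the fixed attributes and adds the per-protocol l3proto->nla_size and l4proto->nla_size looked up under rcu_read_lock(). The following is a minimal userspace sketch of that size arithmetic, not kernel code; the attribute selection is a hypothetical subset chosen only for illustration.

/*
 * Userspace sketch (not kernel code) of the size pre-computation that
 * ctnetlink_nlmsg_size() performs: each attribute costs a 4-byte nlattr
 * header plus its payload, rounded up to 4-byte alignment, and the sum
 * is handed to the allocator so the message never needs to grow.
 */
#include <stdio.h>
#include <stdint.h>

#define NLA_ALIGNTO    4
#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN     4                        /* sizeof(struct nlattr) */

static size_t nla_total_size(size_t payload)    /* header + padded payload */
{
	return NLA_ALIGN(NLA_HDRLEN + payload);
}

int main(void)
{
	size_t len = 0;

	len += 3 * nla_total_size(0);               /* nested tuple containers */
	len += 3 * nla_total_size(sizeof(uint8_t)); /* one proto number per tuple */
	len += nla_total_size(sizeof(uint32_t));    /* id */
	len += nla_total_size(sizeof(uint32_t));    /* status */
	len += nla_total_size(sizeof(uint32_t));    /* timeout */

	printf("attribute space to reserve: %zu bytes\n", len);
	return 0;
}

Because the allocation is sized exactly, nlmsg_new() either succeeds with room for every attribute or fails immediately, in which case the event path reports -ENOBUFS via nfnetlink_set_err() as shown in the hunk.
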
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index aee0d6bea309..1b816a2ea813 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -25,8 +25,6 @@
25#include <net/netfilter/nf_conntrack_ecache.h> 25#include <net/netfilter/nf_conntrack_ecache.h>
26#include <net/netfilter/nf_log.h> 26#include <net/netfilter/nf_log.h>
27 27
28static DEFINE_RWLOCK(dccp_lock);
29
30/* Timeouts are based on values from RFC4340: 28/* Timeouts are based on values from RFC4340:
31 * 29 *
32 * - REQUEST: 30 * - REQUEST:
@@ -492,7 +490,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
492 return NF_ACCEPT; 490 return NF_ACCEPT;
493 } 491 }
494 492
495 write_lock_bh(&dccp_lock); 493 spin_lock_bh(&ct->lock);
496 494
497 role = ct->proto.dccp.role[dir]; 495 role = ct->proto.dccp.role[dir];
498 old_state = ct->proto.dccp.state; 496 old_state = ct->proto.dccp.state;
@@ -536,13 +534,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
536 ct->proto.dccp.last_dir = dir; 534 ct->proto.dccp.last_dir = dir;
537 ct->proto.dccp.last_pkt = type; 535 ct->proto.dccp.last_pkt = type;
538 536
539 write_unlock_bh(&dccp_lock); 537 spin_unlock_bh(&ct->lock);
540 if (LOG_INVALID(net, IPPROTO_DCCP)) 538 if (LOG_INVALID(net, IPPROTO_DCCP))
541 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 539 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
542 "nf_ct_dccp: invalid packet ignored "); 540 "nf_ct_dccp: invalid packet ignored ");
543 return NF_ACCEPT; 541 return NF_ACCEPT;
544 case CT_DCCP_INVALID: 542 case CT_DCCP_INVALID:
545 write_unlock_bh(&dccp_lock); 543 spin_unlock_bh(&ct->lock);
546 if (LOG_INVALID(net, IPPROTO_DCCP)) 544 if (LOG_INVALID(net, IPPROTO_DCCP))
547 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 545 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
548 "nf_ct_dccp: invalid state transition "); 546 "nf_ct_dccp: invalid state transition ");
@@ -552,7 +550,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
552 ct->proto.dccp.last_dir = dir; 550 ct->proto.dccp.last_dir = dir;
553 ct->proto.dccp.last_pkt = type; 551 ct->proto.dccp.last_pkt = type;
554 ct->proto.dccp.state = new_state; 552 ct->proto.dccp.state = new_state;
555 write_unlock_bh(&dccp_lock); 553 spin_unlock_bh(&ct->lock);
556 554
557 if (new_state != old_state) 555 if (new_state != old_state)
558 nf_conntrack_event_cache(IPCT_PROTOINFO, ct); 556 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
@@ -621,36 +619,39 @@ static int dccp_print_tuple(struct seq_file *s,
621 ntohs(tuple->dst.u.dccp.port)); 619 ntohs(tuple->dst.u.dccp.port));
622} 620}
623 621
624static int dccp_print_conntrack(struct seq_file *s, const struct nf_conn *ct) 622static int dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
625{ 623{
626 return seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]); 624 return seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
627} 625}
628 626
629#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 627#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
630static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, 628static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
631 const struct nf_conn *ct) 629 struct nf_conn *ct)
632{ 630{
633 struct nlattr *nest_parms; 631 struct nlattr *nest_parms;
634 632
635 read_lock_bh(&dccp_lock); 633 spin_lock_bh(&ct->lock);
636 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED); 634 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED);
637 if (!nest_parms) 635 if (!nest_parms)
638 goto nla_put_failure; 636 goto nla_put_failure;
639 NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state); 637 NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state);
640 NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE, 638 NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE,
641 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]); 639 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]);
640 NLA_PUT_BE64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ,
641 cpu_to_be64(ct->proto.dccp.handshake_seq));
642 nla_nest_end(skb, nest_parms); 642 nla_nest_end(skb, nest_parms);
643 read_unlock_bh(&dccp_lock); 643 spin_unlock_bh(&ct->lock);
644 return 0; 644 return 0;
645 645
646nla_put_failure: 646nla_put_failure:
647 read_unlock_bh(&dccp_lock); 647 spin_unlock_bh(&ct->lock);
648 return -1; 648 return -1;
649} 649}
650 650
651static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { 651static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = {
652 [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, 652 [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 },
653 [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, 653 [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 },
654 [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 },
654}; 655};
655 656
656static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) 657static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
@@ -674,7 +675,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
674 return -EINVAL; 675 return -EINVAL;
675 } 676 }
676 677
677 write_lock_bh(&dccp_lock); 678 spin_lock_bh(&ct->lock);
678 ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); 679 ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]);
679 if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) { 680 if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) {
680 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; 681 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
@@ -683,7 +684,11 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
683 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER; 684 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER;
684 ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT; 685 ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT;
685 } 686 }
686 write_unlock_bh(&dccp_lock); 687 if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) {
688 ct->proto.dccp.handshake_seq =
689 be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]));
690 }
691 spin_unlock_bh(&ct->lock);
687 return 0; 692 return 0;
688} 693}
689 694
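
The DCCP tracker drops the global dccp_lock rwlock in favour of the per-conntrack ct->lock spinlock and starts round-tripping handshake_seq through the new CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ attribute. Below is a rough userspace analogue of the locking change, using a hypothetical struct conn in place of nf_conn; it only illustrates the per-object locking pattern, not the real conntrack layout.

/* Userspace analogue (hypothetical types, not kernel code): protocol
 * state is protected by a lock embedded in each connection object, so
 * updates to unrelated connections no longer serialize on one global
 * lock the way they did with the old dccp_lock rwlock. */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct conn {
	pthread_mutex_t lock;     /* stands in for ct->lock */
	int state;                /* stands in for ct->proto.dccp.state */
	uint64_t handshake_seq;   /* the field the patch starts exporting */
};

static void conn_update(struct conn *c, int new_state, uint64_t seq)
{
	pthread_mutex_lock(&c->lock);   /* was: write_lock_bh(&dccp_lock) */
	c->state = new_state;
	c->handshake_seq = seq;
	pthread_mutex_unlock(&c->lock); /* was: write_unlock_bh(&dccp_lock) */
}

int main(void)
{
	struct conn c = { .lock = PTHREAD_MUTEX_INITIALIZER };

	conn_update(&c, 1, 0x1234abcdULL);
	printf("state=%d handshake_seq=%#llx\n",
	       c.state, (unsigned long long)c.handshake_seq);
	return 0;
}

The same lock conversion is applied to SCTP and TCP further down, which is also why print_conntrack() and the *_to_nlattr() callbacks lose the const qualifier on their nf_conn argument: taking ct->lock modifies the object.
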
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index a6d6ec320fbc..a54a0af0edba 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -219,8 +219,7 @@ static int gre_print_tuple(struct seq_file *s,
219} 219}
220 220
221/* print private data for conntrack */ 221/* print private data for conntrack */
222static int gre_print_conntrack(struct seq_file *s, 222static int gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
223 const struct nf_conn *ct)
224{ 223{
225 return seq_printf(s, "timeout=%u, stream_timeout=%u ", 224 return seq_printf(s, "timeout=%u, stream_timeout=%u ",
226 (ct->proto.gre.timeout / HZ), 225 (ct->proto.gre.timeout / HZ),
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 101b4ad9e817..c10e6f36e31e 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -25,9 +25,6 @@
25#include <net/netfilter/nf_conntrack_l4proto.h> 25#include <net/netfilter/nf_conntrack_l4proto.h>
26#include <net/netfilter/nf_conntrack_ecache.h> 26#include <net/netfilter/nf_conntrack_ecache.h>
27 27
28/* Protects ct->proto.sctp */
29static DEFINE_RWLOCK(sctp_lock);
30
31/* FIXME: Examine ipfilter's timeouts and conntrack transitions more 28/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
32 closely. They're more complex. --RR 29 closely. They're more complex. --RR
33 30
@@ -164,13 +161,13 @@ static int sctp_print_tuple(struct seq_file *s,
164} 161}
165 162
166/* Print out the private part of the conntrack. */ 163/* Print out the private part of the conntrack. */
167static int sctp_print_conntrack(struct seq_file *s, const struct nf_conn *ct) 164static int sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
168{ 165{
169 enum sctp_conntrack state; 166 enum sctp_conntrack state;
170 167
171 read_lock_bh(&sctp_lock); 168 spin_lock_bh(&ct->lock);
172 state = ct->proto.sctp.state; 169 state = ct->proto.sctp.state;
173 read_unlock_bh(&sctp_lock); 170 spin_unlock_bh(&ct->lock);
174 171
175 return seq_printf(s, "%s ", sctp_conntrack_names[state]); 172 return seq_printf(s, "%s ", sctp_conntrack_names[state]);
176} 173}
@@ -318,7 +315,7 @@ static int sctp_packet(struct nf_conn *ct,
318 } 315 }
319 316
320 old_state = new_state = SCTP_CONNTRACK_NONE; 317 old_state = new_state = SCTP_CONNTRACK_NONE;
321 write_lock_bh(&sctp_lock); 318 spin_lock_bh(&ct->lock);
322 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { 319 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
323 /* Special cases of Verification tag check (Sec 8.5.1) */ 320 /* Special cases of Verification tag check (Sec 8.5.1) */
324 if (sch->type == SCTP_CID_INIT) { 321 if (sch->type == SCTP_CID_INIT) {
@@ -371,7 +368,7 @@ static int sctp_packet(struct nf_conn *ct,
371 if (old_state != new_state) 368 if (old_state != new_state)
372 nf_conntrack_event_cache(IPCT_PROTOINFO, ct); 369 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
373 } 370 }
374 write_unlock_bh(&sctp_lock); 371 spin_unlock_bh(&ct->lock);
375 372
376 nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]); 373 nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]);
377 374
@@ -386,7 +383,7 @@ static int sctp_packet(struct nf_conn *ct,
386 return NF_ACCEPT; 383 return NF_ACCEPT;
387 384
388out_unlock: 385out_unlock:
389 write_unlock_bh(&sctp_lock); 386 spin_unlock_bh(&ct->lock);
390out: 387out:
391 return -NF_ACCEPT; 388 return -NF_ACCEPT;
392} 389}
@@ -469,11 +466,11 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
469#include <linux/netfilter/nfnetlink_conntrack.h> 466#include <linux/netfilter/nfnetlink_conntrack.h>
470 467
471static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, 468static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
472 const struct nf_conn *ct) 469 struct nf_conn *ct)
473{ 470{
474 struct nlattr *nest_parms; 471 struct nlattr *nest_parms;
475 472
476 read_lock_bh(&sctp_lock); 473 spin_lock_bh(&ct->lock);
477 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED); 474 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED);
478 if (!nest_parms) 475 if (!nest_parms)
479 goto nla_put_failure; 476 goto nla_put_failure;
@@ -488,14 +485,14 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
488 CTA_PROTOINFO_SCTP_VTAG_REPLY, 485 CTA_PROTOINFO_SCTP_VTAG_REPLY,
489 ct->proto.sctp.vtag[IP_CT_DIR_REPLY]); 486 ct->proto.sctp.vtag[IP_CT_DIR_REPLY]);
490 487
491 read_unlock_bh(&sctp_lock); 488 spin_unlock_bh(&ct->lock);
492 489
493 nla_nest_end(skb, nest_parms); 490 nla_nest_end(skb, nest_parms);
494 491
495 return 0; 492 return 0;
496 493
497nla_put_failure: 494nla_put_failure:
498 read_unlock_bh(&sctp_lock); 495 spin_unlock_bh(&ct->lock);
499 return -1; 496 return -1;
500} 497}
501 498
@@ -527,13 +524,13 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
527 !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]) 524 !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY])
528 return -EINVAL; 525 return -EINVAL;
529 526
530 write_lock_bh(&sctp_lock); 527 spin_lock_bh(&ct->lock);
531 ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]); 528 ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]);
532 ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = 529 ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] =
533 nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]); 530 nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]);
534 ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = 531 ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
535 nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]); 532 nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]);
536 write_unlock_bh(&sctp_lock); 533 spin_unlock_bh(&ct->lock);
537 534
538 return 0; 535 return 0;
539} 536}
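
As with DCCP, the SCTP tracker now serializes protocol-state access on ct->lock. Worth noting is the idiom sctp_print_conntrack() keeps: the state is copied while the lock is held and the formatting happens afterwards. The fragment below sketches that snapshot-then-format pattern in userspace with made-up types; it is not the kernel code itself.

/* Userspace sketch (made-up types, not kernel code) of the idiom kept
 * by sctp_print_conntrack(): copy the state while holding the lock,
 * drop the lock, and only then do the slower formatting work. */
#include <pthread.h>
#include <stdio.h>

enum sctp_state { ST_NONE, ST_ESTABLISHED };
static const char *const state_names[] = { "NONE", "ESTABLISHED" };

struct conn {
	pthread_mutex_t lock;          /* stands in for ct->lock */
	enum sctp_state state;
};

static void print_conn(struct conn *c)
{
	enum sctp_state state;

	pthread_mutex_lock(&c->lock);  /* was: read_lock_bh(&sctp_lock) */
	state = c->state;              /* consistent snapshot */
	pthread_mutex_unlock(&c->lock);

	printf("%s\n", state_names[state]);   /* format outside the lock */
}

int main(void)
{
	struct conn c = { .lock = PTHREAD_MUTEX_INITIALIZER,
			  .state = ST_ESTABLISHED };

	print_conn(&c);
	return 0;
}
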
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 97a6e93d742e..33fc0a443f3d 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -29,9 +29,6 @@
29#include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 29#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
31 31
32/* Protects ct->proto.tcp */
33static DEFINE_RWLOCK(tcp_lock);
34
35/* "Be conservative in what you do, 32/* "Be conservative in what you do,
36 be liberal in what you accept from others." 33 be liberal in what you accept from others."
37 If it's non-zero, we mark only out of window RST segments as INVALID. */ 34 If it's non-zero, we mark only out of window RST segments as INVALID. */
@@ -59,7 +56,7 @@ static const char *const tcp_conntrack_names[] = {
59 "LAST_ACK", 56 "LAST_ACK",
60 "TIME_WAIT", 57 "TIME_WAIT",
61 "CLOSE", 58 "CLOSE",
62 "LISTEN" 59 "SYN_SENT2",
63}; 60};
64 61
65#define SECS * HZ 62#define SECS * HZ
@@ -82,6 +79,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
82 [TCP_CONNTRACK_LAST_ACK] = 30 SECS, 79 [TCP_CONNTRACK_LAST_ACK] = 30 SECS,
83 [TCP_CONNTRACK_TIME_WAIT] = 2 MINS, 80 [TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
84 [TCP_CONNTRACK_CLOSE] = 10 SECS, 81 [TCP_CONNTRACK_CLOSE] = 10 SECS,
82 [TCP_CONNTRACK_SYN_SENT2] = 2 MINS,
85}; 83};
86 84
87#define sNO TCP_CONNTRACK_NONE 85#define sNO TCP_CONNTRACK_NONE
@@ -93,7 +91,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
93#define sLA TCP_CONNTRACK_LAST_ACK 91#define sLA TCP_CONNTRACK_LAST_ACK
94#define sTW TCP_CONNTRACK_TIME_WAIT 92#define sTW TCP_CONNTRACK_TIME_WAIT
95#define sCL TCP_CONNTRACK_CLOSE 93#define sCL TCP_CONNTRACK_CLOSE
96#define sLI TCP_CONNTRACK_LISTEN 94#define sS2 TCP_CONNTRACK_SYN_SENT2
97#define sIV TCP_CONNTRACK_MAX 95#define sIV TCP_CONNTRACK_MAX
98#define sIG TCP_CONNTRACK_IGNORE 96#define sIG TCP_CONNTRACK_IGNORE
99 97
@@ -123,6 +121,7 @@ enum tcp_bit_set {
123 * 121 *
124 * NONE: initial state 122 * NONE: initial state
125 * SYN_SENT: SYN-only packet seen 123 * SYN_SENT: SYN-only packet seen
124 * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open
126 * SYN_RECV: SYN-ACK packet seen 125 * SYN_RECV: SYN-ACK packet seen
127 * ESTABLISHED: ACK packet seen 126 * ESTABLISHED: ACK packet seen
128 * FIN_WAIT: FIN packet seen 127 * FIN_WAIT: FIN packet seen
@@ -131,26 +130,24 @@ enum tcp_bit_set {
131 * TIME_WAIT: last ACK seen 130 * TIME_WAIT: last ACK seen
132 * CLOSE: closed connection (RST) 131 * CLOSE: closed connection (RST)
133 * 132 *
134 * LISTEN state is not used.
135 *
136 * Packets marked as IGNORED (sIG): 133 * Packets marked as IGNORED (sIG):
137 * if they may be either invalid or valid 134 * if they may be either invalid or valid
138 * and the receiver may send back a connection 135 * and the receiver may send back a connection
139 * closing RST or a SYN/ACK. 136 * closing RST or a SYN/ACK.
140 * 137 *
141 * Packets marked as INVALID (sIV): 138 * Packets marked as INVALID (sIV):
142 * if they are invalid 139 * if we regard them as truly invalid packets
143 * or we do not support the request (simultaneous open)
144 */ 140 */
145static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { 141static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
146 { 142 {
147/* ORIGINAL */ 143/* ORIGINAL */
148/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 144/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
149/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV }, 145/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
150/* 146/*
151 * sNO -> sSS Initialize a new connection 147 * sNO -> sSS Initialize a new connection
152 * sSS -> sSS Retransmitted SYN 148 * sSS -> sSS Retransmitted SYN
153 * sSR -> sIG Late retransmitted SYN? 149 * sS2 -> sS2 Late retransmitted SYN
150 * sSR -> sIG
154 * sES -> sIG Error: SYNs in window outside the SYN_SENT state 151 * sES -> sIG Error: SYNs in window outside the SYN_SENT state
155 * are errors. Receiver will reply with RST 152 * are errors. Receiver will reply with RST
156 * and close the connection. 153 * and close the connection.
@@ -161,22 +158,30 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
161 * sTW -> sSS Reopened connection (RFC 1122). 158 * sTW -> sSS Reopened connection (RFC 1122).
162 * sCL -> sSS 159 * sCL -> sSS
163 */ 160 */
164/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 161/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
165/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, 162/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
166/* 163/*
167 * A SYN/ACK from the client is always invalid: 164 * sNO -> sIV Too late and no reason to do anything
168 * - either it tries to set up a simultaneous open, which is 165 * sSS -> sIV Client can't send SYN and then SYN/ACK
169 * not supported; 166 * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
170 * - or the firewall has just been inserted between the two hosts 167 * sSR -> sIG
171 * during the session set-up. The SYN will be retransmitted 168 * sES -> sIG Error: SYNs in window outside the SYN_SENT state
172 * by the true client (or it'll time out). 169 * are errors. Receiver will reply with RST
170 * and close the connection.
171 * Or we are not in sync and hold a dead connection.
172 * sFW -> sIG
173 * sCW -> sIG
174 * sLA -> sIG
175 * sTW -> sIG
176 * sCL -> sIG
173 */ 177 */
174/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 178/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
175/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, 179/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
176/* 180/*
177 * sNO -> sIV Too late and no reason to do anything... 181 * sNO -> sIV Too late and no reason to do anything...
178 * sSS -> sIV Client migth not send FIN in this state: 182 * sSS -> sIV Client migth not send FIN in this state:
179 * we enforce waiting for a SYN/ACK reply first. 183 * we enforce waiting for a SYN/ACK reply first.
184 * sS2 -> sIV
180 * sSR -> sFW Close started. 185 * sSR -> sFW Close started.
181 * sES -> sFW 186 * sES -> sFW
182 * sFW -> sLA FIN seen in both directions, waiting for 187 * sFW -> sLA FIN seen in both directions, waiting for
@@ -187,11 +192,12 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
187 * sTW -> sTW 192 * sTW -> sTW
188 * sCL -> sCL 193 * sCL -> sCL
189 */ 194 */
190/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 195/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
191/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, 196/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
192/* 197/*
193 * sNO -> sES Assumed. 198 * sNO -> sES Assumed.
194 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. 199 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
200 * sS2 -> sIV
195 * sSR -> sES Established state is reached. 201 * sSR -> sES Established state is reached.
196 * sES -> sES :-) 202 * sES -> sES :-)
197 * sFW -> sCW Normal close request answered by ACK. 203 * sFW -> sCW Normal close request answered by ACK.
@@ -200,29 +206,31 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
200 * sTW -> sTW Retransmitted last ACK. Remain in the same state. 206 * sTW -> sTW Retransmitted last ACK. Remain in the same state.
201 * sCL -> sCL 207 * sCL -> sCL
202 */ 208 */
203/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 209/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
204/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, 210/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
205/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } 211/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
206 }, 212 },
207 { 213 {
208/* REPLY */ 214/* REPLY */
209/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 215/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
210/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, 216/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
211/* 217/*
212 * sNO -> sIV Never reached. 218 * sNO -> sIV Never reached.
213 * sSS -> sIV Simultaneous open, not supported 219 * sSS -> sS2 Simultaneous open
214 * sSR -> sIV Simultaneous open, not supported. 220 * sS2 -> sS2 Retransmitted simultaneous SYN
215 * sES -> sIV Server may not initiate a connection. 221 * sSR -> sIV Invalid SYN packets sent by the server
222 * sES -> sIV
216 * sFW -> sIV 223 * sFW -> sIV
217 * sCW -> sIV 224 * sCW -> sIV
218 * sLA -> sIV 225 * sLA -> sIV
219 * sTW -> sIV Reopened connection, but server may not do it. 226 * sTW -> sIV Reopened connection, but server may not do it.
220 * sCL -> sIV 227 * sCL -> sIV
221 */ 228 */
222/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 229/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
223/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV }, 230/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
224/* 231/*
225 * sSS -> sSR Standard open. 232 * sSS -> sSR Standard open.
233 * sS2 -> sSR Simultaneous open
226 * sSR -> sSR Retransmitted SYN/ACK. 234 * sSR -> sSR Retransmitted SYN/ACK.
227 * sES -> sIG Late retransmitted SYN/ACK? 235 * sES -> sIG Late retransmitted SYN/ACK?
228 * sFW -> sIG Might be SYN/ACK answering ignored SYN 236 * sFW -> sIG Might be SYN/ACK answering ignored SYN
@@ -231,10 +239,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
231 * sTW -> sIG 239 * sTW -> sIG
232 * sCL -> sIG 240 * sCL -> sIG
233 */ 241 */
234/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 242/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
235/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, 243/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
236/* 244/*
237 * sSS -> sIV Server might not send FIN in this state. 245 * sSS -> sIV Server might not send FIN in this state.
246 * sS2 -> sIV
238 * sSR -> sFW Close started. 247 * sSR -> sFW Close started.
239 * sES -> sFW 248 * sES -> sFW
240 * sFW -> sLA FIN seen in both directions. 249 * sFW -> sLA FIN seen in both directions.
@@ -243,10 +252,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
243 * sTW -> sTW 252 * sTW -> sTW
244 * sCL -> sCL 253 * sCL -> sCL
245 */ 254 */
246/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 255/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
247/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, 256/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
248/* 257/*
249 * sSS -> sIG Might be a half-open connection. 258 * sSS -> sIG Might be a half-open connection.
259 * sS2 -> sIG
250 * sSR -> sSR Might answer late resent SYN. 260 * sSR -> sSR Might answer late resent SYN.
251 * sES -> sES :-) 261 * sES -> sES :-)
252 * sFW -> sCW Normal close request answered by ACK. 262 * sFW -> sCW Normal close request answered by ACK.
@@ -255,8 +265,8 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
255 * sTW -> sTW Retransmitted last ACK. 265 * sTW -> sTW Retransmitted last ACK.
256 * sCL -> sCL 266 * sCL -> sCL
257 */ 267 */
258/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ 268/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
259/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, 269/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
260/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } 270/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
261 } 271 }
262}; 272};
@@ -296,13 +306,13 @@ static int tcp_print_tuple(struct seq_file *s,
296} 306}
297 307
298/* Print out the private part of the conntrack. */ 308/* Print out the private part of the conntrack. */
299static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct) 309static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
300{ 310{
301 enum tcp_conntrack state; 311 enum tcp_conntrack state;
302 312
303 read_lock_bh(&tcp_lock); 313 spin_lock_bh(&ct->lock);
304 state = ct->proto.tcp.state; 314 state = ct->proto.tcp.state;
305 read_unlock_bh(&tcp_lock); 315 spin_unlock_bh(&ct->lock);
306 316
307 return seq_printf(s, "%s ", tcp_conntrack_names[state]); 317 return seq_printf(s, "%s ", tcp_conntrack_names[state]);
308} 318}
@@ -521,13 +531,14 @@ static bool tcp_in_window(const struct nf_conn *ct,
521 receiver->td_end, receiver->td_maxend, receiver->td_maxwin, 531 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
522 receiver->td_scale); 532 receiver->td_scale);
523 533
524 if (sender->td_end == 0) { 534 if (sender->td_maxwin == 0) {
525 /* 535 /*
526 * Initialize sender data. 536 * Initialize sender data.
527 */ 537 */
528 if (tcph->syn && tcph->ack) { 538 if (tcph->syn) {
529 /* 539 /*
530 * Outgoing SYN-ACK in reply to a SYN. 540 * SYN-ACK in reply to a SYN
541 * or SYN from reply direction in simultaneous open.
531 */ 542 */
532 sender->td_end = 543 sender->td_end =
533 sender->td_maxend = end; 544 sender->td_maxend = end;
@@ -543,6 +554,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
543 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) 554 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
544 sender->td_scale = 555 sender->td_scale =
545 receiver->td_scale = 0; 556 receiver->td_scale = 0;
557 if (!tcph->ack)
558 /* Simultaneous open */
559 return true;
546 } else { 560 } else {
547 /* 561 /*
548 * We are in the middle of a connection, 562 * We are in the middle of a connection,
@@ -716,14 +730,14 @@ void nf_conntrack_tcp_update(const struct sk_buff *skb,
716 730
717 end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); 731 end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
718 732
719 write_lock_bh(&tcp_lock); 733 spin_lock_bh(&ct->lock);
720 /* 734 /*
721 * We have to worry for the ack in the reply packet only... 735 * We have to worry for the ack in the reply packet only...
722 */ 736 */
723 if (after(end, ct->proto.tcp.seen[dir].td_end)) 737 if (after(end, ct->proto.tcp.seen[dir].td_end))
724 ct->proto.tcp.seen[dir].td_end = end; 738 ct->proto.tcp.seen[dir].td_end = end;
725 ct->proto.tcp.last_end = end; 739 ct->proto.tcp.last_end = end;
726 write_unlock_bh(&tcp_lock); 740 spin_unlock_bh(&ct->lock);
727 pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " 741 pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
728 "receiver end=%u maxend=%u maxwin=%u scale=%i\n", 742 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
729 sender->td_end, sender->td_maxend, sender->td_maxwin, 743 sender->td_end, sender->td_maxend, sender->td_maxwin,
@@ -832,7 +846,7 @@ static int tcp_packet(struct nf_conn *ct,
832 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); 846 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
833 BUG_ON(th == NULL); 847 BUG_ON(th == NULL);
834 848
835 write_lock_bh(&tcp_lock); 849 spin_lock_bh(&ct->lock);
836 old_state = ct->proto.tcp.state; 850 old_state = ct->proto.tcp.state;
837 dir = CTINFO2DIR(ctinfo); 851 dir = CTINFO2DIR(ctinfo);
838 index = get_conntrack_index(th); 852 index = get_conntrack_index(th);
@@ -862,7 +876,7 @@ static int tcp_packet(struct nf_conn *ct,
862 && ct->proto.tcp.last_index == TCP_RST_SET)) { 876 && ct->proto.tcp.last_index == TCP_RST_SET)) {
863 /* Attempt to reopen a closed/aborted connection. 877 /* Attempt to reopen a closed/aborted connection.
864 * Delete this connection and look up again. */ 878 * Delete this connection and look up again. */
865 write_unlock_bh(&tcp_lock); 879 spin_unlock_bh(&ct->lock);
866 880
867 /* Only repeat if we can actually remove the timer. 881 /* Only repeat if we can actually remove the timer.
868 * Destruction may already be in progress in process 882 * Destruction may already be in progress in process
@@ -898,7 +912,7 @@ static int tcp_packet(struct nf_conn *ct,
898 * that the client cannot but retransmit its SYN and 912 * that the client cannot but retransmit its SYN and
899 * thus initiate a clean new session. 913 * thus initiate a clean new session.
900 */ 914 */
901 write_unlock_bh(&tcp_lock); 915 spin_unlock_bh(&ct->lock);
902 if (LOG_INVALID(net, IPPROTO_TCP)) 916 if (LOG_INVALID(net, IPPROTO_TCP))
903 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 917 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
904 "nf_ct_tcp: killing out of sync session "); 918 "nf_ct_tcp: killing out of sync session ");
@@ -911,7 +925,7 @@ static int tcp_packet(struct nf_conn *ct,
911 ct->proto.tcp.last_end = 925 ct->proto.tcp.last_end =
912 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); 926 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
913 927
914 write_unlock_bh(&tcp_lock); 928 spin_unlock_bh(&ct->lock);
915 if (LOG_INVALID(net, IPPROTO_TCP)) 929 if (LOG_INVALID(net, IPPROTO_TCP))
916 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 930 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
917 "nf_ct_tcp: invalid packet ignored "); 931 "nf_ct_tcp: invalid packet ignored ");
@@ -920,7 +934,7 @@ static int tcp_packet(struct nf_conn *ct,
920 /* Invalid packet */ 934 /* Invalid packet */
921 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", 935 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
922 dir, get_conntrack_index(th), old_state); 936 dir, get_conntrack_index(th), old_state);
923 write_unlock_bh(&tcp_lock); 937 spin_unlock_bh(&ct->lock);
924 if (LOG_INVALID(net, IPPROTO_TCP)) 938 if (LOG_INVALID(net, IPPROTO_TCP))
925 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 939 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
926 "nf_ct_tcp: invalid state "); 940 "nf_ct_tcp: invalid state ");
@@ -930,7 +944,7 @@ static int tcp_packet(struct nf_conn *ct,
930 && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) 944 && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
931 && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { 945 && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
932 /* Invalid RST */ 946 /* Invalid RST */
933 write_unlock_bh(&tcp_lock); 947 spin_unlock_bh(&ct->lock);
934 if (LOG_INVALID(net, IPPROTO_TCP)) 948 if (LOG_INVALID(net, IPPROTO_TCP))
935 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 949 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
936 "nf_ct_tcp: invalid RST "); 950 "nf_ct_tcp: invalid RST ");
@@ -961,7 +975,7 @@ static int tcp_packet(struct nf_conn *ct,
961 975
962 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, 976 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
963 skb, dataoff, th, pf)) { 977 skb, dataoff, th, pf)) {
964 write_unlock_bh(&tcp_lock); 978 spin_unlock_bh(&ct->lock);
965 return -NF_ACCEPT; 979 return -NF_ACCEPT;
966 } 980 }
967 in_window: 981 in_window:
@@ -990,9 +1004,8 @@ static int tcp_packet(struct nf_conn *ct,
990 timeout = nf_ct_tcp_timeout_unacknowledged; 1004 timeout = nf_ct_tcp_timeout_unacknowledged;
991 else 1005 else
992 timeout = tcp_timeouts[new_state]; 1006 timeout = tcp_timeouts[new_state];
993 write_unlock_bh(&tcp_lock); 1007 spin_unlock_bh(&ct->lock);
994 1008
995 nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
996 if (new_state != old_state) 1009 if (new_state != old_state)
997 nf_conntrack_event_cache(IPCT_PROTOINFO, ct); 1010 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
998 1011
@@ -1086,7 +1099,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1086 1099
1087 ct->proto.tcp.seen[1].td_end = 0; 1100 ct->proto.tcp.seen[1].td_end = 0;
1088 ct->proto.tcp.seen[1].td_maxend = 0; 1101 ct->proto.tcp.seen[1].td_maxend = 0;
1089 ct->proto.tcp.seen[1].td_maxwin = 1; 1102 ct->proto.tcp.seen[1].td_maxwin = 0;
1090 ct->proto.tcp.seen[1].td_scale = 0; 1103 ct->proto.tcp.seen[1].td_scale = 0;
1091 1104
1092 /* tcp_packet will set them */ 1105 /* tcp_packet will set them */
@@ -1108,12 +1121,12 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1108#include <linux/netfilter/nfnetlink_conntrack.h> 1121#include <linux/netfilter/nfnetlink_conntrack.h>
1109 1122
1110static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, 1123static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1111 const struct nf_conn *ct) 1124 struct nf_conn *ct)
1112{ 1125{
1113 struct nlattr *nest_parms; 1126 struct nlattr *nest_parms;
1114 struct nf_ct_tcp_flags tmp = {}; 1127 struct nf_ct_tcp_flags tmp = {};
1115 1128
1116 read_lock_bh(&tcp_lock); 1129 spin_lock_bh(&ct->lock);
1117 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED); 1130 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1118 if (!nest_parms) 1131 if (!nest_parms)
1119 goto nla_put_failure; 1132 goto nla_put_failure;
@@ -1133,14 +1146,14 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1133 tmp.flags = ct->proto.tcp.seen[1].flags; 1146 tmp.flags = ct->proto.tcp.seen[1].flags;
1134 NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY, 1147 NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1135 sizeof(struct nf_ct_tcp_flags), &tmp); 1148 sizeof(struct nf_ct_tcp_flags), &tmp);
1136 read_unlock_bh(&tcp_lock); 1149 spin_unlock_bh(&ct->lock);
1137 1150
1138 nla_nest_end(skb, nest_parms); 1151 nla_nest_end(skb, nest_parms);
1139 1152
1140 return 0; 1153 return 0;
1141 1154
1142nla_put_failure: 1155nla_put_failure:
1143 read_unlock_bh(&tcp_lock); 1156 spin_unlock_bh(&ct->lock);
1144 return -1; 1157 return -1;
1145} 1158}
1146 1159
@@ -1171,7 +1184,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1171 nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX) 1184 nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1172 return -EINVAL; 1185 return -EINVAL;
1173 1186
1174 write_lock_bh(&tcp_lock); 1187 spin_lock_bh(&ct->lock);
1175 if (tb[CTA_PROTOINFO_TCP_STATE]) 1188 if (tb[CTA_PROTOINFO_TCP_STATE])
1176 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]); 1189 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1177 1190
@@ -1198,7 +1211,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1198 ct->proto.tcp.seen[1].td_scale = 1211 ct->proto.tcp.seen[1].td_scale =
1199 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]); 1212 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1200 } 1213 }
1201 write_unlock_bh(&tcp_lock); 1214 spin_unlock_bh(&ct->lock);
1202 1215
1203 return 0; 1216 return 0;
1204} 1217}
@@ -1328,6 +1341,13 @@ static struct ctl_table tcp_compat_sysctl_table[] = {
1328 .proc_handler = proc_dointvec_jiffies, 1341 .proc_handler = proc_dointvec_jiffies,
1329 }, 1342 },
1330 { 1343 {
1344 .procname = "ip_conntrack_tcp_timeout_syn_sent2",
1345 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT2],
1346 .maxlen = sizeof(unsigned int),
1347 .mode = 0644,
1348 .proc_handler = proc_dointvec_jiffies,
1349 },
1350 {
1331 .procname = "ip_conntrack_tcp_timeout_syn_recv", 1351 .procname = "ip_conntrack_tcp_timeout_syn_recv",
1332 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], 1352 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
1333 .maxlen = sizeof(unsigned int), 1353 .maxlen = sizeof(unsigned int),
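
The TCP tracker gains support for simultaneous open: the unused LISTEN slot becomes TCP_CONNTRACK_SYN_SENT2, the transition tables get entries for SYNs and SYN/ACKs arriving from the reply direction, and a matching ip_conntrack_tcp_timeout_syn_sent2 sysctl is added. The toy program below only demonstrates the table-driven idea, new_state = table[direction][packet type][old_state], with a deliberately small, hypothetical table; it is not the kernel's tcp_conntracks array, just a walk-through of a simultaneous open ending up in SYN_RECV.

/*
 * Toy, userspace-only state machine (hypothetical table, not the
 * kernel's tcp_conntracks[]): the next state is looked up as
 * table[direction][packet type][current state], the same shape as the
 * table the patch extends with the SYN_SENT2 column.
 */
#include <stdio.h>

enum state { S_NONE, S_SYN_SENT, S_SYN_SENT2, S_SYN_RECV, S_ESTABLISHED, S_MAX };
enum event { E_SYN, E_SYNACK, E_ACK, E_MAX };
enum dir   { D_ORIG, D_REPLY };

static const char *const state_names[S_MAX] = {
	"NONE", "SYN_SENT", "SYN_SENT2", "SYN_RECV", "ESTABLISHED"
};

/* transitions[dir][event][old state] -> new state (illustrative values) */
static const unsigned char transitions[2][E_MAX][S_MAX] = {
	[D_ORIG] = {
		[E_SYN]    = { S_SYN_SENT, S_SYN_SENT, S_SYN_SENT2,
			       S_SYN_RECV, S_ESTABLISHED },
		[E_SYNACK] = { S_NONE, S_SYN_SENT, S_SYN_RECV,
			       S_SYN_RECV, S_ESTABLISHED },
		[E_ACK]    = { S_ESTABLISHED, S_SYN_SENT, S_SYN_SENT2,
			       S_ESTABLISHED, S_ESTABLISHED },
	},
	[D_REPLY] = {
		[E_SYN]    = { S_NONE, S_SYN_SENT2, S_SYN_SENT2,
			       S_SYN_RECV, S_ESTABLISHED },
		[E_SYNACK] = { S_NONE, S_SYN_RECV, S_SYN_RECV,
			       S_SYN_RECV, S_ESTABLISHED },
		[E_ACK]    = { S_NONE, S_SYN_SENT, S_SYN_SENT2,
			       S_ESTABLISHED, S_ESTABLISHED },
	},
};

int main(void)
{
	enum state s = S_NONE;

	/* simultaneous open: SYN in both directions, then a SYN/ACK */
	s = transitions[D_ORIG][E_SYN][s];      /* NONE      -> SYN_SENT  */
	s = transitions[D_REPLY][E_SYN][s];     /* SYN_SENT  -> SYN_SENT2 */
	s = transitions[D_REPLY][E_SYNACK][s];  /* SYN_SENT2 -> SYN_RECV  */

	printf("state after simultaneous open exchange: %s\n", state_names[s]);
	return 0;
}

The tcp_in_window() change pairs with this: an uninitialized sender is now detected via td_maxwin == 0 (tcp_new() seeds seen[1].td_maxwin with 0 instead of 1), so a bare SYN from the reply direction can initialize that side and return early as a simultaneous open.
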
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 4f2310c93e01..3a6fd77f7761 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -204,10 +204,10 @@ int nf_queue(struct sk_buff *skb,
204 queuenum); 204 queuenum);
205 205
206 switch (pf) { 206 switch (pf) {
207 case AF_INET: 207 case NFPROTO_IPV4:
208 skb->protocol = htons(ETH_P_IP); 208 skb->protocol = htons(ETH_P_IP);
209 break; 209 break;
210 case AF_INET6: 210 case NFPROTO_IPV6:
211 skb->protocol = htons(ETH_P_IPV6); 211 skb->protocol = htons(ETH_P_IPV6);
212 break; 212 break;
213 } 213 }
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index b8ab37ad7ed5..92761a988375 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -107,9 +107,10 @@ int nfnetlink_has_listeners(unsigned int group)
107} 107}
108EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); 108EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
109 109
110int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) 110int nfnetlink_send(struct sk_buff *skb, u32 pid,
111 unsigned group, int echo, gfp_t flags)
111{ 112{
112 return nlmsg_notify(nfnl, skb, pid, group, echo, gfp_any()); 113 return nlmsg_notify(nfnl, skb, pid, group, echo, flags);
113} 114}
114EXPORT_SYMBOL_GPL(nfnetlink_send); 115EXPORT_SYMBOL_GPL(nfnetlink_send);
115 116
@@ -136,7 +137,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
136 return -EPERM; 137 return -EPERM;
137 138
138 /* All the messages must at least contain nfgenmsg */ 139 /* All the messages must at least contain nfgenmsg */
139 if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg))) 140 if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct nfgenmsg)))
140 return 0; 141 return 0;
141 142
142 type = nlh->nlmsg_type; 143 type = nlh->nlmsg_type;
@@ -160,19 +161,14 @@ replay:
160 { 161 {
161 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); 162 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
162 u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); 163 u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
163 u_int16_t attr_count = ss->cb[cb_id].attr_count; 164 struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
164 struct nlattr *cda[attr_count+1]; 165 struct nlattr *attr = (void *)nlh + min_len;
165 166 int attrlen = nlh->nlmsg_len - min_len;
166 if (likely(nlh->nlmsg_len >= min_len)) { 167
167 struct nlattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); 168 err = nla_parse(cda, ss->cb[cb_id].attr_count,
168 int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); 169 attr, attrlen, ss->cb[cb_id].policy);
169 170 if (err < 0)
170 err = nla_parse(cda, attr_count, attr, attrlen, 171 return err;
171 ss->cb[cb_id].policy);
172 if (err < 0)
173 return err;
174 } else
175 return -EINVAL;
176 172
177 err = nc->call(nfnl, skb, nlh, cda); 173 err = nc->call(nfnl, skb, nlh, cda);
178 if (err == -EAGAIN) 174 if (err == -EAGAIN)
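
Besides nfnetlink_send() gaining an explicit gfp_t argument, the length handling above now checks nlmsg_len against NLMSG_LENGTH() up front while the attribute offset is still derived from the NLMSG_SPACE()-based min_len. For the 4-byte struct nfgenmsg the two macros evaluate to the same size, so the single check also bounds the attribute pointer. A small userspace sketch (not from the patch; it only assumes the uapi netlink and nfnetlink headers) that prints the values:

	#include <stdio.h>
	#include <linux/netlink.h>		/* NLMSG_LENGTH(), NLMSG_SPACE() */
	#include <linux/netfilter/nfnetlink.h>	/* struct nfgenmsg */

	int main(void)
	{
		/* Hypothetical check, not from the patch: both macros yield the
		 * same offset for the small nfgenmsg header, so dropping the
		 * second in-callback length check is safe. */
		printf("sizeof(struct nfgenmsg) = %zu\n", sizeof(struct nfgenmsg));
		printf("NLMSG_LENGTH(nfgenmsg)  = %zu\n",
		       (size_t)NLMSG_LENGTH(sizeof(struct nfgenmsg)));
		printf("NLMSG_SPACE(nfgenmsg)   = %zu\n",
		       (size_t)NLMSG_SPACE(sizeof(struct nfgenmsg)));
		return 0;
	}
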
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 150e5cf62f85..46dba5f043d5 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -329,6 +329,32 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target,
329} 329}
330EXPORT_SYMBOL_GPL(xt_find_revision); 330EXPORT_SYMBOL_GPL(xt_find_revision);
331 331
332static char *textify_hooks(char *buf, size_t size, unsigned int mask)
333{
334 static const char *const names[] = {
335 "PREROUTING", "INPUT", "FORWARD",
336 "OUTPUT", "POSTROUTING", "BROUTING",
337 };
338 unsigned int i;
339 char *p = buf;
340 bool np = false;
341 int res;
342
343 *p = '\0';
344 for (i = 0; i < ARRAY_SIZE(names); ++i) {
345 if (!(mask & (1 << i)))
346 continue;
347 res = snprintf(p, size, "%s%s", np ? "/" : "", names[i]);
348 if (res > 0) {
349 size -= res;
350 p += res;
351 }
352 np = true;
353 }
354
355 return buf;
356}
357
332int xt_check_match(struct xt_mtchk_param *par, 358int xt_check_match(struct xt_mtchk_param *par,
333 unsigned int size, u_int8_t proto, bool inv_proto) 359 unsigned int size, u_int8_t proto, bool inv_proto)
334{ 360{
@@ -351,9 +377,13 @@ int xt_check_match(struct xt_mtchk_param *par,
351 return -EINVAL; 377 return -EINVAL;
352 } 378 }
353 if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) { 379 if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
354 printk("%s_tables: %s match: bad hook_mask %#x/%#x\n", 380 char used[64], allow[64];
381
382 printk("%s_tables: %s match: used from hooks %s, but only "
383 "valid from %s\n",
355 xt_prefix[par->family], par->match->name, 384 xt_prefix[par->family], par->match->name,
356 par->hook_mask, par->match->hooks); 385 textify_hooks(used, sizeof(used), par->hook_mask),
386 textify_hooks(allow, sizeof(allow), par->match->hooks));
357 return -EINVAL; 387 return -EINVAL;
358 } 388 }
359 if (par->match->proto && (par->match->proto != proto || inv_proto)) { 389 if (par->match->proto && (par->match->proto != proto || inv_proto)) {
@@ -497,9 +527,13 @@ int xt_check_target(struct xt_tgchk_param *par,
497 return -EINVAL; 527 return -EINVAL;
498 } 528 }
499 if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) { 529 if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
500 printk("%s_tables: %s target: bad hook_mask %#x/%#x\n", 530 char used[64], allow[64];
531
532 printk("%s_tables: %s target: used from hooks %s, but only "
533 "usable from %s\n",
501 xt_prefix[par->family], par->target->name, 534 xt_prefix[par->family], par->target->name,
502 par->hook_mask, par->target->hooks); 535 textify_hooks(used, sizeof(used), par->hook_mask),
536 textify_hooks(allow, sizeof(allow), par->target->hooks));
503 return -EINVAL; 537 return -EINVAL;
504 } 538 }
505 if (par->target->proto && (par->target->proto != proto || inv_proto)) { 539 if (par->target->proto && (par->target->proto != proto || inv_proto)) {
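
To illustrate the new diagnostics, here is a hypothetical userspace rebuild of textify_hooks() (not part of the patch; NF_INET_* bit positions come from the uapi <linux/netfilter.h> enum). It shows the string the sharper error message prints for a given hook mask instead of the old raw hex pair:

	#include <stdio.h>
	#include <stdbool.h>
	#include <linux/netfilter.h>	/* NF_INET_PRE_ROUTING, ... */

	#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

	static char *textify_hooks(char *buf, size_t size, unsigned int mask)
	{
		static const char *const names[] = {
			"PREROUTING", "INPUT", "FORWARD",
			"OUTPUT", "POSTROUTING", "BROUTING",
		};
		unsigned int i;
		char *p = buf;
		bool np = false;
		int res;

		*p = '\0';
		for (i = 0; i < ARRAY_SIZE(names); ++i) {
			if (!(mask & (1 << i)))
				continue;
			res = snprintf(p, size, "%s%s", np ? "/" : "", names[i]);
			if (res > 0) {
				size -= res;
				p += res;
			}
			np = true;
		}
		return buf;
	}

	int main(void)
	{
		char buf[64];
		unsigned int mask = (1 << NF_INET_PRE_ROUTING) |
				    (1 << NF_INET_FORWARD);

		/* Prints "PREROUTING/FORWARD" instead of the old raw 0x.. mask. */
		printf("%s\n", textify_hooks(buf, sizeof(buf), mask));
		return 0;
	}
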
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index f9977b3311f7..498b45101df7 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -11,6 +11,10 @@
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/skbuff.h> 12#include <linux/skbuff.h>
13 13
14#include <linux/ip.h>
15#include <linux/ipv6.h>
16#include <linux/jhash.h>
17
14#include <linux/netfilter.h> 18#include <linux/netfilter.h>
15#include <linux/netfilter_arp.h> 19#include <linux/netfilter_arp.h>
16#include <linux/netfilter/x_tables.h> 20#include <linux/netfilter/x_tables.h>
@@ -23,6 +27,8 @@ MODULE_ALIAS("ipt_NFQUEUE");
23MODULE_ALIAS("ip6t_NFQUEUE"); 27MODULE_ALIAS("ip6t_NFQUEUE");
24MODULE_ALIAS("arpt_NFQUEUE"); 28MODULE_ALIAS("arpt_NFQUEUE");
25 29
30static u32 jhash_initval __read_mostly;
31
26static unsigned int 32static unsigned int
27nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) 33nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
28{ 34{
@@ -31,32 +37,105 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
31 return NF_QUEUE_NR(tinfo->queuenum); 37 return NF_QUEUE_NR(tinfo->queuenum);
32} 38}
33 39
40static u32 hash_v4(const struct sk_buff *skb)
41{
42 const struct iphdr *iph = ip_hdr(skb);
43 u32 ipaddr;
44
 45	/* packets in either direction go into the same queue */
46 ipaddr = iph->saddr ^ iph->daddr;
47
48 return jhash_2words(ipaddr, iph->protocol, jhash_initval);
49}
50
51static unsigned int
52nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par)
53{
54 const struct xt_NFQ_info_v1 *info = par->targinfo;
55 u32 queue = info->queuenum;
56
57 if (info->queues_total > 1)
58 queue = hash_v4(skb) % info->queues_total + queue;
59 return NF_QUEUE_NR(queue);
60}
61
62#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
63static u32 hash_v6(const struct sk_buff *skb)
64{
65 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
66 u32 addr[4];
67
68 addr[0] = ip6h->saddr.s6_addr32[0] ^ ip6h->daddr.s6_addr32[0];
69 addr[1] = ip6h->saddr.s6_addr32[1] ^ ip6h->daddr.s6_addr32[1];
70 addr[2] = ip6h->saddr.s6_addr32[2] ^ ip6h->daddr.s6_addr32[2];
71 addr[3] = ip6h->saddr.s6_addr32[3] ^ ip6h->daddr.s6_addr32[3];
72
73 return jhash2(addr, ARRAY_SIZE(addr), jhash_initval);
74}
75
76static unsigned int
77nfqueue_tg6_v1(struct sk_buff *skb, const struct xt_target_param *par)
78{
79 const struct xt_NFQ_info_v1 *info = par->targinfo;
80 u32 queue = info->queuenum;
81
82 if (info->queues_total > 1)
83 queue = hash_v6(skb) % info->queues_total + queue;
84 return NF_QUEUE_NR(queue);
85}
86#endif
87
88static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
89{
90 const struct xt_NFQ_info_v1 *info = par->targinfo;
91 u32 maxid;
92
93 if (info->queues_total == 0) {
94 pr_err("NFQUEUE: number of total queues is 0\n");
95 return false;
96 }
97 maxid = info->queues_total - 1 + info->queuenum;
98 if (maxid > 0xffff) {
99 pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n",
100 info->queues_total, maxid);
101 return false;
102 }
103 return true;
104}
105
34static struct xt_target nfqueue_tg_reg[] __read_mostly = { 106static struct xt_target nfqueue_tg_reg[] __read_mostly = {
35 { 107 {
36 .name = "NFQUEUE", 108 .name = "NFQUEUE",
37 .family = NFPROTO_IPV4, 109 .family = NFPROTO_UNSPEC,
38 .target = nfqueue_tg, 110 .target = nfqueue_tg,
39 .targetsize = sizeof(struct xt_NFQ_info), 111 .targetsize = sizeof(struct xt_NFQ_info),
40 .me = THIS_MODULE, 112 .me = THIS_MODULE,
41 }, 113 },
42 { 114 {
43 .name = "NFQUEUE", 115 .name = "NFQUEUE",
44 .family = NFPROTO_IPV6, 116 .revision = 1,
45 .target = nfqueue_tg, 117 .family = NFPROTO_IPV4,
46 .targetsize = sizeof(struct xt_NFQ_info), 118 .checkentry = nfqueue_tg_v1_check,
119 .target = nfqueue_tg4_v1,
120 .targetsize = sizeof(struct xt_NFQ_info_v1),
47 .me = THIS_MODULE, 121 .me = THIS_MODULE,
48 }, 122 },
123#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
49 { 124 {
50 .name = "NFQUEUE", 125 .name = "NFQUEUE",
51 .family = NFPROTO_ARP, 126 .revision = 1,
52 .target = nfqueue_tg, 127 .family = NFPROTO_IPV6,
53 .targetsize = sizeof(struct xt_NFQ_info), 128 .checkentry = nfqueue_tg_v1_check,
129 .target = nfqueue_tg6_v1,
130 .targetsize = sizeof(struct xt_NFQ_info_v1),
54 .me = THIS_MODULE, 131 .me = THIS_MODULE,
55 }, 132 },
133#endif
56}; 134};
57 135
58static int __init nfqueue_tg_init(void) 136static int __init nfqueue_tg_init(void)
59{ 137{
138 get_random_bytes(&jhash_initval, sizeof(jhash_initval));
60 return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg)); 139 return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
61} 140}
62 141
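
The revision-1 target spreads flows across a contiguous block of queues. A hypothetical userspace sketch of the selection arithmetic (not from the patch; "flow_hash" stands in for hash_v4()/hash_v6() seeded with jhash_initval):

	#include <stdio.h>
	#include <stdint.h>

	static unsigned int pick_queue(uint32_t flow_hash, uint16_t queuenum,
				       uint16_t queues_total)
	{
		unsigned int queue = queuenum;

		/* Same formula as nfqueue_tg4_v1()/nfqueue_tg6_v1(): spread flows
		 * over [queuenum, queuenum + queues_total - 1]. */
		if (queues_total > 1)
			queue = flow_hash % queues_total + queuenum;
		return queue;
	}

	int main(void)
	{
		/* e.g. base queue 16, balanced over 4 queues */
		uint16_t queuenum = 16, queues_total = 4;
		uint32_t hashes[] = { 0x1234, 0xdeadbeef, 0x1234 };
		unsigned int i;

		for (i = 0; i < 3; ++i)
			printf("hash %#x -> queue %u\n", hashes[i],
			       pick_queue(hashes[i], queuenum, queues_total));
		/* The first and third packets share a flow hash, so they land on
		 * the same queue; nfqueue_tg_v1_check() rejects setups where
		 * queuenum + queues_total - 1 would exceed 0xffff. */
		return 0;
	}
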
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
new file mode 100644
index 000000000000..863e40977a4d
--- /dev/null
+++ b/net/netfilter/xt_osf.c
@@ -0,0 +1,428 @@
1/*
2 * Copyright (c) 2003+ Evgeniy Polyakov <zbr@ioremap.net>
3 *
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/module.h>
21#include <linux/kernel.h>
22
23#include <linux/if.h>
24#include <linux/inetdevice.h>
25#include <linux/ip.h>
26#include <linux/list.h>
27#include <linux/rculist.h>
28#include <linux/skbuff.h>
29#include <linux/slab.h>
30#include <linux/tcp.h>
31
32#include <net/ip.h>
33#include <net/tcp.h>
34
35#include <linux/netfilter/nfnetlink.h>
36#include <linux/netfilter/x_tables.h>
37#include <net/netfilter/nf_log.h>
38#include <linux/netfilter/xt_osf.h>
39
40struct xt_osf_finger {
41 struct rcu_head rcu_head;
42 struct list_head finger_entry;
43 struct xt_osf_user_finger finger;
44};
45
46enum osf_fmatch_states {
47 /* Packet does not match the fingerprint */
48 FMATCH_WRONG = 0,
49 /* Packet matches the fingerprint */
50 FMATCH_OK,
51 /* Options do not match the fingerprint, but header does */
52 FMATCH_OPT_WRONG,
53};
54
55/*
 56 * Indexed by the don't-fragment (DF) bit.
57 * It is the only constant value in the fingerprint.
58 */
59static struct list_head xt_osf_fingers[2];
60
61static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = {
62 [OSF_ATTR_FINGER] = { .len = sizeof(struct xt_osf_user_finger) },
63};
64
65static void xt_osf_finger_free_rcu(struct rcu_head *rcu_head)
66{
67 struct xt_osf_finger *f = container_of(rcu_head, struct xt_osf_finger, rcu_head);
68
69 kfree(f);
70}
71
72static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb,
73 struct nlmsghdr *nlh, struct nlattr *osf_attrs[])
74{
75 struct xt_osf_user_finger *f;
76 struct xt_osf_finger *kf = NULL, *sf;
77 int err = 0;
78
79 if (!osf_attrs[OSF_ATTR_FINGER])
80 return -EINVAL;
81
82 if (!(nlh->nlmsg_flags & NLM_F_CREATE))
83 return -EINVAL;
84
85 f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
86
87 kf = kmalloc(sizeof(struct xt_osf_finger), GFP_KERNEL);
88 if (!kf)
89 return -ENOMEM;
90
91 memcpy(&kf->finger, f, sizeof(struct xt_osf_user_finger));
92
93 list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) {
94 if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger)))
95 continue;
96
97 kfree(kf);
98 kf = NULL;
99
100 if (nlh->nlmsg_flags & NLM_F_EXCL)
101 err = -EEXIST;
102 break;
103 }
104
105 /*
106 * We are protected by nfnl mutex.
107 */
108 if (kf)
109 list_add_tail_rcu(&kf->finger_entry, &xt_osf_fingers[!!f->df]);
110
111 return err;
112}
113
114static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb,
115 struct nlmsghdr *nlh, struct nlattr *osf_attrs[])
116{
117 struct xt_osf_user_finger *f;
118 struct xt_osf_finger *sf;
119	int err = -ENOENT;
120
121 if (!osf_attrs[OSF_ATTR_FINGER])
122 return -EINVAL;
123
124 f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
125
126 list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) {
127 if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger)))
128 continue;
129
130 /*
131 * We are protected by nfnl mutex.
132 */
133 list_del_rcu(&sf->finger_entry);
134 call_rcu(&sf->rcu_head, xt_osf_finger_free_rcu);
135
136 err = 0;
137 break;
138 }
139
140 return err;
141}
142
143static const struct nfnl_callback xt_osf_nfnetlink_callbacks[OSF_MSG_MAX] = {
144 [OSF_MSG_ADD] = {
145 .call = xt_osf_add_callback,
146 .attr_count = OSF_ATTR_MAX,
147 .policy = xt_osf_policy,
148 },
149 [OSF_MSG_REMOVE] = {
150 .call = xt_osf_remove_callback,
151 .attr_count = OSF_ATTR_MAX,
152 .policy = xt_osf_policy,
153 },
154};
155
156static const struct nfnetlink_subsystem xt_osf_nfnetlink = {
157 .name = "osf",
158 .subsys_id = NFNL_SUBSYS_OSF,
159 .cb_count = OSF_MSG_MAX,
160 .cb = xt_osf_nfnetlink_callbacks,
161};
162
163static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info *info,
164 unsigned char f_ttl)
165{
166 const struct iphdr *ip = ip_hdr(skb);
167
168 if (info->flags & XT_OSF_TTL) {
169 if (info->ttl == XT_OSF_TTL_TRUE)
170 return ip->ttl == f_ttl;
171 if (info->ttl == XT_OSF_TTL_NOCHECK)
172 return 1;
173 else if (ip->ttl <= f_ttl)
174 return 1;
175 else {
176 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
177 int ret = 0;
178
179 for_ifa(in_dev) {
180 if (inet_ifa_match(ip->saddr, ifa)) {
181 ret = (ip->ttl == f_ttl);
182 break;
183 }
184 }
185 endfor_ifa(in_dev);
186
187 return ret;
188 }
189 }
190
191 return ip->ttl == f_ttl;
192}
193
194static bool xt_osf_match_packet(const struct sk_buff *skb,
195 const struct xt_match_param *p)
196{
197 const struct xt_osf_info *info = p->matchinfo;
198 const struct iphdr *ip = ip_hdr(skb);
199 const struct tcphdr *tcp;
200 struct tcphdr _tcph;
201 int fmatch = FMATCH_WRONG, fcount = 0;
202 unsigned int optsize = 0, check_WSS = 0;
203 u16 window, totlen, mss = 0;
204 bool df;
205 const unsigned char *optp = NULL, *_optp = NULL;
206 unsigned char opts[MAX_IPOPTLEN];
207 const struct xt_osf_finger *kf;
208 const struct xt_osf_user_finger *f;
209
210 if (!info)
211 return false;
212
213 tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
214 if (!tcp)
215 return false;
216
217 if (!tcp->syn)
218 return false;
219
220 totlen = ntohs(ip->tot_len);
221 df = ntohs(ip->frag_off) & IP_DF;
222 window = ntohs(tcp->window);
223
224 if (tcp->doff * 4 > sizeof(struct tcphdr)) {
225 optsize = tcp->doff * 4 - sizeof(struct tcphdr);
226
227 _optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) +
228 sizeof(struct tcphdr), optsize, opts);
229 }
230
231 rcu_read_lock();
232 list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) {
233 f = &kf->finger;
234
235 if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre))
236 continue;
237
238 optp = _optp;
239 fmatch = FMATCH_WRONG;
240
241 if (totlen == f->ss && xt_osf_ttl(skb, info, f->ttl)) {
242 int foptsize, optnum;
243
244 /*
245			 * Should not happen if the userspace parser was written correctly.
246 */
247 if (f->wss.wc >= OSF_WSS_MAX)
248 continue;
249
250 /* Check options */
251
252 foptsize = 0;
253 for (optnum = 0; optnum < f->opt_num; ++optnum)
254 foptsize += f->opt[optnum].length;
255
256 if (foptsize > MAX_IPOPTLEN ||
257 optsize > MAX_IPOPTLEN ||
258 optsize != foptsize)
259 continue;
260
261 check_WSS = f->wss.wc;
262
263 for (optnum = 0; optnum < f->opt_num; ++optnum) {
264 if (f->opt[optnum].kind == (*optp)) {
265 __u32 len = f->opt[optnum].length;
266 const __u8 *optend = optp + len;
267 int loop_cont = 0;
268
269 fmatch = FMATCH_OK;
270
271 switch (*optp) {
272 case OSFOPT_MSS:
273 mss = optp[3];
274 mss <<= 8;
275 mss |= optp[2];
276
277 mss = ntohs(mss);
278 break;
279 case OSFOPT_TS:
280 loop_cont = 1;
281 break;
282 }
283
284 optp = optend;
285 } else
286 fmatch = FMATCH_OPT_WRONG;
287
288 if (fmatch != FMATCH_OK)
289 break;
290 }
291
292 if (fmatch != FMATCH_OPT_WRONG) {
293 fmatch = FMATCH_WRONG;
294
295 switch (check_WSS) {
296 case OSF_WSS_PLAIN:
297 if (f->wss.val == 0 || window == f->wss.val)
298 fmatch = FMATCH_OK;
299 break;
300 case OSF_WSS_MSS:
301 /*
302				 * Some smart modems mangle (decrease) the MSS to
303 * SMART_MSS_2, so we check standard, decreased
304 * and the one provided in the fingerprint MSS
305 * values.
306 */
307#define SMART_MSS_1 1460
308#define SMART_MSS_2 1448
309 if (window == f->wss.val * mss ||
310 window == f->wss.val * SMART_MSS_1 ||
311 window == f->wss.val * SMART_MSS_2)
312 fmatch = FMATCH_OK;
313 break;
314 case OSF_WSS_MTU:
315 if (window == f->wss.val * (mss + 40) ||
316 window == f->wss.val * (SMART_MSS_1 + 40) ||
317 window == f->wss.val * (SMART_MSS_2 + 40))
318 fmatch = FMATCH_OK;
319 break;
320 case OSF_WSS_MODULO:
321 if ((window % f->wss.val) == 0)
322 fmatch = FMATCH_OK;
323 break;
324 }
325 }
326
327 if (fmatch != FMATCH_OK)
328 continue;
329
330 fcount++;
331
332 if (info->flags & XT_OSF_LOG)
333 nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL,
334 "%s [%s:%s] : %pi4:%d -> %pi4:%d hops=%d\n",
335 f->genre, f->version, f->subtype,
336 &ip->saddr, ntohs(tcp->source),
337 &ip->daddr, ntohs(tcp->dest),
338 f->ttl - ip->ttl);
339
340 if ((info->flags & XT_OSF_LOG) &&
341 info->loglevel == XT_OSF_LOGLEVEL_FIRST)
342 break;
343 }
344 }
345 rcu_read_unlock();
346
347 if (!fcount && (info->flags & XT_OSF_LOG))
348 nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL,
349 "Remote OS is not known: %pi4:%u -> %pi4:%u\n",
350 &ip->saddr, ntohs(tcp->source),
351 &ip->daddr, ntohs(tcp->dest));
352
353 if (fcount)
354 fmatch = FMATCH_OK;
355
356 return fmatch == FMATCH_OK;
357}
358
359static struct xt_match xt_osf_match = {
360 .name = "osf",
361 .revision = 0,
362 .family = NFPROTO_IPV4,
363 .proto = IPPROTO_TCP,
364 .hooks = (1 << NF_INET_LOCAL_IN) |
365 (1 << NF_INET_PRE_ROUTING) |
366 (1 << NF_INET_FORWARD),
367 .match = xt_osf_match_packet,
368 .matchsize = sizeof(struct xt_osf_info),
369 .me = THIS_MODULE,
370};
371
372static int __init xt_osf_init(void)
373{
374 int err = -EINVAL;
375 int i;
376
377	for (i = 0; i < ARRAY_SIZE(xt_osf_fingers); ++i)
378 INIT_LIST_HEAD(&xt_osf_fingers[i]);
379
380 err = nfnetlink_subsys_register(&xt_osf_nfnetlink);
381 if (err < 0) {
382		printk(KERN_ERR "Failed (%d) to register OSF nfnetlink helper.\n", err);
383 goto err_out_exit;
384 }
385
386 err = xt_register_match(&xt_osf_match);
387 if (err) {
388 printk(KERN_ERR "Failed (%d) to register OS fingerprint "
389 "matching module.\n", err);
390 goto err_out_remove;
391 }
392
393 return 0;
394
395err_out_remove:
396 nfnetlink_subsys_unregister(&xt_osf_nfnetlink);
397err_out_exit:
398 return err;
399}
400
401static void __exit xt_osf_fini(void)
402{
403 struct xt_osf_finger *f;
404 int i;
405
406 nfnetlink_subsys_unregister(&xt_osf_nfnetlink);
407 xt_unregister_match(&xt_osf_match);
408
409 rcu_read_lock();
410	for (i = 0; i < ARRAY_SIZE(xt_osf_fingers); ++i) {
411
412 list_for_each_entry_rcu(f, &xt_osf_fingers[i], finger_entry) {
413 list_del_rcu(&f->finger_entry);
414 call_rcu(&f->rcu_head, xt_osf_finger_free_rcu);
415 }
416 }
417 rcu_read_unlock();
418
419 rcu_barrier();
420}
421
422module_init(xt_osf_init);
423module_exit(xt_osf_fini);
424
425MODULE_LICENSE("GPL");
426MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
427MODULE_DESCRIPTION("Passive OS fingerprint matching.");
428MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
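
Most of the matching work above is the window-size (WSS) classification. A hypothetical userspace sketch of that switch, isolated as a pure function (not from the patch; the OSF_WSS_* values are assumed to match those in linux/netfilter/xt_osf.h):

	#include <stdio.h>
	#include <stdbool.h>
	#include <stdint.h>

	/* Assumed to mirror the OSF_WSS_* enum in linux/netfilter/xt_osf.h. */
	enum { OSF_WSS_PLAIN = 0, OSF_WSS_MSS, OSF_WSS_MTU, OSF_WSS_MODULO };

	#define SMART_MSS_1 1460
	#define SMART_MSS_2 1448

	/* wc/val mirror a fingerprint's wss.wc/wss.val; window and mss come
	 * from the SYN packet being classified. */
	static bool wss_matches(uint32_t wc, uint32_t val,
				uint16_t window, uint16_t mss)
	{
		switch (wc) {
		case OSF_WSS_PLAIN:
			return val == 0 || window == val;
		case OSF_WSS_MSS:
			return window == val * mss ||
			       window == val * SMART_MSS_1 ||
			       window == val * SMART_MSS_2;
		case OSF_WSS_MTU:
			return window == val * (mss + 40) ||
			       window == val * (SMART_MSS_1 + 40) ||
			       window == val * (SMART_MSS_2 + 40);
		case OSF_WSS_MODULO:
			return (window % val) == 0;
		}
		return false;
	}

	int main(void)
	{
		/* A fingerprint expecting window == 4 * MSS, probed with a SYN
		 * carrying MSS 1460 and window 5840 (= 4 * 1460). */
		printf("%s\n", wss_matches(OSF_WSS_MSS, 4, 5840, 1460) ?
		       "FMATCH_OK" : "FMATCH_WRONG");
		return 0;
	}
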
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 1acc089be7e9..ebf00ad5b194 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -22,6 +22,8 @@
22#include <net/netfilter/nf_tproxy_core.h> 22#include <net/netfilter/nf_tproxy_core.h>
23#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 23#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
24 24
25#include <linux/netfilter/xt_socket.h>
26
25#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 27#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
26#define XT_SOCKET_HAVE_CONNTRACK 1 28#define XT_SOCKET_HAVE_CONNTRACK 1
27#include <net/netfilter/nf_conntrack.h> 29#include <net/netfilter/nf_conntrack.h>
@@ -86,7 +88,8 @@ extract_icmp_fields(const struct sk_buff *skb,
86 88
87 89
88static bool 90static bool
89socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) 91socket_match(const struct sk_buff *skb, const struct xt_match_param *par,
92 const struct xt_socket_mtinfo1 *info)
90{ 93{
91 const struct iphdr *iph = ip_hdr(skb); 94 const struct iphdr *iph = ip_hdr(skb);
92 struct udphdr _hdr, *hp = NULL; 95 struct udphdr _hdr, *hp = NULL;
@@ -141,10 +144,24 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
141 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, 144 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
142 saddr, daddr, sport, dport, par->in, false); 145 saddr, daddr, sport, dport, par->in, false);
143 if (sk != NULL) { 146 if (sk != NULL) {
144 bool wildcard = (sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->rcv_saddr == 0); 147 bool wildcard;
148 bool transparent = true;
149
150 /* Ignore sockets listening on INADDR_ANY */
151 wildcard = (sk->sk_state != TCP_TIME_WAIT &&
152 inet_sk(sk)->rcv_saddr == 0);
153
154 /* Ignore non-transparent sockets,
155 if XT_SOCKET_TRANSPARENT is used */
156 if (info && info->flags & XT_SOCKET_TRANSPARENT)
157 transparent = ((sk->sk_state != TCP_TIME_WAIT &&
158 inet_sk(sk)->transparent) ||
159 (sk->sk_state == TCP_TIME_WAIT &&
160 inet_twsk(sk)->tw_transparent));
145 161
146 nf_tproxy_put_sock(sk); 162 nf_tproxy_put_sock(sk);
147 if (wildcard) 163
164 if (wildcard || !transparent)
148 sk = NULL; 165 sk = NULL;
149 } 166 }
150 167
@@ -157,23 +174,47 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
157 return (sk != NULL); 174 return (sk != NULL);
158} 175}
159 176
160static struct xt_match socket_mt_reg __read_mostly = { 177static bool
161 .name = "socket", 178socket_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
162 .family = AF_INET, 179{
163 .match = socket_mt, 180 return socket_match(skb, par, NULL);
164 .hooks = 1 << NF_INET_PRE_ROUTING, 181}
165 .me = THIS_MODULE, 182
183static bool
184socket_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
185{
186 return socket_match(skb, par, par->matchinfo);
187}
188
189static struct xt_match socket_mt_reg[] __read_mostly = {
190 {
191 .name = "socket",
192 .revision = 0,
193 .family = NFPROTO_IPV4,
194 .match = socket_mt_v0,
195 .hooks = 1 << NF_INET_PRE_ROUTING,
196 .me = THIS_MODULE,
197 },
198 {
199 .name = "socket",
200 .revision = 1,
201 .family = NFPROTO_IPV4,
202 .match = socket_mt_v1,
203 .matchsize = sizeof(struct xt_socket_mtinfo1),
204 .hooks = 1 << NF_INET_PRE_ROUTING,
205 .me = THIS_MODULE,
206 },
166}; 207};
167 208
168static int __init socket_mt_init(void) 209static int __init socket_mt_init(void)
169{ 210{
170 nf_defrag_ipv4_enable(); 211 nf_defrag_ipv4_enable();
171 return xt_register_match(&socket_mt_reg); 212 return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
172} 213}
173 214
174static void __exit socket_mt_exit(void) 215static void __exit socket_mt_exit(void)
175{ 216{
176 xt_unregister_match(&socket_mt_reg); 217 xt_unregister_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
177} 218}
178 219
179module_init(socket_mt_init); 220module_init(socket_mt_init);
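
The revision-1 socket match adds one decision on top of the revision-0 lookup: optionally require the found socket to be transparent. A hypothetical userspace sketch of that decision (not from the patch; the boolean parameters stand in for sk->sk_state, inet_sk(sk)->rcv_saddr, and the transparent flag of either the socket or its TIME_WAIT remnant):

	#include <stdio.h>
	#include <stdbool.h>

	enum sk_state { SK_ESTABLISHED, SK_TIME_WAIT };

	static bool socket_accepted(enum sk_state state, bool rcv_saddr_any,
				    bool transparent_flag, bool want_transparent)
	{
		/* Ignore sockets bound to INADDR_ANY (wildcard listeners),
		 * as in the original revision-0 behaviour. */
		bool wildcard = (state != SK_TIME_WAIT && rcv_saddr_any);

		/* With XT_SOCKET_TRANSPARENT set, additionally require the
		 * socket (or its TIME_WAIT remnant) to have IP_TRANSPARENT. */
		bool transparent = !want_transparent || transparent_flag;

		return !wildcard && transparent;
	}

	int main(void)
	{
		printf("established, bound, transparent: %d\n",
		       socket_accepted(SK_ESTABLISHED, false, true, true));
		printf("established, wildcard listener:  %d\n",
		       socket_accepted(SK_ESTABLISHED, true, true, true));
		printf("established, non-transparent:    %d\n",
		       socket_accepted(SK_ESTABLISHED, false, false, true));
		return 0;
	}
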