aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2012-12-04 13:01:19 -0500
committerDavid S. Miller <davem@davemloft.net>2012-12-04 13:01:19 -0500
commite8ad1a8fab6f550aba1f1fe7ba26749ff5460751 (patch)
treedf5ed3d6660f929ecd3194f93cc1e48d69f872c9
parent099f7aa7400df0ffcc8d3c30dcde1ce0595d8a32 (diff)
parenta0ecb85a2c3af73c63b6d44ce82aea52347ccf55 (diff)
Merge branch 'master' of git://1984.lsi.us.es/nf-next
Pablo Neira Ayuso says: ==================== * Remove limitation in the maximum number of supported sets in ipset. Now ipset automagically increments the number of slots in the array of sets by 64 new spare slots, from Jozsef Kadlecsik. * Partially remove the generic queue infrastructure now that ip_queue is gone. Its only client is nfnetlink_queue now, from Florian Westphal. * Add missing attribute policy checks in ctnetlink, from Florian Westphal. * Automagically kill conntrack entries that use the wrong output interface for the masquerading case in case of routing changes, from Jozsef Kadlecsik. * Two patches to improve ct object traceability. Now ct objects are always placed in any of the existing lists. This allows us to dump the content of unconfirmed and dying conntracks via ctnetlink as a way to provide more instrumentation in case you suspect leaks, from myself. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/netfilter/nf_conntrack.h2
-rw-r--r--include/net/netfilter/nf_nat.h15
-rw-r--r--include/net/netfilter/nf_queue.h8
-rw-r--r--include/uapi/linux/netfilter/nfnetlink_conntrack.h2
-rw-r--r--net/ipv4/netfilter/iptable_nat.c4
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c4
-rw-r--r--net/netfilter/core.c2
-rw-r--r--net/netfilter/ipset/ip_set_core.c243
-rw-r--r--net/netfilter/nf_conntrack_core.c25
-rw-r--r--net/netfilter/nf_conntrack_netlink.c118
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c2
-rw-r--r--net/netfilter/nf_queue.c152
-rw-r--r--net/netfilter/nfnetlink_queue_core.c14
13 files changed, 332 insertions, 259 deletions
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index f1494feba79f..caca0c4d6b4b 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -182,7 +182,7 @@ __nf_conntrack_find(struct net *net, u16 zone,
182 182
183extern int nf_conntrack_hash_check_insert(struct nf_conn *ct); 183extern int nf_conntrack_hash_check_insert(struct nf_conn *ct);
184extern void nf_ct_delete_from_lists(struct nf_conn *ct); 184extern void nf_ct_delete_from_lists(struct nf_conn *ct);
185extern void nf_ct_insert_dying_list(struct nf_conn *ct); 185extern void nf_ct_dying_timeout(struct nf_conn *ct);
186 186
187extern void nf_conntrack_flush_report(struct net *net, u32 pid, int report); 187extern void nf_conntrack_flush_report(struct net *net, u32 pid, int report);
188 188
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index bd8eea720f2e..ad14a799fd2e 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -68,4 +68,19 @@ static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
68#endif 68#endif
69} 69}
70 70
71static inline bool nf_nat_oif_changed(unsigned int hooknum,
72 enum ip_conntrack_info ctinfo,
73 struct nf_conn_nat *nat,
74 const struct net_device *out)
75{
76#if IS_ENABLED(CONFIG_IP_NF_TARGET_MASQUERADE) || \
77 IS_ENABLED(CONFIG_IP6_NF_TARGET_MASQUERADE)
78 return nat->masq_index && hooknum == NF_INET_POST_ROUTING &&
79 CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL &&
80 nat->masq_index != out->ifindex;
81#else
82 return false;
83#endif
84}
85
71#endif 86#endif
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 252fd1010b77..fb1c0be38b6d 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -21,14 +21,10 @@ struct nf_queue_entry {
21struct nf_queue_handler { 21struct nf_queue_handler {
22 int (*outfn)(struct nf_queue_entry *entry, 22 int (*outfn)(struct nf_queue_entry *entry,
23 unsigned int queuenum); 23 unsigned int queuenum);
24 char *name;
25}; 24};
26 25
27extern int nf_register_queue_handler(u_int8_t pf, 26void nf_register_queue_handler(const struct nf_queue_handler *qh);
28 const struct nf_queue_handler *qh); 27void nf_unregister_queue_handler(void);
29extern int nf_unregister_queue_handler(u_int8_t pf,
30 const struct nf_queue_handler *qh);
31extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh);
32extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); 28extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
33 29
34#endif /* _NF_QUEUE_H */ 30#endif /* _NF_QUEUE_H */
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 43bfe3e1685b..86e930cf3dfb 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -9,6 +9,8 @@ enum cntl_msg_types {
9 IPCTNL_MSG_CT_GET_CTRZERO, 9 IPCTNL_MSG_CT_GET_CTRZERO,
10 IPCTNL_MSG_CT_GET_STATS_CPU, 10 IPCTNL_MSG_CT_GET_STATS_CPU,
11 IPCTNL_MSG_CT_GET_STATS, 11 IPCTNL_MSG_CT_GET_STATS,
12 IPCTNL_MSG_CT_GET_DYING,
13 IPCTNL_MSG_CT_GET_UNCONFIRMED,
12 14
13 IPCTNL_MSG_MAX 15 IPCTNL_MSG_MAX
14}; 16};
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index ac635a7b4416..da2c8a368f68 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -134,6 +134,10 @@ nf_nat_ipv4_fn(unsigned int hooknum,
134 /* ESTABLISHED */ 134 /* ESTABLISHED */
135 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || 135 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
136 ctinfo == IP_CT_ESTABLISHED_REPLY); 136 ctinfo == IP_CT_ESTABLISHED_REPLY);
137 if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) {
138 nf_ct_kill_acct(ct, ctinfo, skb);
139 return NF_DROP;
140 }
137 } 141 }
138 142
139 return nf_nat_packet(ct, ctinfo, hooknum, skb); 143 return nf_nat_packet(ct, ctinfo, hooknum, skb);
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index fa84cf8ec6bc..6c8ae24b85eb 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -137,6 +137,10 @@ nf_nat_ipv6_fn(unsigned int hooknum,
137 /* ESTABLISHED */ 137 /* ESTABLISHED */
138 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || 138 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
139 ctinfo == IP_CT_ESTABLISHED_REPLY); 139 ctinfo == IP_CT_ESTABLISHED_REPLY);
140 if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) {
141 nf_ct_kill_acct(ct, ctinfo, skb);
142 return NF_DROP;
143 }
140 } 144 }
141 145
142 return nf_nat_packet(ct, ctinfo, hooknum, skb); 146 return nf_nat_packet(ct, ctinfo, hooknum, skb);
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 68912dadf13d..a9c488b6c50d 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -295,8 +295,6 @@ void __init netfilter_init(void)
295 panic("cannot create netfilter proc entry"); 295 panic("cannot create netfilter proc entry");
296#endif 296#endif
297 297
298 if (netfilter_queue_init() < 0)
299 panic("cannot initialize nf_queue");
300 if (netfilter_log_init() < 0) 298 if (netfilter_log_init() < 0)
301 panic("cannot initialize nf_log"); 299 panic("cannot initialize nf_log");
302} 300}
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index fed899f600b2..6d6d8f2b033e 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -28,9 +28,10 @@ static LIST_HEAD(ip_set_type_list); /* all registered set types */
28static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ 28static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */
29static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ 29static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */
30 30
31static struct ip_set **ip_set_list; /* all individual sets */ 31static struct ip_set * __rcu *ip_set_list; /* all individual sets */
32static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ 32static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
33 33
34#define IP_SET_INC 64
34#define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0) 35#define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
35 36
36static unsigned int max_sets; 37static unsigned int max_sets;
@@ -42,6 +43,12 @@ MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
42MODULE_DESCRIPTION("core IP set support"); 43MODULE_DESCRIPTION("core IP set support");
43MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); 44MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
44 45
46/* When the nfnl mutex is held: */
47#define nfnl_dereference(p) \
48 rcu_dereference_protected(p, 1)
49#define nfnl_set(id) \
50 nfnl_dereference(ip_set_list)[id]
51
45/* 52/*
46 * The set types are implemented in modules and registered set types 53 * The set types are implemented in modules and registered set types
47 * can be found in ip_set_type_list. Adding/deleting types is 54 * can be found in ip_set_type_list. Adding/deleting types is
@@ -321,19 +328,19 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
321 */ 328 */
322 329
323static inline void 330static inline void
324__ip_set_get(ip_set_id_t index) 331__ip_set_get(struct ip_set *set)
325{ 332{
326 write_lock_bh(&ip_set_ref_lock); 333 write_lock_bh(&ip_set_ref_lock);
327 ip_set_list[index]->ref++; 334 set->ref++;
328 write_unlock_bh(&ip_set_ref_lock); 335 write_unlock_bh(&ip_set_ref_lock);
329} 336}
330 337
331static inline void 338static inline void
332__ip_set_put(ip_set_id_t index) 339__ip_set_put(struct ip_set *set)
333{ 340{
334 write_lock_bh(&ip_set_ref_lock); 341 write_lock_bh(&ip_set_ref_lock);
335 BUG_ON(ip_set_list[index]->ref == 0); 342 BUG_ON(set->ref == 0);
336 ip_set_list[index]->ref--; 343 set->ref--;
337 write_unlock_bh(&ip_set_ref_lock); 344 write_unlock_bh(&ip_set_ref_lock);
338} 345}
339 346
@@ -344,12 +351,25 @@ __ip_set_put(ip_set_id_t index)
344 * so it can't be destroyed (or changed) under our foot. 351 * so it can't be destroyed (or changed) under our foot.
345 */ 352 */
346 353
354static inline struct ip_set *
355ip_set_rcu_get(ip_set_id_t index)
356{
357 struct ip_set *set;
358
359 rcu_read_lock();
360 /* ip_set_list itself needs to be protected */
361 set = rcu_dereference(ip_set_list)[index];
362 rcu_read_unlock();
363
364 return set;
365}
366
347int 367int
348ip_set_test(ip_set_id_t index, const struct sk_buff *skb, 368ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
349 const struct xt_action_param *par, 369 const struct xt_action_param *par,
350 const struct ip_set_adt_opt *opt) 370 const struct ip_set_adt_opt *opt)
351{ 371{
352 struct ip_set *set = ip_set_list[index]; 372 struct ip_set *set = ip_set_rcu_get(index);
353 int ret = 0; 373 int ret = 0;
354 374
355 BUG_ON(set == NULL); 375 BUG_ON(set == NULL);
@@ -388,7 +408,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
388 const struct xt_action_param *par, 408 const struct xt_action_param *par,
389 const struct ip_set_adt_opt *opt) 409 const struct ip_set_adt_opt *opt)
390{ 410{
391 struct ip_set *set = ip_set_list[index]; 411 struct ip_set *set = ip_set_rcu_get(index);
392 int ret; 412 int ret;
393 413
394 BUG_ON(set == NULL); 414 BUG_ON(set == NULL);
@@ -411,7 +431,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
411 const struct xt_action_param *par, 431 const struct xt_action_param *par,
412 const struct ip_set_adt_opt *opt) 432 const struct ip_set_adt_opt *opt)
413{ 433{
414 struct ip_set *set = ip_set_list[index]; 434 struct ip_set *set = ip_set_rcu_get(index);
415 int ret = 0; 435 int ret = 0;
416 436
417 BUG_ON(set == NULL); 437 BUG_ON(set == NULL);
@@ -440,14 +460,17 @@ ip_set_get_byname(const char *name, struct ip_set **set)
440 ip_set_id_t i, index = IPSET_INVALID_ID; 460 ip_set_id_t i, index = IPSET_INVALID_ID;
441 struct ip_set *s; 461 struct ip_set *s;
442 462
463 rcu_read_lock();
443 for (i = 0; i < ip_set_max; i++) { 464 for (i = 0; i < ip_set_max; i++) {
444 s = ip_set_list[i]; 465 s = rcu_dereference(ip_set_list)[i];
445 if (s != NULL && STREQ(s->name, name)) { 466 if (s != NULL && STREQ(s->name, name)) {
446 __ip_set_get(i); 467 __ip_set_get(s);
447 index = i; 468 index = i;
448 *set = s; 469 *set = s;
470 break;
449 } 471 }
450 } 472 }
473 rcu_read_unlock();
451 474
452 return index; 475 return index;
453} 476}
@@ -462,8 +485,13 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname);
462void 485void
463ip_set_put_byindex(ip_set_id_t index) 486ip_set_put_byindex(ip_set_id_t index)
464{ 487{
465 if (ip_set_list[index] != NULL) 488 struct ip_set *set;
466 __ip_set_put(index); 489
490 rcu_read_lock();
491 set = rcu_dereference(ip_set_list)[index];
492 if (set != NULL)
493 __ip_set_put(set);
494 rcu_read_unlock();
467} 495}
468EXPORT_SYMBOL_GPL(ip_set_put_byindex); 496EXPORT_SYMBOL_GPL(ip_set_put_byindex);
469 497
@@ -477,7 +505,7 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex);
477const char * 505const char *
478ip_set_name_byindex(ip_set_id_t index) 506ip_set_name_byindex(ip_set_id_t index)
479{ 507{
480 const struct ip_set *set = ip_set_list[index]; 508 const struct ip_set *set = ip_set_rcu_get(index);
481 509
482 BUG_ON(set == NULL); 510 BUG_ON(set == NULL);
483 BUG_ON(set->ref == 0); 511 BUG_ON(set->ref == 0);
@@ -501,11 +529,18 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex);
501ip_set_id_t 529ip_set_id_t
502ip_set_nfnl_get(const char *name) 530ip_set_nfnl_get(const char *name)
503{ 531{
532 ip_set_id_t i, index = IPSET_INVALID_ID;
504 struct ip_set *s; 533 struct ip_set *s;
505 ip_set_id_t index;
506 534
507 nfnl_lock(); 535 nfnl_lock();
508 index = ip_set_get_byname(name, &s); 536 for (i = 0; i < ip_set_max; i++) {
537 s = nfnl_set(i);
538 if (s != NULL && STREQ(s->name, name)) {
539 __ip_set_get(s);
540 index = i;
541 break;
542 }
543 }
509 nfnl_unlock(); 544 nfnl_unlock();
510 545
511 return index; 546 return index;
@@ -521,12 +556,15 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
521ip_set_id_t 556ip_set_id_t
522ip_set_nfnl_get_byindex(ip_set_id_t index) 557ip_set_nfnl_get_byindex(ip_set_id_t index)
523{ 558{
559 struct ip_set *set;
560
524 if (index > ip_set_max) 561 if (index > ip_set_max)
525 return IPSET_INVALID_ID; 562 return IPSET_INVALID_ID;
526 563
527 nfnl_lock(); 564 nfnl_lock();
528 if (ip_set_list[index]) 565 set = nfnl_set(index);
529 __ip_set_get(index); 566 if (set)
567 __ip_set_get(set);
530 else 568 else
531 index = IPSET_INVALID_ID; 569 index = IPSET_INVALID_ID;
532 nfnl_unlock(); 570 nfnl_unlock();
@@ -545,8 +583,11 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
545void 583void
546ip_set_nfnl_put(ip_set_id_t index) 584ip_set_nfnl_put(ip_set_id_t index)
547{ 585{
586 struct ip_set *set;
548 nfnl_lock(); 587 nfnl_lock();
549 ip_set_put_byindex(index); 588 set = nfnl_set(index);
589 if (set != NULL)
590 __ip_set_put(set);
550 nfnl_unlock(); 591 nfnl_unlock();
551} 592}
552EXPORT_SYMBOL_GPL(ip_set_nfnl_put); 593EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
@@ -603,41 +644,46 @@ static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
603 [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, 644 [IPSET_ATTR_DATA] = { .type = NLA_NESTED },
604}; 645};
605 646
606static ip_set_id_t 647static struct ip_set *
607find_set_id(const char *name) 648find_set_and_id(const char *name, ip_set_id_t *id)
608{ 649{
609 ip_set_id_t i, index = IPSET_INVALID_ID; 650 struct ip_set *set = NULL;
610 const struct ip_set *set; 651 ip_set_id_t i;
611 652
612 for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) { 653 *id = IPSET_INVALID_ID;
613 set = ip_set_list[i]; 654 for (i = 0; i < ip_set_max; i++) {
614 if (set != NULL && STREQ(set->name, name)) 655 set = nfnl_set(i);
615 index = i; 656 if (set != NULL && STREQ(set->name, name)) {
657 *id = i;
658 break;
659 }
616 } 660 }
617 return index; 661 return (*id == IPSET_INVALID_ID ? NULL : set);
618} 662}
619 663
620static inline struct ip_set * 664static inline struct ip_set *
621find_set(const char *name) 665find_set(const char *name)
622{ 666{
623 ip_set_id_t index = find_set_id(name); 667 ip_set_id_t id;
624 668
625 return index == IPSET_INVALID_ID ? NULL : ip_set_list[index]; 669 return find_set_and_id(name, &id);
626} 670}
627 671
628static int 672static int
629find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set) 673find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
630{ 674{
675 struct ip_set *s;
631 ip_set_id_t i; 676 ip_set_id_t i;
632 677
633 *index = IPSET_INVALID_ID; 678 *index = IPSET_INVALID_ID;
634 for (i = 0; i < ip_set_max; i++) { 679 for (i = 0; i < ip_set_max; i++) {
635 if (ip_set_list[i] == NULL) { 680 s = nfnl_set(i);
681 if (s == NULL) {
636 if (*index == IPSET_INVALID_ID) 682 if (*index == IPSET_INVALID_ID)
637 *index = i; 683 *index = i;
638 } else if (STREQ(name, ip_set_list[i]->name)) { 684 } else if (STREQ(name, s->name)) {
639 /* Name clash */ 685 /* Name clash */
640 *set = ip_set_list[i]; 686 *set = s;
641 return -EEXIST; 687 return -EEXIST;
642 } 688 }
643 } 689 }
@@ -730,10 +776,9 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
730 * and check clashing. 776 * and check clashing.
731 */ 777 */
732 ret = find_free_id(set->name, &index, &clash); 778 ret = find_free_id(set->name, &index, &clash);
733 if (ret != 0) { 779 if (ret == -EEXIST) {
734 /* If this is the same set and requested, ignore error */ 780 /* If this is the same set and requested, ignore error */
735 if (ret == -EEXIST && 781 if ((flags & IPSET_FLAG_EXIST) &&
736 (flags & IPSET_FLAG_EXIST) &&
737 STREQ(set->type->name, clash->type->name) && 782 STREQ(set->type->name, clash->type->name) &&
738 set->type->family == clash->type->family && 783 set->type->family == clash->type->family &&
739 set->type->revision_min == clash->type->revision_min && 784 set->type->revision_min == clash->type->revision_min &&
@@ -741,13 +786,36 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
741 set->variant->same_set(set, clash)) 786 set->variant->same_set(set, clash))
742 ret = 0; 787 ret = 0;
743 goto cleanup; 788 goto cleanup;
744 } 789 } else if (ret == -IPSET_ERR_MAX_SETS) {
790 struct ip_set **list, **tmp;
791 ip_set_id_t i = ip_set_max + IP_SET_INC;
792
793 if (i < ip_set_max || i == IPSET_INVALID_ID)
794 /* Wraparound */
795 goto cleanup;
796
797 list = kzalloc(sizeof(struct ip_set *) * i, GFP_KERNEL);
798 if (!list)
799 goto cleanup;
800 /* nfnl mutex is held, both lists are valid */
801 tmp = nfnl_dereference(ip_set_list);
802 memcpy(list, tmp, sizeof(struct ip_set *) * ip_set_max);
803 rcu_assign_pointer(ip_set_list, list);
804 /* Make sure all current packets have passed through */
805 synchronize_net();
806 /* Use new list */
807 index = ip_set_max;
808 ip_set_max = i;
809 kfree(tmp);
810 ret = 0;
811 } else if (ret)
812 goto cleanup;
745 813
746 /* 814 /*
747 * Finally! Add our shiny new set to the list, and be done. 815 * Finally! Add our shiny new set to the list, and be done.
748 */ 816 */
749 pr_debug("create: '%s' created with index %u!\n", set->name, index); 817 pr_debug("create: '%s' created with index %u!\n", set->name, index);
750 ip_set_list[index] = set; 818 nfnl_set(index) = set;
751 819
752 return ret; 820 return ret;
753 821
@@ -772,10 +840,10 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
772static void 840static void
773ip_set_destroy_set(ip_set_id_t index) 841ip_set_destroy_set(ip_set_id_t index)
774{ 842{
775 struct ip_set *set = ip_set_list[index]; 843 struct ip_set *set = nfnl_set(index);
776 844
777 pr_debug("set: %s\n", set->name); 845 pr_debug("set: %s\n", set->name);
778 ip_set_list[index] = NULL; 846 nfnl_set(index) = NULL;
779 847
780 /* Must call it without holding any lock */ 848 /* Must call it without holding any lock */
781 set->variant->destroy(set); 849 set->variant->destroy(set);
@@ -788,6 +856,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
788 const struct nlmsghdr *nlh, 856 const struct nlmsghdr *nlh,
789 const struct nlattr * const attr[]) 857 const struct nlattr * const attr[])
790{ 858{
859 struct ip_set *s;
791 ip_set_id_t i; 860 ip_set_id_t i;
792 int ret = 0; 861 int ret = 0;
793 862
@@ -807,22 +876,24 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
807 read_lock_bh(&ip_set_ref_lock); 876 read_lock_bh(&ip_set_ref_lock);
808 if (!attr[IPSET_ATTR_SETNAME]) { 877 if (!attr[IPSET_ATTR_SETNAME]) {
809 for (i = 0; i < ip_set_max; i++) { 878 for (i = 0; i < ip_set_max; i++) {
810 if (ip_set_list[i] != NULL && ip_set_list[i]->ref) { 879 s = nfnl_set(i);
880 if (s != NULL && s->ref) {
811 ret = -IPSET_ERR_BUSY; 881 ret = -IPSET_ERR_BUSY;
812 goto out; 882 goto out;
813 } 883 }
814 } 884 }
815 read_unlock_bh(&ip_set_ref_lock); 885 read_unlock_bh(&ip_set_ref_lock);
816 for (i = 0; i < ip_set_max; i++) { 886 for (i = 0; i < ip_set_max; i++) {
817 if (ip_set_list[i] != NULL) 887 s = nfnl_set(i);
888 if (s != NULL)
818 ip_set_destroy_set(i); 889 ip_set_destroy_set(i);
819 } 890 }
820 } else { 891 } else {
821 i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); 892 s = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &i);
822 if (i == IPSET_INVALID_ID) { 893 if (s == NULL) {
823 ret = -ENOENT; 894 ret = -ENOENT;
824 goto out; 895 goto out;
825 } else if (ip_set_list[i]->ref) { 896 } else if (s->ref) {
826 ret = -IPSET_ERR_BUSY; 897 ret = -IPSET_ERR_BUSY;
827 goto out; 898 goto out;
828 } 899 }
@@ -853,21 +924,24 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
853 const struct nlmsghdr *nlh, 924 const struct nlmsghdr *nlh,
854 const struct nlattr * const attr[]) 925 const struct nlattr * const attr[])
855{ 926{
927 struct ip_set *s;
856 ip_set_id_t i; 928 ip_set_id_t i;
857 929
858 if (unlikely(protocol_failed(attr))) 930 if (unlikely(protocol_failed(attr)))
859 return -IPSET_ERR_PROTOCOL; 931 return -IPSET_ERR_PROTOCOL;
860 932
861 if (!attr[IPSET_ATTR_SETNAME]) { 933 if (!attr[IPSET_ATTR_SETNAME]) {
862 for (i = 0; i < ip_set_max; i++) 934 for (i = 0; i < ip_set_max; i++) {
863 if (ip_set_list[i] != NULL) 935 s = nfnl_set(i);
864 ip_set_flush_set(ip_set_list[i]); 936 if (s != NULL)
937 ip_set_flush_set(s);
938 }
865 } else { 939 } else {
866 i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); 940 s = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
867 if (i == IPSET_INVALID_ID) 941 if (s == NULL)
868 return -ENOENT; 942 return -ENOENT;
869 943
870 ip_set_flush_set(ip_set_list[i]); 944 ip_set_flush_set(s);
871 } 945 }
872 946
873 return 0; 947 return 0;
@@ -889,7 +963,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
889 const struct nlmsghdr *nlh, 963 const struct nlmsghdr *nlh,
890 const struct nlattr * const attr[]) 964 const struct nlattr * const attr[])
891{ 965{
892 struct ip_set *set; 966 struct ip_set *set, *s;
893 const char *name2; 967 const char *name2;
894 ip_set_id_t i; 968 ip_set_id_t i;
895 int ret = 0; 969 int ret = 0;
@@ -911,8 +985,8 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
911 985
912 name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); 986 name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
913 for (i = 0; i < ip_set_max; i++) { 987 for (i = 0; i < ip_set_max; i++) {
914 if (ip_set_list[i] != NULL && 988 s = nfnl_set(i);
915 STREQ(ip_set_list[i]->name, name2)) { 989 if (s != NULL && STREQ(s->name, name2)) {
916 ret = -IPSET_ERR_EXIST_SETNAME2; 990 ret = -IPSET_ERR_EXIST_SETNAME2;
917 goto out; 991 goto out;
918 } 992 }
@@ -947,17 +1021,14 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
947 attr[IPSET_ATTR_SETNAME2] == NULL)) 1021 attr[IPSET_ATTR_SETNAME2] == NULL))
948 return -IPSET_ERR_PROTOCOL; 1022 return -IPSET_ERR_PROTOCOL;
949 1023
950 from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); 1024 from = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &from_id);
951 if (from_id == IPSET_INVALID_ID) 1025 if (from == NULL)
952 return -ENOENT; 1026 return -ENOENT;
953 1027
954 to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2])); 1028 to = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id);
955 if (to_id == IPSET_INVALID_ID) 1029 if (to == NULL)
956 return -IPSET_ERR_EXIST_SETNAME2; 1030 return -IPSET_ERR_EXIST_SETNAME2;
957 1031
958 from = ip_set_list[from_id];
959 to = ip_set_list[to_id];
960
961 /* Features must not change. 1032 /* Features must not change.
962 * Not an artificial restriction anymore, as we must prevent 1033 * Not an artificial restriction anymore, as we must prevent
963 * possible loops created by swapping in setlist type of sets. */ 1034 * possible loops created by swapping in setlist type of sets. */
@@ -971,8 +1042,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
971 1042
972 write_lock_bh(&ip_set_ref_lock); 1043 write_lock_bh(&ip_set_ref_lock);
973 swap(from->ref, to->ref); 1044 swap(from->ref, to->ref);
974 ip_set_list[from_id] = to; 1045 nfnl_set(from_id) = to;
975 ip_set_list[to_id] = from; 1046 nfnl_set(to_id) = from;
976 write_unlock_bh(&ip_set_ref_lock); 1047 write_unlock_bh(&ip_set_ref_lock);
977 1048
978 return 0; 1049 return 0;
@@ -992,7 +1063,7 @@ static int
992ip_set_dump_done(struct netlink_callback *cb) 1063ip_set_dump_done(struct netlink_callback *cb)
993{ 1064{
994 if (cb->args[2]) { 1065 if (cb->args[2]) {
995 pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name); 1066 pr_debug("release set %s\n", nfnl_set(cb->args[1])->name);
996 ip_set_put_byindex((ip_set_id_t) cb->args[1]); 1067 ip_set_put_byindex((ip_set_id_t) cb->args[1]);
997 } 1068 }
998 return 0; 1069 return 0;
@@ -1030,8 +1101,11 @@ dump_init(struct netlink_callback *cb)
1030 */ 1101 */
1031 1102
1032 if (cda[IPSET_ATTR_SETNAME]) { 1103 if (cda[IPSET_ATTR_SETNAME]) {
1033 index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME])); 1104 struct ip_set *set;
1034 if (index == IPSET_INVALID_ID) 1105
1106 set = find_set_and_id(nla_data(cda[IPSET_ATTR_SETNAME]),
1107 &index);
1108 if (set == NULL)
1035 return -ENOENT; 1109 return -ENOENT;
1036 1110
1037 dump_type = DUMP_ONE; 1111 dump_type = DUMP_ONE;
@@ -1081,7 +1155,7 @@ dump_last:
1081 dump_type, dump_flags, cb->args[1]); 1155 dump_type, dump_flags, cb->args[1]);
1082 for (; cb->args[1] < max; cb->args[1]++) { 1156 for (; cb->args[1] < max; cb->args[1]++) {
1083 index = (ip_set_id_t) cb->args[1]; 1157 index = (ip_set_id_t) cb->args[1];
1084 set = ip_set_list[index]; 1158 set = nfnl_set(index);
1085 if (set == NULL) { 1159 if (set == NULL) {
1086 if (dump_type == DUMP_ONE) { 1160 if (dump_type == DUMP_ONE) {
1087 ret = -ENOENT; 1161 ret = -ENOENT;
@@ -1100,7 +1174,7 @@ dump_last:
1100 if (!cb->args[2]) { 1174 if (!cb->args[2]) {
1101 /* Start listing: make sure set won't be destroyed */ 1175 /* Start listing: make sure set won't be destroyed */
1102 pr_debug("reference set\n"); 1176 pr_debug("reference set\n");
1103 __ip_set_get(index); 1177 __ip_set_get(set);
1104 } 1178 }
1105 nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, 1179 nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
1106 cb->nlh->nlmsg_seq, flags, 1180 cb->nlh->nlmsg_seq, flags,
@@ -1159,7 +1233,7 @@ next_set:
1159release_refcount: 1233release_refcount:
1160 /* If there was an error or set is done, release set */ 1234 /* If there was an error or set is done, release set */
1161 if (ret || !cb->args[2]) { 1235 if (ret || !cb->args[2]) {
1162 pr_debug("release set %s\n", ip_set_list[index]->name); 1236 pr_debug("release set %s\n", nfnl_set(index)->name);
1163 ip_set_put_byindex(index); 1237 ip_set_put_byindex(index);
1164 cb->args[2] = 0; 1238 cb->args[2] = 0;
1165 } 1239 }
@@ -1409,17 +1483,15 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
1409 const struct ip_set *set; 1483 const struct ip_set *set;
1410 struct sk_buff *skb2; 1484 struct sk_buff *skb2;
1411 struct nlmsghdr *nlh2; 1485 struct nlmsghdr *nlh2;
1412 ip_set_id_t index;
1413 int ret = 0; 1486 int ret = 0;
1414 1487
1415 if (unlikely(protocol_failed(attr) || 1488 if (unlikely(protocol_failed(attr) ||
1416 attr[IPSET_ATTR_SETNAME] == NULL)) 1489 attr[IPSET_ATTR_SETNAME] == NULL))
1417 return -IPSET_ERR_PROTOCOL; 1490 return -IPSET_ERR_PROTOCOL;
1418 1491
1419 index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); 1492 set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
1420 if (index == IPSET_INVALID_ID) 1493 if (set == NULL)
1421 return -ENOENT; 1494 return -ENOENT;
1422 set = ip_set_list[index];
1423 1495
1424 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1496 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1425 if (skb2 == NULL) 1497 if (skb2 == NULL)
@@ -1684,6 +1756,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1684 } 1756 }
1685 case IP_SET_OP_GET_BYNAME: { 1757 case IP_SET_OP_GET_BYNAME: {
1686 struct ip_set_req_get_set *req_get = data; 1758 struct ip_set_req_get_set *req_get = data;
1759 ip_set_id_t id;
1687 1760
1688 if (*len != sizeof(struct ip_set_req_get_set)) { 1761 if (*len != sizeof(struct ip_set_req_get_set)) {
1689 ret = -EINVAL; 1762 ret = -EINVAL;
@@ -1691,12 +1764,14 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1691 } 1764 }
1692 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; 1765 req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
1693 nfnl_lock(); 1766 nfnl_lock();
1694 req_get->set.index = find_set_id(req_get->set.name); 1767 find_set_and_id(req_get->set.name, &id);
1768 req_get->set.index = id;
1695 nfnl_unlock(); 1769 nfnl_unlock();
1696 goto copy; 1770 goto copy;
1697 } 1771 }
1698 case IP_SET_OP_GET_BYINDEX: { 1772 case IP_SET_OP_GET_BYINDEX: {
1699 struct ip_set_req_get_set *req_get = data; 1773 struct ip_set_req_get_set *req_get = data;
1774 struct ip_set *set;
1700 1775
1701 if (*len != sizeof(struct ip_set_req_get_set) || 1776 if (*len != sizeof(struct ip_set_req_get_set) ||
1702 req_get->set.index >= ip_set_max) { 1777 req_get->set.index >= ip_set_max) {
@@ -1704,9 +1779,8 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
1704 goto done; 1779 goto done;
1705 } 1780 }
1706 nfnl_lock(); 1781 nfnl_lock();
1707 strncpy(req_get->set.name, 1782 set = nfnl_set(req_get->set.index);
1708 ip_set_list[req_get->set.index] 1783 strncpy(req_get->set.name, set ? set->name : "",
1709 ? ip_set_list[req_get->set.index]->name : "",
1710 IPSET_MAXNAMELEN); 1784 IPSET_MAXNAMELEN);
1711 nfnl_unlock(); 1785 nfnl_unlock();
1712 goto copy; 1786 goto copy;
@@ -1737,6 +1811,7 @@ static struct nf_sockopt_ops so_set __read_mostly = {
1737static int __init 1811static int __init
1738ip_set_init(void) 1812ip_set_init(void)
1739{ 1813{
1814 struct ip_set **list;
1740 int ret; 1815 int ret;
1741 1816
1742 if (max_sets) 1817 if (max_sets)
@@ -1744,22 +1819,22 @@ ip_set_init(void)
1744 if (ip_set_max >= IPSET_INVALID_ID) 1819 if (ip_set_max >= IPSET_INVALID_ID)
1745 ip_set_max = IPSET_INVALID_ID - 1; 1820 ip_set_max = IPSET_INVALID_ID - 1;
1746 1821
1747 ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max, 1822 list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL);
1748 GFP_KERNEL); 1823 if (!list)
1749 if (!ip_set_list)
1750 return -ENOMEM; 1824 return -ENOMEM;
1751 1825
1826 rcu_assign_pointer(ip_set_list, list);
1752 ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); 1827 ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
1753 if (ret != 0) { 1828 if (ret != 0) {
1754 pr_err("ip_set: cannot register with nfnetlink.\n"); 1829 pr_err("ip_set: cannot register with nfnetlink.\n");
1755 kfree(ip_set_list); 1830 kfree(list);
1756 return ret; 1831 return ret;
1757 } 1832 }
1758 ret = nf_register_sockopt(&so_set); 1833 ret = nf_register_sockopt(&so_set);
1759 if (ret != 0) { 1834 if (ret != 0) {
1760 pr_err("SO_SET registry failed: %d\n", ret); 1835 pr_err("SO_SET registry failed: %d\n", ret);
1761 nfnetlink_subsys_unregister(&ip_set_netlink_subsys); 1836 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1762 kfree(ip_set_list); 1837 kfree(list);
1763 return ret; 1838 return ret;
1764 } 1839 }
1765 1840
@@ -1770,10 +1845,12 @@ ip_set_init(void)
1770static void __exit 1845static void __exit
1771ip_set_fini(void) 1846ip_set_fini(void)
1772{ 1847{
1848 struct ip_set **list = rcu_dereference_protected(ip_set_list, 1);
1849
1773 /* There can't be any existing set */ 1850 /* There can't be any existing set */
1774 nf_unregister_sockopt(&so_set); 1851 nf_unregister_sockopt(&so_set);
1775 nfnetlink_subsys_unregister(&ip_set_netlink_subsys); 1852 nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
1776 kfree(ip_set_list); 1853 kfree(list);
1777 pr_debug("these are the famous last words\n"); 1854 pr_debug("these are the famous last words\n");
1778} 1855}
1779 1856
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0f241be28f9e..af175166fffa 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -221,11 +221,9 @@ destroy_conntrack(struct nf_conntrack *nfct)
221 * too. */ 221 * too. */
222 nf_ct_remove_expectations(ct); 222 nf_ct_remove_expectations(ct);
223 223
224 /* We overload first tuple to link into unconfirmed list. */ 224 /* We overload first tuple to link into unconfirmed or dying list.*/
225 if (!nf_ct_is_confirmed(ct)) { 225 BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
226 BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode)); 226 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
227 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
228 }
229 227
230 NF_CT_STAT_INC(net, delete); 228 NF_CT_STAT_INC(net, delete);
231 spin_unlock_bh(&nf_conntrack_lock); 229 spin_unlock_bh(&nf_conntrack_lock);
@@ -247,6 +245,9 @@ void nf_ct_delete_from_lists(struct nf_conn *ct)
247 * Otherwise we can get spurious warnings. */ 245 * Otherwise we can get spurious warnings. */
248 NF_CT_STAT_INC(net, delete_list); 246 NF_CT_STAT_INC(net, delete_list);
249 clean_from_lists(ct); 247 clean_from_lists(ct);
248 /* add this conntrack to the dying list */
249 hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
250 &net->ct.dying);
250 spin_unlock_bh(&nf_conntrack_lock); 251 spin_unlock_bh(&nf_conntrack_lock);
251} 252}
252EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists); 253EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
@@ -268,31 +269,23 @@ static void death_by_event(unsigned long ul_conntrack)
268 } 269 }
269 /* we've got the event delivered, now it's dying */ 270 /* we've got the event delivered, now it's dying */
270 set_bit(IPS_DYING_BIT, &ct->status); 271 set_bit(IPS_DYING_BIT, &ct->status);
271 spin_lock(&nf_conntrack_lock);
272 hlist_nulls_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
273 spin_unlock(&nf_conntrack_lock);
274 nf_ct_put(ct); 272 nf_ct_put(ct);
275} 273}
276 274
277void nf_ct_insert_dying_list(struct nf_conn *ct) 275void nf_ct_dying_timeout(struct nf_conn *ct)
278{ 276{
279 struct net *net = nf_ct_net(ct); 277 struct net *net = nf_ct_net(ct);
280 struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); 278 struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
281 279
282 BUG_ON(ecache == NULL); 280 BUG_ON(ecache == NULL);
283 281
284 /* add this conntrack to the dying list */
285 spin_lock_bh(&nf_conntrack_lock);
286 hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
287 &net->ct.dying);
288 spin_unlock_bh(&nf_conntrack_lock);
289 /* set a new timer to retry event delivery */ 282 /* set a new timer to retry event delivery */
290 setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct); 283 setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct);
291 ecache->timeout.expires = jiffies + 284 ecache->timeout.expires = jiffies +
292 (random32() % net->ct.sysctl_events_retry_timeout); 285 (random32() % net->ct.sysctl_events_retry_timeout);
293 add_timer(&ecache->timeout); 286 add_timer(&ecache->timeout);
294} 287}
295EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list); 288EXPORT_SYMBOL_GPL(nf_ct_dying_timeout);
296 289
297static void death_by_timeout(unsigned long ul_conntrack) 290static void death_by_timeout(unsigned long ul_conntrack)
298{ 291{
@@ -307,7 +300,7 @@ static void death_by_timeout(unsigned long ul_conntrack)
307 unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { 300 unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
308 /* destroy event was not delivered */ 301 /* destroy event was not delivered */
309 nf_ct_delete_from_lists(ct); 302 nf_ct_delete_from_lists(ct);
310 nf_ct_insert_dying_list(ct); 303 nf_ct_dying_timeout(ct);
311 return; 304 return;
312 } 305 }
313 set_bit(IPS_DYING_BIT, &ct->status); 306 set_bit(IPS_DYING_BIT, &ct->status);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7bbfb3deea30..4e078cd84d83 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -898,7 +898,8 @@ ctnetlink_parse_zone(const struct nlattr *attr, u16 *zone)
898} 898}
899 899
900static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = { 900static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = {
901 [CTA_HELP_NAME] = { .type = NLA_NUL_STRING }, 901 [CTA_HELP_NAME] = { .type = NLA_NUL_STRING,
902 .len = NF_CT_HELPER_NAME_LEN - 1 },
902}; 903};
903 904
904static inline int 905static inline int
@@ -932,6 +933,8 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
932 [CTA_ID] = { .type = NLA_U32 }, 933 [CTA_ID] = { .type = NLA_U32 },
933 [CTA_NAT_DST] = { .type = NLA_NESTED }, 934 [CTA_NAT_DST] = { .type = NLA_NESTED },
934 [CTA_TUPLE_MASTER] = { .type = NLA_NESTED }, 935 [CTA_TUPLE_MASTER] = { .type = NLA_NESTED },
936 [CTA_NAT_SEQ_ADJ_ORIG] = { .type = NLA_NESTED },
937 [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NLA_NESTED },
935 [CTA_ZONE] = { .type = NLA_U16 }, 938 [CTA_ZONE] = { .type = NLA_U16 },
936 [CTA_MARK_MASK] = { .type = NLA_U32 }, 939 [CTA_MARK_MASK] = { .type = NLA_U32 },
937}; 940};
@@ -989,7 +992,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
989 nlmsg_report(nlh)) < 0) { 992 nlmsg_report(nlh)) < 0) {
990 nf_ct_delete_from_lists(ct); 993 nf_ct_delete_from_lists(ct);
991 /* we failed to report the event, try later */ 994 /* we failed to report the event, try later */
992 nf_ct_insert_dying_list(ct); 995 nf_ct_dying_timeout(ct);
993 nf_ct_put(ct); 996 nf_ct_put(ct);
994 return 0; 997 return 0;
995 } 998 }
@@ -1089,6 +1092,112 @@ out:
1089 return err == -EAGAIN ? -ENOBUFS : err; 1092 return err == -EAGAIN ? -ENOBUFS : err;
1090} 1093}
1091 1094
1095static int ctnetlink_done_list(struct netlink_callback *cb)
1096{
1097 if (cb->args[1])
1098 nf_ct_put((struct nf_conn *)cb->args[1]);
1099 return 0;
1100}
1101
1102static int
1103ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb,
1104 struct hlist_nulls_head *list)
1105{
1106 struct nf_conn *ct, *last;
1107 struct nf_conntrack_tuple_hash *h;
1108 struct hlist_nulls_node *n;
1109 struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
1110 u_int8_t l3proto = nfmsg->nfgen_family;
1111 int res;
1112
1113 if (cb->args[2])
1114 return 0;
1115
1116 spin_lock_bh(&nf_conntrack_lock);
1117 last = (struct nf_conn *)cb->args[1];
1118restart:
1119 hlist_nulls_for_each_entry(h, n, list, hnnode) {
1120 ct = nf_ct_tuplehash_to_ctrack(h);
1121 if (l3proto && nf_ct_l3num(ct) != l3proto)
1122 continue;
1123 if (cb->args[1]) {
1124 if (ct != last)
1125 continue;
1126 cb->args[1] = 0;
1127 }
1128 rcu_read_lock();
1129 res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
1130 cb->nlh->nlmsg_seq,
1131 NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
1132 ct);
1133 rcu_read_unlock();
1134 if (res < 0) {
1135 nf_conntrack_get(&ct->ct_general);
1136 cb->args[1] = (unsigned long)ct;
1137 goto out;
1138 }
1139 }
1140 if (cb->args[1]) {
1141 cb->args[1] = 0;
1142 goto restart;
1143 } else
1144 cb->args[2] = 1;
1145out:
1146 spin_unlock_bh(&nf_conntrack_lock);
1147 if (last)
1148 nf_ct_put(last);
1149
1150 return skb->len;
1151}
1152
1153static int
1154ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
1155{
1156 struct net *net = sock_net(skb->sk);
1157
1158 return ctnetlink_dump_list(skb, cb, &net->ct.dying);
1159}
1160
1161static int
1162ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb,
1163 const struct nlmsghdr *nlh,
1164 const struct nlattr * const cda[])
1165{
1166 if (nlh->nlmsg_flags & NLM_F_DUMP) {
1167 struct netlink_dump_control c = {
1168 .dump = ctnetlink_dump_dying,
1169 .done = ctnetlink_done_list,
1170 };
1171 return netlink_dump_start(ctnl, skb, nlh, &c);
1172 }
1173
1174 return -EOPNOTSUPP;
1175}
1176
1177static int
1178ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
1179{
1180 struct net *net = sock_net(skb->sk);
1181
1182 return ctnetlink_dump_list(skb, cb, &net->ct.unconfirmed);
1183}
1184
1185static int
1186ctnetlink_get_ct_unconfirmed(struct sock *ctnl, struct sk_buff *skb,
1187 const struct nlmsghdr *nlh,
1188 const struct nlattr * const cda[])
1189{
1190 if (nlh->nlmsg_flags & NLM_F_DUMP) {
1191 struct netlink_dump_control c = {
1192 .dump = ctnetlink_dump_unconfirmed,
1193 .done = ctnetlink_done_list,
1194 };
1195 return netlink_dump_start(ctnl, skb, nlh, &c);
1196 }
1197
1198 return -EOPNOTSUPP;
1199}
1200
1092#ifdef CONFIG_NF_NAT_NEEDED 1201#ifdef CONFIG_NF_NAT_NEEDED
1093static int 1202static int
1094ctnetlink_parse_nat_setup(struct nf_conn *ct, 1203ctnetlink_parse_nat_setup(struct nf_conn *ct,
@@ -2216,7 +2325,8 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
2216 [CTA_EXPECT_MASK] = { .type = NLA_NESTED }, 2325 [CTA_EXPECT_MASK] = { .type = NLA_NESTED },
2217 [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 }, 2326 [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
2218 [CTA_EXPECT_ID] = { .type = NLA_U32 }, 2327 [CTA_EXPECT_ID] = { .type = NLA_U32 },
2219 [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING }, 2328 [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
2329 .len = NF_CT_HELPER_NAME_LEN - 1 },
2220 [CTA_EXPECT_ZONE] = { .type = NLA_U16 }, 2330 [CTA_EXPECT_ZONE] = { .type = NLA_U16 },
2221 [CTA_EXPECT_FLAGS] = { .type = NLA_U32 }, 2331 [CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
2222 [CTA_EXPECT_CLASS] = { .type = NLA_U32 }, 2332 [CTA_EXPECT_CLASS] = { .type = NLA_U32 },
@@ -2712,6 +2822,8 @@ static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
2712 .policy = ct_nla_policy }, 2822 .policy = ct_nla_policy },
2713 [IPCTNL_MSG_CT_GET_STATS_CPU] = { .call = ctnetlink_stat_ct_cpu }, 2823 [IPCTNL_MSG_CT_GET_STATS_CPU] = { .call = ctnetlink_stat_ct_cpu },
2714 [IPCTNL_MSG_CT_GET_STATS] = { .call = ctnetlink_stat_ct }, 2824 [IPCTNL_MSG_CT_GET_STATS] = { .call = ctnetlink_stat_ct },
2825 [IPCTNL_MSG_CT_GET_DYING] = { .call = ctnetlink_get_ct_dying },
2826 [IPCTNL_MSG_CT_GET_UNCONFIRMED] = { .call = ctnetlink_get_ct_unconfirmed },
2715}; 2827};
2716 2828
2717static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { 2829static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 61f9285111d1..83876e9877f1 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1353,6 +1353,8 @@ static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1353 [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NLA_U32 }, 1353 [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NLA_U32 },
1354 [CTA_TIMEOUT_TCP_CLOSE] = { .type = NLA_U32 }, 1354 [CTA_TIMEOUT_TCP_CLOSE] = { .type = NLA_U32 },
1355 [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NLA_U32 }, 1355 [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NLA_U32 },
1356 [CTA_TIMEOUT_TCP_RETRANS] = { .type = NLA_U32 },
1357 [CTA_TIMEOUT_TCP_UNACK] = { .type = NLA_U32 },
1356}; 1358};
1357#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 1359#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1358 1360
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 8d2cf9ec37a8..d812c1235b30 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -14,84 +14,32 @@
14#include "nf_internals.h" 14#include "nf_internals.h"
15 15
16/* 16/*
17 * A queue handler may be registered for each protocol. Each is protected by 17 * Hook for nfnetlink_queue to register its queue handler.
18 * long term mutex. The handler must provide an an outfn() to accept packets 18 * We do this so that most of the NFQUEUE code can be modular.
19 * for queueing and must reinject all packets it receives, no matter what. 19 *
20 * Once the queue is registered it must reinject all packets it
21 * receives, no matter what.
20 */ 22 */
21static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly; 23static const struct nf_queue_handler __rcu *queue_handler __read_mostly;
22
23static DEFINE_MUTEX(queue_handler_mutex);
24 24
25/* return EBUSY when somebody else is registered, return EEXIST if the 25/* return EBUSY when somebody else is registered, return EEXIST if the
26 * same handler is registered, return 0 in case of success. */ 26 * same handler is registered, return 0 in case of success. */
27int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) 27void nf_register_queue_handler(const struct nf_queue_handler *qh)
28{ 28{
29 int ret; 29 /* should never happen, we only have one queueing backend in kernel */
30 const struct nf_queue_handler *old; 30 WARN_ON(rcu_access_pointer(queue_handler));
31 31 rcu_assign_pointer(queue_handler, qh);
32 if (pf >= ARRAY_SIZE(queue_handler))
33 return -EINVAL;
34
35 mutex_lock(&queue_handler_mutex);
36 old = rcu_dereference_protected(queue_handler[pf],
37 lockdep_is_held(&queue_handler_mutex));
38 if (old == qh)
39 ret = -EEXIST;
40 else if (old)
41 ret = -EBUSY;
42 else {
43 rcu_assign_pointer(queue_handler[pf], qh);
44 ret = 0;
45 }
46 mutex_unlock(&queue_handler_mutex);
47
48 return ret;
49} 32}
50EXPORT_SYMBOL(nf_register_queue_handler); 33EXPORT_SYMBOL(nf_register_queue_handler);
51 34
52/* The caller must flush their queue before this */ 35/* The caller must flush their queue before this */
53int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) 36void nf_unregister_queue_handler(void)
54{ 37{
55 const struct nf_queue_handler *old; 38 RCU_INIT_POINTER(queue_handler, NULL);
56
57 if (pf >= ARRAY_SIZE(queue_handler))
58 return -EINVAL;
59
60 mutex_lock(&queue_handler_mutex);
61 old = rcu_dereference_protected(queue_handler[pf],
62 lockdep_is_held(&queue_handler_mutex));
63 if (old && old != qh) {
64 mutex_unlock(&queue_handler_mutex);
65 return -EINVAL;
66 }
67
68 RCU_INIT_POINTER(queue_handler[pf], NULL);
69 mutex_unlock(&queue_handler_mutex);
70
71 synchronize_rcu(); 39 synchronize_rcu();
72
73 return 0;
74} 40}
75EXPORT_SYMBOL(nf_unregister_queue_handler); 41EXPORT_SYMBOL(nf_unregister_queue_handler);
76 42
77void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
78{
79 u_int8_t pf;
80
81 mutex_lock(&queue_handler_mutex);
82 for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) {
83 if (rcu_dereference_protected(
84 queue_handler[pf],
85 lockdep_is_held(&queue_handler_mutex)
86 ) == qh)
87 RCU_INIT_POINTER(queue_handler[pf], NULL);
88 }
89 mutex_unlock(&queue_handler_mutex);
90
91 synchronize_rcu();
92}
93EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
94
95static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) 43static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
96{ 44{
97 /* Release those devices we held, or Alexey will kill me. */ 45 /* Release those devices we held, or Alexey will kill me. */
@@ -137,7 +85,7 @@ static int __nf_queue(struct sk_buff *skb,
137 /* QUEUE == DROP if no one is waiting, to be safe. */ 85 /* QUEUE == DROP if no one is waiting, to be safe. */
138 rcu_read_lock(); 86 rcu_read_lock();
139 87
140 qh = rcu_dereference(queue_handler[pf]); 88 qh = rcu_dereference(queue_handler);
141 if (!qh) { 89 if (!qh) {
142 status = -ESRCH; 90 status = -ESRCH;
143 goto err_unlock; 91 goto err_unlock;
@@ -344,77 +292,3 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
344 kfree(entry); 292 kfree(entry);
345} 293}
346EXPORT_SYMBOL(nf_reinject); 294EXPORT_SYMBOL(nf_reinject);
347
348#ifdef CONFIG_PROC_FS
349static void *seq_start(struct seq_file *seq, loff_t *pos)
350{
351 if (*pos >= ARRAY_SIZE(queue_handler))
352 return NULL;
353
354 return pos;
355}
356
357static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
358{
359 (*pos)++;
360
361 if (*pos >= ARRAY_SIZE(queue_handler))
362 return NULL;
363
364 return pos;
365}
366
367static void seq_stop(struct seq_file *s, void *v)
368{
369
370}
371
372static int seq_show(struct seq_file *s, void *v)
373{
374 int ret;
375 loff_t *pos = v;
376 const struct nf_queue_handler *qh;
377
378 rcu_read_lock();
379 qh = rcu_dereference(queue_handler[*pos]);
380 if (!qh)
381 ret = seq_printf(s, "%2lld NONE\n", *pos);
382 else
383 ret = seq_printf(s, "%2lld %s\n", *pos, qh->name);
384 rcu_read_unlock();
385
386 return ret;
387}
388
389static const struct seq_operations nfqueue_seq_ops = {
390 .start = seq_start,
391 .next = seq_next,
392 .stop = seq_stop,
393 .show = seq_show,
394};
395
396static int nfqueue_open(struct inode *inode, struct file *file)
397{
398 return seq_open(file, &nfqueue_seq_ops);
399}
400
401static const struct file_operations nfqueue_file_ops = {
402 .owner = THIS_MODULE,
403 .open = nfqueue_open,
404 .read = seq_read,
405 .llseek = seq_lseek,
406 .release = seq_release,
407};
408#endif /* PROC_FS */
409
410
411int __init netfilter_queue_init(void)
412{
413#ifdef CONFIG_PROC_FS
414 if (!proc_create("nf_queue", S_IRUGO,
415 proc_net_netfilter, &nfqueue_file_ops))
416 return -1;
417#endif
418 return 0;
419}
420
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index e12d44e75b21..3158d87b56a8 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -809,7 +809,6 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
809}; 809};
810 810
811static const struct nf_queue_handler nfqh = { 811static const struct nf_queue_handler nfqh = {
812 .name = "nf_queue",
813 .outfn = &nfqnl_enqueue_packet, 812 .outfn = &nfqnl_enqueue_packet,
814}; 813};
815 814
@@ -827,14 +826,10 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
827 if (nfqa[NFQA_CFG_CMD]) { 826 if (nfqa[NFQA_CFG_CMD]) {
828 cmd = nla_data(nfqa[NFQA_CFG_CMD]); 827 cmd = nla_data(nfqa[NFQA_CFG_CMD]);
829 828
830 /* Commands without queue context - might sleep */ 829 /* Obsolete commands without queue context */
831 switch (cmd->command) { 830 switch (cmd->command) {
832 case NFQNL_CFG_CMD_PF_BIND: 831 case NFQNL_CFG_CMD_PF_BIND: return 0;
833 return nf_register_queue_handler(ntohs(cmd->pf), 832 case NFQNL_CFG_CMD_PF_UNBIND: return 0;
834 &nfqh);
835 case NFQNL_CFG_CMD_PF_UNBIND:
836 return nf_unregister_queue_handler(ntohs(cmd->pf),
837 &nfqh);
838 } 833 }
839 } 834 }
840 835
@@ -1074,6 +1069,7 @@ static int __init nfnetlink_queue_init(void)
1074#endif 1069#endif
1075 1070
1076 register_netdevice_notifier(&nfqnl_dev_notifier); 1071 register_netdevice_notifier(&nfqnl_dev_notifier);
1072 nf_register_queue_handler(&nfqh);
1077 return status; 1073 return status;
1078 1074
1079#ifdef CONFIG_PROC_FS 1075#ifdef CONFIG_PROC_FS
@@ -1087,7 +1083,7 @@ cleanup_netlink_notifier:
1087 1083
1088static void __exit nfnetlink_queue_fini(void) 1084static void __exit nfnetlink_queue_fini(void)
1089{ 1085{
1090 nf_unregister_queue_handlers(&nfqh); 1086 nf_unregister_queue_handler();
1091 unregister_netdevice_notifier(&nfqnl_dev_notifier); 1087 unregister_netdevice_notifier(&nfqnl_dev_notifier);
1092#ifdef CONFIG_PROC_FS 1088#ifdef CONFIG_PROC_FS
1093 remove_proc_entry("nfnetlink_queue", proc_net_netfilter); 1089 remove_proc_entry("nfnetlink_queue", proc_net_netfilter);