author	Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>	2012-02-24 05:45:49 -0500
committer	Pablo Neira Ayuso <pablo@netfilter.org>	2012-02-24 06:24:15 -0500
commit	7d367e06688dc7a2cc98c2ace04e1296e1d987e2 (patch)
tree	bf4a9a6497dec37fdd4a31999cb218277e2ba656
parent	279072882dc0149e5740dace075e1a49f087046d (diff)
netfilter: ctnetlink: fix soft lockup when netlink adds new entries (v2)
Marcell Zambo and Janos Farago noticed and reported that when new conntrack entries are added via netlink and the conntrack table gets full, a soft lockup happens. This is because nf_conntrack_lock is held while nf_conntrack_alloc is called, which in turn wants to take nf_conntrack_lock while evicting entries from the full table.

The patch fixes the soft lockup by limiting the holding of nf_conntrack_lock to the minimum, where it is absolutely required. This required extending (and thus changing) nf_conntrack_hash_insert so that it makes sure conntrack and ctnetlink do not add the same entry twice to the conntrack table.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
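As background for the locking change, here is a minimal userspace sketch (not part of the commit, and not the kernel implementation) of the pattern the patch moves to: allocate the new entry with no lock held, then take the table lock only around the duplicate check and the insertion, returning -EEXIST when an equal entry is already present. All names below (struct entry, table_lock, check_insert) are illustrative.

/*
 * Userspace sketch of the "allocate unlocked, check-and-insert under
 * the lock" pattern. Illustrative only; not the kernel code.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	unsigned int key;
	struct entry *next;
};

#define TABLE_SIZE 16

static struct entry *table[TABLE_SIZE];
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

/* Returns 0 on success, -EEXIST if an entry with the same key exists. */
static int check_insert(struct entry *e)
{
	unsigned int hash = e->key % TABLE_SIZE;
	struct entry *h;

	pthread_mutex_lock(&table_lock);
	for (h = table[hash]; h != NULL; h = h->next) {
		if (h->key == e->key) {
			pthread_mutex_unlock(&table_lock);
			return -EEXIST;
		}
	}
	e->next = table[hash];
	table[hash] = e;
	pthread_mutex_unlock(&table_lock);
	return 0;
}

int main(void)
{
	/* Allocation happens outside the lock, mirroring the fix. */
	struct entry *e = calloc(1, sizeof(*e));

	if (!e)
		return 1;
	e->key = 42;

	if (check_insert(e) < 0) {
		free(e);
		printf("duplicate entry\n");
	} else {
		printf("inserted\n");
	}
	return 0;
}

The kernel version in the diff below additionally checks both the original and the reply tuple (including the zone) and updates the insert/insert_failed statistics.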
-rw-r--r--	include/net/netfilter/nf_conntrack.h	2
-rw-r--r--	net/netfilter/nf_conntrack_core.c	38
-rw-r--r--	net/netfilter/nf_conntrack_netlink.c	46
3 files changed, 51 insertions(+), 35 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 8a2b0ae7dbd2..ab86036bbf0c 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -209,7 +209,7 @@ extern struct nf_conntrack_tuple_hash *
 __nf_conntrack_find(struct net *net, u16 zone,
 		    const struct nf_conntrack_tuple *tuple);
 
-extern void nf_conntrack_hash_insert(struct nf_conn *ct);
+extern int nf_conntrack_hash_check_insert(struct nf_conn *ct);
 extern void nf_ct_delete_from_lists(struct nf_conn *ct);
 extern void nf_ct_insert_dying_list(struct nf_conn *ct);
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 76613f5a55c0..ed86a3be678e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -404,19 +404,49 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
 		   &net->ct.hash[repl_hash]);
 }
 
-void nf_conntrack_hash_insert(struct nf_conn *ct)
+int
+nf_conntrack_hash_check_insert(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
 	unsigned int hash, repl_hash;
+	struct nf_conntrack_tuple_hash *h;
+	struct hlist_nulls_node *n;
 	u16 zone;
 
 	zone = nf_ct_zone(ct);
-	hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	hash = hash_conntrack(net, zone,
+			      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	repl_hash = hash_conntrack(net, zone,
+				   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+	spin_lock_bh(&nf_conntrack_lock);
 
+	/* See if there's one in the list already, including reverse */
+	hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
+		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				      &h->tuple) &&
+		    zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+			goto out;
+	hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
+		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+				      &h->tuple) &&
+		    zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+			goto out;
+
+	add_timer(&ct->timeout);
+	nf_conntrack_get(&ct->ct_general);
 	__nf_conntrack_hash_insert(ct, hash, repl_hash);
+	NF_CT_STAT_INC(net, insert);
+	spin_unlock_bh(&nf_conntrack_lock);
+
+	return 0;
+
+out:
+	NF_CT_STAT_INC(net, insert_failed);
+	spin_unlock_bh(&nf_conntrack_lock);
+	return -EEXIST;
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert);
+EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
 
 /* Confirm a connection given skb; places it in hash table */
 int
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 9307b033c0c9..30c9d4ca0218 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1367,15 +1367,12 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 						    nf_ct_protonum(ct));
 	if (helper == NULL) {
 		rcu_read_unlock();
-		spin_unlock_bh(&nf_conntrack_lock);
 #ifdef CONFIG_MODULES
 		if (request_module("nfct-helper-%s", helpname) < 0) {
-			spin_lock_bh(&nf_conntrack_lock);
 			err = -EOPNOTSUPP;
 			goto err1;
 		}
 
-		spin_lock_bh(&nf_conntrack_lock);
 		rcu_read_lock();
 		helper = __nf_conntrack_helper_find(helpname,
 						    nf_ct_l3num(ct),
@@ -1468,8 +1465,10 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 	if (tstamp)
 		tstamp->start = ktime_to_ns(ktime_get_real());
 
-	add_timer(&ct->timeout);
-	nf_conntrack_hash_insert(ct);
+	err = nf_conntrack_hash_check_insert(ct);
+	if (err < 0)
+		goto err2;
+
 	rcu_read_unlock();
 
 	return ct;
@@ -1490,6 +1489,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	struct nf_conntrack_tuple otuple, rtuple;
 	struct nf_conntrack_tuple_hash *h = NULL;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	struct nf_conn *ct;
 	u_int8_t u3 = nfmsg->nfgen_family;
 	u16 zone;
 	int err;
@@ -1510,27 +1510,22 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			return err;
 	}
 
-	spin_lock_bh(&nf_conntrack_lock);
 	if (cda[CTA_TUPLE_ORIG])
-		h = __nf_conntrack_find(net, zone, &otuple);
+		h = nf_conntrack_find_get(net, zone, &otuple);
 	else if (cda[CTA_TUPLE_REPLY])
-		h = __nf_conntrack_find(net, zone, &rtuple);
+		h = nf_conntrack_find_get(net, zone, &rtuple);
 
 	if (h == NULL) {
 		err = -ENOENT;
 		if (nlh->nlmsg_flags & NLM_F_CREATE) {
-			struct nf_conn *ct;
 			enum ip_conntrack_events events;
 
 			ct = ctnetlink_create_conntrack(net, zone, cda, &otuple,
 							&rtuple, u3);
-			if (IS_ERR(ct)) {
-				err = PTR_ERR(ct);
-				goto out_unlock;
-			}
+			if (IS_ERR(ct))
+				return PTR_ERR(ct);
+
 			err = 0;
-			nf_conntrack_get(&ct->ct_general);
-			spin_unlock_bh(&nf_conntrack_lock);
 			if (test_bit(IPS_EXPECTED_BIT, &ct->status))
 				events = IPCT_RELATED;
 			else
@@ -1545,23 +1540,19 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 						      ct, NETLINK_CB(skb).pid,
 						      nlmsg_report(nlh));
 			nf_ct_put(ct);
-		} else
-			spin_unlock_bh(&nf_conntrack_lock);
+		}
 
 		return err;
 	}
 	/* implicit 'else' */
 
-	/* We manipulate the conntrack inside the global conntrack table lock,
-	 * so there's no need to increase the refcount */
 	err = -EEXIST;
+	ct = nf_ct_tuplehash_to_ctrack(h);
 	if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
-		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
-
+		spin_lock_bh(&nf_conntrack_lock);
 		err = ctnetlink_change_conntrack(ct, cda);
+		spin_unlock_bh(&nf_conntrack_lock);
 		if (err == 0) {
-			nf_conntrack_get(&ct->ct_general);
-			spin_unlock_bh(&nf_conntrack_lock);
 			nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
 						      (1 << IPCT_ASSURED) |
 						      (1 << IPCT_HELPER) |
@@ -1570,15 +1561,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 						      (1 << IPCT_MARK),
 						      ct, NETLINK_CB(skb).pid,
 						      nlmsg_report(nlh));
-			nf_ct_put(ct);
-		} else
-			spin_unlock_bh(&nf_conntrack_lock);
-
-		return err;
+		}
 	}
 
-out_unlock:
-	spin_unlock_bh(&nf_conntrack_lock);
+	nf_ct_put(ct);
 	return err;
 }
 