path: root/net/xfrm
author	Timo Teräs <timo.teras@iki.fi>	2010-04-06 20:30:05 -0400
committer	David S. Miller <davem@davemloft.net>	2010-04-07 06:43:19 -0400
commit	80c802f3073e84c956846e921e8a0b02dfa3755f (patch)
tree	895dc92dcf6b658d78838e0a23db3dd29c8be695 /net/xfrm
parent	fe1a5f031e76bd8761a7803d75b95ee96e84a574 (diff)
xfrm: cache bundles instead of policies for outgoing flows
__xfrm_lookup() is called for each packet transmitted out of the system. xfrm_find_bundle() does a linear search, which can kill system performance depending on how many bundles are required per policy. This modifies __xfrm_lookup() to store bundles directly in the flow cache. If we do not get a hit, we simply create a new bundle instead of doing the slow search. This means that we can now get multiple xfrm_dst's for the same flow (on a per-cpu basis).

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
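The idea can be illustrated with a small stand-alone C sketch (user space, not kernel code; names such as flow_key, bundle, cache_lookup and create_bundle are made up for illustration): the resolved bundle is stored directly in a flow-keyed cache and is only rebuilt on a miss or when the owning policy's generation counter has moved, instead of walking a per-policy bundle list on every packet.

/*
 * Stand-alone sketch of "cache bundles, keyed by flow" (illustrative only).
 */
#include <stdio.h>
#include <stdlib.h>

struct flow_key { unsigned int saddr, daddr; unsigned short dport; };

struct bundle {
	struct flow_key key;
	int genid;              /* snapshot of the policy generation */
	struct bundle *next;
};

#define HASH_SIZE 64
static struct bundle *cache[HASH_SIZE];
static int policy_genid = 1;    /* bumped whenever policies change */

static unsigned int hash_key(const struct flow_key *k)
{
	return (k->saddr ^ k->daddr ^ k->dport) % HASH_SIZE;
}

/* "Resolver": only runs on a miss or a stale hit, i.e. the slow path. */
static struct bundle *create_bundle(const struct flow_key *k)
{
	struct bundle *b = calloc(1, sizeof(*b));
	b->key = *k;
	b->genid = policy_genid;
	return b;
}

static struct bundle *cache_lookup(const struct flow_key *k)
{
	unsigned int h = hash_key(k);
	struct bundle *b;

	for (b = cache[h]; b; b = b->next) {
		if (b->key.saddr == k->saddr && b->key.daddr == k->daddr &&
		    b->key.dport == k->dport) {
			if (b->genid == policy_genid)
				return b;   /* fast path: no per-policy list walk */
			break;              /* stale entry: fall through and rebuild */
		}
	}
	b = create_bundle(k);           /* miss: build a fresh bundle */
	b->next = cache[h];
	cache[h] = b;                   /* (a real cache would also evict the stale one) */
	return b;
}

int main(void)
{
	struct flow_key k = { 0x0a000001, 0x0a000002, 443 };
	struct bundle *b1 = cache_lookup(&k);
	struct bundle *b2 = cache_lookup(&k);

	printf("second lookup reuses bundle: %s\n", b1 == b2 ? "yes" : "no");

	policy_genid++;                 /* a policy changed: cached bundle is stale */
	printf("rebuilt after policy change: %s\n",
	       cache_lookup(&k) != b2 ? "yes" : "no");
	return 0;
}

The generation-counter invalidation in the sketch plays the same role as the atomic_inc(&policy->genid) calls that this patch adds where the old code used to free the per-policy bundle lists.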
Diffstat (limited to 'net/xfrm')
-rw-r--r--	net/xfrm/xfrm_policy.c	711
1 file changed, 376 insertions(+), 335 deletions(-)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7722baeb140d..06ccc71c871f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -37,6 +37,8 @@
 DEFINE_MUTEX(xfrm_cfg_mutex);
 EXPORT_SYMBOL(xfrm_cfg_mutex);
 
+static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock);
+static struct dst_entry *xfrm_policy_sk_bundles;
 static DEFINE_RWLOCK(xfrm_policy_lock);
 
 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
@@ -50,6 +52,7 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
 static void xfrm_init_pmtu(struct dst_entry *dst);
+static int stale_bundle(struct dst_entry *dst);
 
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 						int dir);
@@ -277,8 +280,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
 {
 	BUG_ON(!policy->walk.dead);
 
-	BUG_ON(policy->bundles);
-
 	if (del_timer(&policy->timer))
 		BUG();
 
@@ -289,12 +290,7 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
 
 static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
 {
-	struct dst_entry *dst;
-
-	while ((dst = policy->bundles) != NULL) {
-		policy->bundles = dst->next;
-		dst_free(dst);
-	}
+	atomic_inc(&policy->genid);
 
 	if (del_timer(&policy->timer))
 		atomic_dec(&policy->refcnt);
@@ -572,7 +568,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	struct xfrm_policy *delpol;
 	struct hlist_head *chain;
 	struct hlist_node *entry, *newpos;
-	struct dst_entry *gc_list;
 	u32 mark = policy->mark.v & policy->mark.m;
 
 	write_lock_bh(&xfrm_policy_lock);
@@ -622,34 +617,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	else if (xfrm_bydst_should_resize(net, dir, NULL))
 		schedule_work(&net->xfrm.policy_hash_work);
 
-	read_lock_bh(&xfrm_policy_lock);
-	gc_list = NULL;
-	entry = &policy->bydst;
-	hlist_for_each_entry_continue(policy, entry, bydst) {
-		struct dst_entry *dst;
-
-		write_lock(&policy->lock);
-		dst = policy->bundles;
-		if (dst) {
-			struct dst_entry *tail = dst;
-			while (tail->next)
-				tail = tail->next;
-			tail->next = gc_list;
-			gc_list = dst;
-
-			policy->bundles = NULL;
-		}
-		write_unlock(&policy->lock);
-	}
-	read_unlock_bh(&xfrm_policy_lock);
-
-	while (gc_list) {
-		struct dst_entry *dst = gc_list;
-
-		gc_list = dst->next;
-		dst_free(dst);
-	}
-
 	return 0;
 }
 EXPORT_SYMBOL(xfrm_policy_insert);
@@ -998,6 +965,19 @@ fail:
 	return ret;
 }
 
+static struct xfrm_policy *
+__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir)
+{
+#ifdef CONFIG_XFRM_SUB_POLICY
+	struct xfrm_policy *pol;
+
+	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
+	if (pol != NULL)
+		return pol;
+#endif
+	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
+}
+
 static struct flow_cache_object *
 xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
 		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
@@ -1007,21 +987,10 @@ xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
 	if (old_obj)
 		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
 
-#ifdef CONFIG_XFRM_SUB_POLICY
-	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
-	if (IS_ERR(pol))
+	pol = __xfrm_policy_lookup(net, fl, family, dir);
+	if (IS_ERR_OR_NULL(pol))
 		return ERR_CAST(pol);
-	if (pol)
-		goto found;
-#endif
-	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
-	if (IS_ERR(pol))
-		return ERR_CAST(pol);
-	if (pol)
-		goto found;
-	return NULL;
 
-found:
 	/* Resolver returns two references:
 	 * one for cache and one for caller of flow_cache_lookup() */
 	xfrm_pol_hold(pol);
@@ -1313,18 +1282,6 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
  * still valid.
  */
 
-static struct dst_entry *
-xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
-{
-	struct dst_entry *x;
-	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
-	if (unlikely(afinfo == NULL))
-		return ERR_PTR(-EINVAL);
-	x = afinfo->find_bundle(fl, policy);
-	xfrm_policy_put_afinfo(afinfo);
-	return x;
-}
-
 static inline int xfrm_get_tos(struct flowi *fl, int family)
 {
 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1340,6 +1297,54 @@ static inline int xfrm_get_tos(struct flowi *fl, int family)
 	return tos;
 }
 
+static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
+{
+	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
+	struct dst_entry *dst = &xdst->u.dst;
+
+	if (xdst->route == NULL) {
+		/* Dummy bundle - if it has xfrms we were not
+		 * able to build bundle as template resolution failed.
+		 * It means we need to try again resolving. */
+		if (xdst->num_xfrms > 0)
+			return NULL;
+	} else {
+		/* Real bundle */
+		if (stale_bundle(dst))
+			return NULL;
+	}
+
+	dst_hold(dst);
+	return flo;
+}
+
+static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
+{
+	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
+	struct dst_entry *dst = &xdst->u.dst;
+
+	if (!xdst->route)
+		return 0;
+	if (stale_bundle(dst))
+		return 0;
+
+	return 1;
+}
+
+static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
+{
+	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
+	struct dst_entry *dst = &xdst->u.dst;
+
+	dst_free(dst);
+}
+
+static const struct flow_cache_ops xfrm_bundle_fc_ops = {
+	.get = xfrm_bundle_flo_get,
+	.check = xfrm_bundle_flo_check,
+	.delete = xfrm_bundle_flo_delete,
+};
+
 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 {
 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1362,9 +1367,10 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 		BUG();
 	}
 	xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS);
-
 	xfrm_policy_put_afinfo(afinfo);
 
+	xdst->flo.ops = &xfrm_bundle_fc_ops;
+
 	return xdst;
 }
 
@@ -1402,6 +1408,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	return err;
 }
 
+
 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
  * all the metrics... Shortly, bundle a bundle.
  */
@@ -1465,7 +1472,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 			dst_hold(dst);
 
 		dst1->xfrm = xfrm[i];
-		xdst->genid = xfrm[i]->genid;
+		xdst->xfrm_genid = xfrm[i]->genid;
 
 		dst1->obsolete = -1;
 		dst1->flags |= DST_HOST;
@@ -1558,7 +1565,186 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
 #endif
 }
 
-static int stale_bundle(struct dst_entry *dst);
+static int xfrm_expand_policies(struct flowi *fl, u16 family,
+				struct xfrm_policy **pols,
+				int *num_pols, int *num_xfrms)
+{
+	int i;
+
+	if (*num_pols == 0 || !pols[0]) {
+		*num_pols = 0;
+		*num_xfrms = 0;
+		return 0;
+	}
+	if (IS_ERR(pols[0]))
+		return PTR_ERR(pols[0]);
+
+	*num_xfrms = pols[0]->xfrm_nr;
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+	if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
+	    pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
+		pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
+						    XFRM_POLICY_TYPE_MAIN,
+						    fl, family,
+						    XFRM_POLICY_OUT);
+		if (pols[1]) {
+			if (IS_ERR(pols[1])) {
+				xfrm_pols_put(pols, *num_pols);
+				return PTR_ERR(pols[1]);
+			}
+			(*num_pols) ++;
+			(*num_xfrms) += pols[1]->xfrm_nr;
+		}
+	}
+#endif
+	for (i = 0; i < *num_pols; i++) {
+		if (pols[i]->action != XFRM_POLICY_ALLOW) {
+			*num_xfrms = -1;
+			break;
+		}
+	}
+
+	return 0;
+
+}
+
+static struct xfrm_dst *
+xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
+			       struct flowi *fl, u16 family,
+			       struct dst_entry *dst_orig)
+{
+	struct net *net = xp_net(pols[0]);
+	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+	struct dst_entry *dst;
+	struct xfrm_dst *xdst;
+	int err;
+
+	/* Try to instantiate a bundle */
+	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
+	if (err < 0) {
+		if (err != -EAGAIN)
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+		return ERR_PTR(err);
+	}
+
+	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
+	if (IS_ERR(dst)) {
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
+		return ERR_CAST(dst);
+	}
+
+	xdst = (struct xfrm_dst *)dst;
+	xdst->num_xfrms = err;
+	if (num_pols > 1)
+		err = xfrm_dst_update_parent(dst, &pols[1]->selector);
+	else
+		err = xfrm_dst_update_origin(dst, fl);
+	if (unlikely(err)) {
+		dst_free(dst);
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
+		return ERR_PTR(err);
+	}
+
+	xdst->num_pols = num_pols;
+	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
+	xdst->policy_genid = atomic_read(&pols[0]->genid);
+
+	return xdst;
+}
+
+static struct flow_cache_object *
+xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir,
+		   struct flow_cache_object *oldflo, void *ctx)
+{
+	struct dst_entry *dst_orig = (struct dst_entry *)ctx;
+	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
+	struct xfrm_dst *xdst, *new_xdst;
+	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
+
+	/* Check if the policies from old bundle are usable */
+	xdst = NULL;
+	if (oldflo) {
+		xdst = container_of(oldflo, struct xfrm_dst, flo);
+		num_pols = xdst->num_pols;
+		num_xfrms = xdst->num_xfrms;
+		pol_dead = 0;
+		for (i = 0; i < num_pols; i++) {
+			pols[i] = xdst->pols[i];
+			pol_dead |= pols[i]->walk.dead;
+		}
+		if (pol_dead) {
+			dst_free(&xdst->u.dst);
+			xdst = NULL;
+			num_pols = 0;
+			num_xfrms = 0;
+			oldflo = NULL;
+		}
+	}
+
+	/* Resolve policies to use if we couldn't get them from
+	 * previous cache entry */
+	if (xdst == NULL) {
+		num_pols = 1;
+		pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
+		err = xfrm_expand_policies(fl, family, pols,
+					   &num_pols, &num_xfrms);
+		if (err < 0)
+			goto inc_error;
+		if (num_pols == 0)
+			return NULL;
+		if (num_xfrms <= 0)
+			goto make_dummy_bundle;
+	}
+
+	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
+	if (IS_ERR(new_xdst)) {
+		err = PTR_ERR(new_xdst);
+		if (err != -EAGAIN)
+			goto error;
+		if (oldflo == NULL)
+			goto make_dummy_bundle;
+		dst_hold(&xdst->u.dst);
+		return oldflo;
+	}
+
+	/* Kill the previous bundle */
+	if (xdst) {
+		/* The policies were stolen for newly generated bundle */
+		xdst->num_pols = 0;
+		dst_free(&xdst->u.dst);
+	}
+
+	/* Flow cache does not have reference, it dst_free()'s,
+	 * but we do need to return one reference for original caller */
+	dst_hold(&new_xdst->u.dst);
+	return &new_xdst->flo;
+
+make_dummy_bundle:
+	/* We found policies, but there's no bundles to instantiate:
+	 * either because the policy blocks, has no transformations or
+	 * we could not build template (no xfrm_states).*/
+	xdst = xfrm_alloc_dst(net, family);
+	if (IS_ERR(xdst)) {
+		xfrm_pols_put(pols, num_pols);
+		return ERR_CAST(xdst);
+	}
+	xdst->num_pols = num_pols;
+	xdst->num_xfrms = num_xfrms;
+	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
+
+	dst_hold(&xdst->u.dst);
+	return &xdst->flo;
+
+inc_error:
+	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+error:
+	if (xdst != NULL)
+		dst_free(&xdst->u.dst);
+	else
+		xfrm_pols_put(pols, num_pols);
+	return ERR_PTR(err);
+}
 
 /* Main function: finds/creates a bundle for given flow.
  *
@@ -1568,248 +1754,152 @@ static int stale_bundle(struct dst_entry *dst);
 int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
 		  struct sock *sk, int flags)
 {
-	struct xfrm_policy *policy;
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
-	int npols;
-	int pol_dead;
-	int xfrm_nr;
-	int pi;
-	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
-	struct dst_entry *dst, *dst_orig = *dst_p;
-	int nx = 0;
-	int err;
-	u32 genid;
-	u16 family;
+	struct flow_cache_object *flo;
+	struct xfrm_dst *xdst;
+	struct dst_entry *dst, *dst_orig = *dst_p, *route;
+	u16 family = dst_orig->ops->family;
 	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
+	int i, err, num_pols, num_xfrms, drop_pols = 0;
 
 restart:
-	genid = atomic_read(&flow_cache_genid);
-	policy = NULL;
-	for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
-		pols[pi] = NULL;
-	npols = 0;
-	pol_dead = 0;
-	xfrm_nr = 0;
+	dst = NULL;
+	xdst = NULL;
+	route = NULL;
 
 	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
-		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
-		err = PTR_ERR(policy);
-		if (IS_ERR(policy)) {
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+		num_pols = 1;
+		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+		err = xfrm_expand_policies(fl, family, pols,
+					   &num_pols, &num_xfrms);
+		if (err < 0)
 			goto dropdst;
+
+		if (num_pols) {
+			if (num_xfrms <= 0) {
+				drop_pols = num_pols;
+				goto no_transform;
+			}
+
+			xdst = xfrm_resolve_and_create_bundle(
+					pols, num_pols, fl,
+					family, dst_orig);
+			if (IS_ERR(xdst)) {
+				xfrm_pols_put(pols, num_pols);
+				err = PTR_ERR(xdst);
+				goto dropdst;
+			}
+
+			spin_lock_bh(&xfrm_policy_sk_bundle_lock);
+			xdst->u.dst.next = xfrm_policy_sk_bundles;
+			xfrm_policy_sk_bundles = &xdst->u.dst;
+			spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
+
+			route = xdst->route;
 		}
 	}
 
-	if (!policy) {
-		struct flow_cache_object *flo;
-
+	if (xdst == NULL) {
 		/* To accelerate a bit... */
 		if ((dst_orig->flags & DST_NOXFRM) ||
 		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
 			goto nopol;
 
-		flo = flow_cache_lookup(net, fl, dst_orig->ops->family,
-					dir, xfrm_policy_lookup, NULL);
-		err = PTR_ERR(flo);
+		flo = flow_cache_lookup(net, fl, family, dir,
+					xfrm_bundle_lookup, dst_orig);
+		if (flo == NULL)
+			goto nopol;
 		if (IS_ERR(flo)) {
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+			err = PTR_ERR(flo);
 			goto dropdst;
 		}
-		if (flo)
-			policy = container_of(flo, struct xfrm_policy, flo);
-		else
-			policy = NULL;
+		xdst = container_of(flo, struct xfrm_dst, flo);
+
+		num_pols = xdst->num_pols;
+		num_xfrms = xdst->num_xfrms;
+		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols);
+		route = xdst->route;
+	}
+
+	dst = &xdst->u.dst;
+	if (route == NULL && num_xfrms > 0) {
+		/* The only case when xfrm_bundle_lookup() returns a
+		 * bundle with null route, is when the template could
+		 * not be resolved. It means policies are there, but
+		 * bundle could not be created, since we don't yet
+		 * have the xfrm_state's. We need to wait for KM to
+		 * negotiate new SA's or bail out with error.*/
+		if (net->xfrm.sysctl_larval_drop) {
+			/* EREMOTE tells the caller to generate
+			 * a one-shot blackhole route. */
+			dst_release(dst);
+			xfrm_pols_put(pols, num_pols);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
+			return -EREMOTE;
+		}
+		if (flags & XFRM_LOOKUP_WAIT) {
+			DECLARE_WAITQUEUE(wait, current);
+
+			add_wait_queue(&net->xfrm.km_waitq, &wait);
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule();
+			set_current_state(TASK_RUNNING);
+			remove_wait_queue(&net->xfrm.km_waitq, &wait);
+
+			if (!signal_pending(current)) {
+				dst_release(dst);
+				goto restart;
+			}
+
+			err = -ERESTART;
+		} else
+			err = -EAGAIN;
+
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
+		goto error;
 	}
 
-	if (!policy)
+no_transform:
+	if (num_pols == 0)
 		goto nopol;
 
-	family = dst_orig->ops->family;
-	pols[0] = policy;
-	npols ++;
-	xfrm_nr += pols[0]->xfrm_nr;
-
-	err = -ENOENT;
-	if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
+	if ((flags & XFRM_LOOKUP_ICMP) &&
+	    !(pols[0]->flags & XFRM_POLICY_ICMP)) {
+		err = -ENOENT;
 		goto error;
+	}
 
-	policy->curlft.use_time = get_seconds();
+	for (i = 0; i < num_pols; i++)
+		pols[i]->curlft.use_time = get_seconds();
 
-	switch (policy->action) {
-	default:
-	case XFRM_POLICY_BLOCK:
+	if (num_xfrms < 0) {
 		/* Prohibit the flow */
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
 		err = -EPERM;
 		goto error;
-
-	case XFRM_POLICY_ALLOW:
-#ifndef CONFIG_XFRM_SUB_POLICY
-		if (policy->xfrm_nr == 0) {
-			/* Flow passes not transformed. */
-			xfrm_pol_put(policy);
-			return 0;
-		}
-#endif
-
-		/* Try to find matching bundle.
-		 *
-		 * LATER: help from flow cache. It is optional, this
-		 * is required only for output policy.
-		 */
-		dst = xfrm_find_bundle(fl, policy, family);
-		if (IS_ERR(dst)) {
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-			err = PTR_ERR(dst);
-			goto error;
-		}
-
-		if (dst)
-			break;
-
-#ifdef CONFIG_XFRM_SUB_POLICY
-		if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
-			pols[1] = xfrm_policy_lookup_bytype(net,
-							    XFRM_POLICY_TYPE_MAIN,
-							    fl, family,
-							    XFRM_POLICY_OUT);
-			if (pols[1]) {
-				if (IS_ERR(pols[1])) {
-					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
-					err = PTR_ERR(pols[1]);
-					goto error;
-				}
-				if (pols[1]->action == XFRM_POLICY_BLOCK) {
-					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
-					err = -EPERM;
-					goto error;
-				}
-				npols ++;
-				xfrm_nr += pols[1]->xfrm_nr;
-			}
-		}
-
-		/*
-		 * Because neither flowi nor bundle information knows about
-		 * transformation template size. On more than one policy usage
-		 * we can realize whether all of them is bypass or not after
-		 * they are searched. See above not-transformed bypass
-		 * is surrounded by non-sub policy configuration, too.
-		 */
-		if (xfrm_nr == 0) {
-			/* Flow passes not transformed. */
-			xfrm_pols_put(pols, npols);
-			return 0;
-		}
-
-#endif
-		nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
-
-		if (unlikely(nx<0)) {
-			err = nx;
-			if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) {
-				/* EREMOTE tells the caller to generate
-				 * a one-shot blackhole route.
-				 */
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-				xfrm_pol_put(policy);
-				return -EREMOTE;
-			}
-			if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
-				DECLARE_WAITQUEUE(wait, current);
-
-				add_wait_queue(&net->xfrm.km_waitq, &wait);
-				set_current_state(TASK_INTERRUPTIBLE);
-				schedule();
-				set_current_state(TASK_RUNNING);
-				remove_wait_queue(&net->xfrm.km_waitq, &wait);
-
-				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
-
-				if (nx == -EAGAIN && signal_pending(current)) {
-					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-					err = -ERESTART;
-					goto error;
-				}
-				if (nx == -EAGAIN ||
-				    genid != atomic_read(&flow_cache_genid)) {
-					xfrm_pols_put(pols, npols);
-					goto restart;
-				}
-				err = nx;
-			}
-			if (err < 0) {
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-				goto error;
-			}
-		}
-		if (nx == 0) {
-			/* Flow passes not transformed. */
-			xfrm_pols_put(pols, npols);
-			return 0;
-		}
-
-		dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
-		err = PTR_ERR(dst);
-		if (IS_ERR(dst)) {
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
-			goto error;
-		}
-
-		for (pi = 0; pi < npols; pi++)
-			pol_dead |= pols[pi]->walk.dead;
-
-		write_lock_bh(&policy->lock);
-		if (unlikely(pol_dead || stale_bundle(dst))) {
-			/* Wow! While we worked on resolving, this
-			 * policy has gone. Retry. It is not paranoia,
-			 * we just cannot enlist new bundle to dead object.
-			 * We can't enlist stable bundles either.
-			 */
-			write_unlock_bh(&policy->lock);
-			dst_free(dst);
-
-			if (pol_dead)
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD);
-			else
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-			err = -EHOSTUNREACH;
-			goto error;
-		}
-
-		if (npols > 1)
-			err = xfrm_dst_update_parent(dst, &pols[1]->selector);
-		else
-			err = xfrm_dst_update_origin(dst, fl);
-		if (unlikely(err)) {
-			write_unlock_bh(&policy->lock);
-			dst_free(dst);
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-			goto error;
-		}
-
-		dst->next = policy->bundles;
-		policy->bundles = dst;
-		dst_hold(dst);
-		write_unlock_bh(&policy->lock);
+	} else if (num_xfrms > 0) {
+		/* Flow transformed */
+		*dst_p = dst;
+		dst_release(dst_orig);
+	} else {
+		/* Flow passes untransformed */
+		dst_release(dst);
 	}
-	*dst_p = dst;
-	dst_release(dst_orig);
-	xfrm_pols_put(pols, npols);
+ok:
+	xfrm_pols_put(pols, drop_pols);
 	return 0;
 
+nopol:
+	if (!(flags & XFRM_LOOKUP_ICMP))
+		goto ok;
+	err = -ENOENT;
 error:
-	xfrm_pols_put(pols, npols);
+	dst_release(dst);
 dropdst:
 	dst_release(dst_orig);
 	*dst_p = NULL;
+	xfrm_pols_put(pols, drop_pols);
 	return err;
-
-nopol:
-	err = -ENOENT;
-	if (flags & XFRM_LOOKUP_ICMP)
-		goto dropdst;
-	return 0;
 }
 EXPORT_SYMBOL(__xfrm_lookup);
 
@@ -2161,71 +2251,24 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
 	return dst;
 }
 
-static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
-{
-	struct dst_entry *dst, **dstp;
-
-	write_lock(&pol->lock);
-	dstp = &pol->bundles;
-	while ((dst=*dstp) != NULL) {
-		if (func(dst)) {
-			*dstp = dst->next;
-			dst->next = *gc_list_p;
-			*gc_list_p = dst;
-		} else {
-			dstp = &dst->next;
-		}
-	}
-	write_unlock(&pol->lock);
-}
-
-static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *))
+static void __xfrm_garbage_collect(struct net *net)
 {
-	struct dst_entry *gc_list = NULL;
-	int dir;
+	struct dst_entry *head, *next;
 
-	read_lock_bh(&xfrm_policy_lock);
-	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
-		struct xfrm_policy *pol;
-		struct hlist_node *entry;
-		struct hlist_head *table;
-		int i;
+	flow_cache_flush();
 
-		hlist_for_each_entry(pol, entry,
-				     &net->xfrm.policy_inexact[dir], bydst)
-			prune_one_bundle(pol, func, &gc_list);
+	spin_lock_bh(&xfrm_policy_sk_bundle_lock);
+	head = xfrm_policy_sk_bundles;
+	xfrm_policy_sk_bundles = NULL;
+	spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
 
-		table = net->xfrm.policy_bydst[dir].table;
-		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
-			hlist_for_each_entry(pol, entry, table + i, bydst)
-				prune_one_bundle(pol, func, &gc_list);
-		}
-	}
-	read_unlock_bh(&xfrm_policy_lock);
-
-	while (gc_list) {
-		struct dst_entry *dst = gc_list;
-		gc_list = dst->next;
-		dst_free(dst);
+	while (head) {
+		next = head->next;
+		dst_free(head);
+		head = next;
 	}
 }
 
-static int unused_bundle(struct dst_entry *dst)
-{
-	return !atomic_read(&dst->__refcnt);
-}
-
-static void __xfrm_garbage_collect(struct net *net)
-{
-	xfrm_prune_bundles(net, unused_bundle);
-}
-
-static int xfrm_flush_bundles(struct net *net)
-{
-	xfrm_prune_bundles(net, stale_bundle);
-	return 0;
-}
-
 static void xfrm_init_pmtu(struct dst_entry *dst)
 {
 	do {
@@ -2283,7 +2326,9 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
 			return 0;
 		if (dst->xfrm->km.state != XFRM_STATE_VALID)
 			return 0;
-		if (xdst->genid != dst->xfrm->genid)
+		if (xdst->xfrm_genid != dst->xfrm->genid)
+			return 0;
+		if (xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
 			return 0;
 
 		if (strict && fl &&
@@ -2448,7 +2493,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
 
 	switch (event) {
 	case NETDEV_DOWN:
-		xfrm_flush_bundles(dev_net(dev));
+		__xfrm_garbage_collect(dev_net(dev));
 	}
 	return NOTIFY_DONE;
 }
@@ -2780,7 +2825,6 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
 				   struct xfrm_migrate *m, int num_migrate)
 {
 	struct xfrm_migrate *mp;
-	struct dst_entry *dst;
 	int i, j, n = 0;
 
 	write_lock_bh(&pol->lock);
@@ -2805,10 +2849,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
 			       sizeof(pol->xfrm_vec[i].saddr));
 			pol->xfrm_vec[i].encap_family = mp->new_family;
 			/* flush bundles */
-			while ((dst = pol->bundles) != NULL) {
-				pol->bundles = dst->next;
-				dst_free(dst);
-			}
+			atomic_inc(&pol->genid);
 		}
 	}
 