aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author: Alexander Duyck <alexander.h.duyck@redhat.com>, 2015-03-06 16:47:00 -0500
committer: David S. Miller <davem@davemloft.net>, 2015-03-11 16:22:14 -0400
commit: 0ddcf43d5d4a03ded1ee3f6b3b72a0cbed4e90b1 (patch)
tree: 28c93b0c6eb9389182f60736103b6bfc0b61cedc /net
parent: 169bf9121b19dd6029e0a354d33513f61bfbe3d3 (diff)
ipv4: FIB Local/MAIN table collapse
This patch is meant to collapse local and main into one by converting tb_data from an array to a pointer. Doing this allows us to point the local table into the main while maintaining the same variables in the table. As such, tb_data was converted from an array to a pointer, and a new array called __data was added in order to still provide an object for tb_data to point to. In order to track the origin of the fib aliases, a tb_id value was added in a hole that existed on 64-bit systems. Using this we can also reverse the merge in the event that custom FIB rules are enabled. With this patch I am seeing an improvement of 20ns to 30ns for routing lookups as long as custom rules are not enabled. With custom rules enabled, we fall back to split tables and the original behavior.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/core/fib_rules.c 8
-rw-r--r-- net/ipv4/fib_frontend.c 59
-rw-r--r-- net/ipv4/fib_lookup.h 1
-rw-r--r-- net/ipv4/fib_rules.c 20
-rw-r--r-- net/ipv4/fib_trie.c 172
5 files changed, 240 insertions, 20 deletions
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 44706e81b2e0..b55677fed1c8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -492,6 +492,12 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
492 goto errout; 492 goto errout;
493 } 493 }
494 494
495 if (ops->delete) {
496 err = ops->delete(rule);
497 if (err)
498 goto errout;
499 }
500
495 list_del_rcu(&rule->list); 501 list_del_rcu(&rule->list);
496 502
497 if (rule->action == FR_ACT_GOTO) { 503 if (rule->action == FR_ACT_GOTO) {
@@ -517,8 +523,6 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
517 523
518 notify_rule_change(RTM_DELRULE, rule, ops, nlh, 524 notify_rule_change(RTM_DELRULE, rule, ops, nlh,
519 NETLINK_CB(skb).portid); 525 NETLINK_CB(skb).portid);
520 if (ops->delete)
521 ops->delete(rule);
522 fib_rule_put(rule); 526 fib_rule_put(rule);
523 flush_route_cache(ops); 527 flush_route_cache(ops);
524 rules_ops_put(ops); 528 rules_ops_put(ops);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e067770235bf..7cda3b0521d8 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -52,14 +52,14 @@ static int __net_init fib4_rules_init(struct net *net)
52{ 52{
53 struct fib_table *local_table, *main_table; 53 struct fib_table *local_table, *main_table;
54 54
55 local_table = fib_trie_table(RT_TABLE_LOCAL); 55 main_table = fib_trie_table(RT_TABLE_MAIN, NULL);
56 if (local_table == NULL)
57 return -ENOMEM;
58
59 main_table = fib_trie_table(RT_TABLE_MAIN);
60 if (main_table == NULL) 56 if (main_table == NULL)
61 goto fail; 57 goto fail;
62 58
59 local_table = fib_trie_table(RT_TABLE_LOCAL, main_table);
60 if (local_table == NULL)
61 return -ENOMEM;
62
63 hlist_add_head_rcu(&local_table->tb_hlist, 63 hlist_add_head_rcu(&local_table->tb_hlist,
64 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]); 64 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
65 hlist_add_head_rcu(&main_table->tb_hlist, 65 hlist_add_head_rcu(&main_table->tb_hlist,
@@ -74,7 +74,7 @@ fail:
74 74
75struct fib_table *fib_new_table(struct net *net, u32 id) 75struct fib_table *fib_new_table(struct net *net, u32 id)
76{ 76{
77 struct fib_table *tb; 77 struct fib_table *tb, *alias = NULL;
78 unsigned int h; 78 unsigned int h;
79 79
80 if (id == 0) 80 if (id == 0)
@@ -83,7 +83,10 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
83 if (tb) 83 if (tb)
84 return tb; 84 return tb;
85 85
86 tb = fib_trie_table(id); 86 if (id == RT_TABLE_LOCAL)
87 alias = fib_new_table(net, RT_TABLE_MAIN);
88
89 tb = fib_trie_table(id, alias);
87 if (!tb) 90 if (!tb)
88 return NULL; 91 return NULL;
89 92
@@ -126,6 +129,48 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
126} 129}
127#endif /* CONFIG_IP_MULTIPLE_TABLES */ 130#endif /* CONFIG_IP_MULTIPLE_TABLES */
128 131
132static void fib_replace_table(struct net *net, struct fib_table *old,
133 struct fib_table *new)
134{
135#ifdef CONFIG_IP_MULTIPLE_TABLES
136 switch (new->tb_id) {
137 case RT_TABLE_LOCAL:
138 rcu_assign_pointer(net->ipv4.fib_local, new);
139 break;
140 case RT_TABLE_MAIN:
141 rcu_assign_pointer(net->ipv4.fib_main, new);
142 break;
143 case RT_TABLE_DEFAULT:
144 rcu_assign_pointer(net->ipv4.fib_default, new);
145 break;
146 default:
147 break;
148 }
149
150#endif
151 /* replace the old table in the hlist */
152 hlist_replace_rcu(&old->tb_hlist, &new->tb_hlist);
153}
154
155int fib_unmerge(struct net *net)
156{
157 struct fib_table *old, *new;
158
159 old = fib_get_table(net, RT_TABLE_LOCAL);
160 new = fib_trie_unmerge(old);
161
162 if (!new)
163 return -ENOMEM;
164
165 /* replace merged table with clean table */
166 if (new != old) {
167 fib_replace_table(net, old, new);
168 fib_free_table(old);
169 }
170
171 return 0;
172}
173
129static void fib_flush(struct net *net) 174static void fib_flush(struct net *net)
130{ 175{
131 int flushed = 0; 176 int flushed = 0;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index ae2e6eede46e..c6211ed60b03 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -12,6 +12,7 @@ struct fib_alias {
12 u8 fa_type; 12 u8 fa_type;
13 u8 fa_state; 13 u8 fa_state;
14 u8 fa_slen; 14 u8 fa_slen;
15 u32 tb_id;
15 struct rcu_head rcu; 16 struct rcu_head rcu;
16}; 17};
17 18
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 190d0d00d744..e9bc5e42cf43 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -174,6 +174,11 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
174 if (frh->tos & ~IPTOS_TOS_MASK) 174 if (frh->tos & ~IPTOS_TOS_MASK)
175 goto errout; 175 goto errout;
176 176
177 /* split local/main if they are not already split */
178 err = fib_unmerge(net);
179 if (err)
180 goto errout;
181
177 if (rule->table == RT_TABLE_UNSPEC) { 182 if (rule->table == RT_TABLE_UNSPEC) {
178 if (rule->action == FR_ACT_TO_TBL) { 183 if (rule->action == FR_ACT_TO_TBL) {
179 struct fib_table *table; 184 struct fib_table *table;
@@ -216,17 +221,24 @@ errout:
216 return err; 221 return err;
217} 222}
218 223
219static void fib4_rule_delete(struct fib_rule *rule) 224static int fib4_rule_delete(struct fib_rule *rule)
220{ 225{
221 struct net *net = rule->fr_net; 226 struct net *net = rule->fr_net;
222#ifdef CONFIG_IP_ROUTE_CLASSID 227 int err;
223 struct fib4_rule *rule4 = (struct fib4_rule *) rule;
224 228
225 if (rule4->tclassid) 229 /* split local/main if they are not already split */
230 err = fib_unmerge(net);
231 if (err)
232 goto errout;
233
234#ifdef CONFIG_IP_ROUTE_CLASSID
235 if (((struct fib4_rule *)rule)->tclassid)
226 net->ipv4.fib_num_tclassid_users--; 236 net->ipv4.fib_num_tclassid_users--;
227#endif 237#endif
228 net->ipv4.fib_has_custom_rules = true; 238 net->ipv4.fib_has_custom_rules = true;
229 fib_flush_external(rule->fr_net); 239 fib_flush_external(rule->fr_net);
240errout:
241 return err;
230} 242}
231 243
232static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 244static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 83290beaf7cf..7b2badd74ad8 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1120,6 +1120,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1120 break; 1120 break;
1121 if (fa->fa_info->fib_priority != fi->fib_priority) 1121 if (fa->fa_info->fib_priority != fi->fib_priority)
1122 break; 1122 break;
1123 /* duplicate entry from another table */
1124 if (WARN_ON(fa->tb_id != tb->tb_id))
1125 continue;
1123 if (fa->fa_type == cfg->fc_type && 1126 if (fa->fa_type == cfg->fc_type &&
1124 fa->fa_info == fi) { 1127 fa->fa_info == fi) {
1125 fa_match = fa; 1128 fa_match = fa;
@@ -1197,6 +1200,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1197 new_fa->fa_type = cfg->fc_type; 1200 new_fa->fa_type = cfg->fc_type;
1198 new_fa->fa_state = 0; 1201 new_fa->fa_state = 0;
1199 new_fa->fa_slen = slen; 1202 new_fa->fa_slen = slen;
1203 new_fa->tb_id = tb->tb_id;
1200 1204
1201 /* (Optionally) offload fib entry to switch hardware. */ 1205 /* (Optionally) offload fib entry to switch hardware. */
1202 err = netdev_switch_fib_ipv4_add(key, plen, fi, tos, 1206 err = netdev_switch_fib_ipv4_add(key, plen, fi, tos,
@@ -1217,7 +1221,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1217 tb->tb_num_default++; 1221 tb->tb_num_default++;
1218 1222
1219 rt_cache_flush(cfg->fc_nlinfo.nl_net); 1223 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1220 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, 1224 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
1221 &cfg->fc_nlinfo, 0); 1225 &cfg->fc_nlinfo, 0);
1222succeeded: 1226succeeded:
1223 return 0; 1227 return 0;
@@ -1243,7 +1247,7 @@ static inline t_key prefix_mismatch(t_key key, struct key_vector *n)
1243int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, 1247int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
1244 struct fib_result *res, int fib_flags) 1248 struct fib_result *res, int fib_flags)
1245{ 1249{
1246 struct trie *t = (struct trie *)tb->tb_data; 1250 struct trie *t = (struct trie *) tb->tb_data;
1247#ifdef CONFIG_IP_FIB_TRIE_STATS 1251#ifdef CONFIG_IP_FIB_TRIE_STATS
1248 struct trie_use_stats __percpu *stats = t->stats; 1252 struct trie_use_stats __percpu *stats = t->stats;
1249#endif 1253#endif
@@ -1483,6 +1487,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
1483 if ((fa->fa_slen != slen) || (fa->fa_tos != tos)) 1487 if ((fa->fa_slen != slen) || (fa->fa_tos != tos))
1484 break; 1488 break;
1485 1489
1490 if (fa->tb_id != tb->tb_id)
1491 continue;
1492
1486 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && 1493 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1487 (cfg->fc_scope == RT_SCOPE_NOWHERE || 1494 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1488 fa->fa_info->fib_scope == cfg->fc_scope) && 1495 fa->fa_info->fib_scope == cfg->fc_scope) &&
@@ -1576,6 +1583,120 @@ found:
1576 return n; 1583 return n;
1577} 1584}
1578 1585
1586static void fib_trie_free(struct fib_table *tb)
1587{
1588 struct trie *t = (struct trie *)tb->tb_data;
1589 struct key_vector *pn = t->kv;
1590 unsigned long cindex = 1;
1591 struct hlist_node *tmp;
1592 struct fib_alias *fa;
1593
1594 /* walk trie in reverse order and free everything */
1595 for (;;) {
1596 struct key_vector *n;
1597
1598 if (!(cindex--)) {
1599 t_key pkey = pn->key;
1600
1601 if (IS_TRIE(pn))
1602 break;
1603
1604 n = pn;
1605 pn = node_parent(pn);
1606
1607 /* drop emptied tnode */
1608 put_child_root(pn, n->key, NULL);
1609 node_free(n);
1610
1611 cindex = get_index(pkey, pn);
1612
1613 continue;
1614 }
1615
1616 /* grab the next available node */
1617 n = get_child(pn, cindex);
1618 if (!n)
1619 continue;
1620
1621 if (IS_TNODE(n)) {
1622 /* record pn and cindex for leaf walking */
1623 pn = n;
1624 cindex = 1ul << n->bits;
1625
1626 continue;
1627 }
1628
1629 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
1630 hlist_del_rcu(&fa->fa_list);
1631 alias_free_mem_rcu(fa);
1632 }
1633
1634 put_child_root(pn, n->key, NULL);
1635 node_free(n);
1636 }
1637
1638#ifdef CONFIG_IP_FIB_TRIE_STATS
1639 free_percpu(t->stats);
1640#endif
1641 kfree(tb);
1642}
1643
1644struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
1645{
1646 struct trie *ot = (struct trie *)oldtb->tb_data;
1647 struct key_vector *l, *tp = ot->kv;
1648 struct fib_table *local_tb;
1649 struct fib_alias *fa;
1650 struct trie *lt;
1651 t_key key = 0;
1652
1653 if (oldtb->tb_data == oldtb->__data)
1654 return oldtb;
1655
1656 local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL);
1657 if (!local_tb)
1658 return NULL;
1659
1660 lt = (struct trie *)local_tb->tb_data;
1661
1662 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
1663 struct key_vector *local_l = NULL, *local_tp;
1664
1665 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
1666 struct fib_alias *new_fa;
1667
1668 if (local_tb->tb_id != fa->tb_id)
1669 continue;
1670
1671 /* clone fa for new local table */
1672 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1673 if (!new_fa)
1674 goto out;
1675
1676 memcpy(new_fa, fa, sizeof(*fa));
1677
1678 /* insert clone into table */
1679 if (!local_l)
1680 local_l = fib_find_node(lt, &local_tp, l->key);
1681
1682 if (fib_insert_alias(lt, local_tp, local_l, new_fa,
1683 NULL, l->key))
1684 goto out;
1685 }
1686
1687 /* stop loop if key wrapped back to 0 */
1688 key = l->key + 1;
1689 if (key < l->key)
1690 break;
1691 }
1692
1693 return local_tb;
1694out:
1695 fib_trie_free(local_tb);
1696
1697 return NULL;
1698}
1699
1579/* Caller must hold RTNL */ 1700/* Caller must hold RTNL */
1580void fib_table_flush_external(struct fib_table *tb) 1701void fib_table_flush_external(struct fib_table *tb)
1581{ 1702{
@@ -1587,6 +1708,7 @@ void fib_table_flush_external(struct fib_table *tb)
1587 1708
1588 /* walk trie in reverse order */ 1709 /* walk trie in reverse order */
1589 for (;;) { 1710 for (;;) {
1711 unsigned char slen = 0;
1590 struct key_vector *n; 1712 struct key_vector *n;
1591 1713
1592 if (!(cindex--)) { 1714 if (!(cindex--)) {
@@ -1596,8 +1718,8 @@ void fib_table_flush_external(struct fib_table *tb)
1596 if (IS_TRIE(pn)) 1718 if (IS_TRIE(pn))
1597 break; 1719 break;
1598 1720
1599 /* no need to resize like in flush below */ 1721 /* resize completed node */
1600 pn = node_parent(pn); 1722 pn = resize(t, pn);
1601 cindex = get_index(pkey, pn); 1723 cindex = get_index(pkey, pn);
1602 1724
1603 continue; 1725 continue;
@@ -1619,6 +1741,18 @@ void fib_table_flush_external(struct fib_table *tb)
1619 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { 1741 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
1620 struct fib_info *fi = fa->fa_info; 1742 struct fib_info *fi = fa->fa_info;
1621 1743
1744 /* if alias was cloned to local then we just
1745 * need to remove the local copy from main
1746 */
1747 if (tb->tb_id != fa->tb_id) {
1748 hlist_del_rcu(&fa->fa_list);
1749 alias_free_mem_rcu(fa);
1750 continue;
1751 }
1752
1753 /* record local slen */
1754 slen = fa->fa_slen;
1755
1622 if (!fi || !(fi->fib_flags & RTNH_F_EXTERNAL)) 1756 if (!fi || !(fi->fib_flags & RTNH_F_EXTERNAL))
1623 continue; 1757 continue;
1624 1758
@@ -1627,6 +1761,16 @@ void fib_table_flush_external(struct fib_table *tb)
1627 fi, fa->fa_tos, 1761 fi, fa->fa_tos,
1628 fa->fa_type, tb->tb_id); 1762 fa->fa_type, tb->tb_id);
1629 } 1763 }
1764
1765 /* update leaf slen */
1766 n->slen = slen;
1767
1768 if (hlist_empty(&n->leaf)) {
1769 put_child_root(pn, n->key, NULL);
1770 node_free(n);
1771 } else {
1772 leaf_pull_suffix(pn, n);
1773 }
1630 } 1774 }
1631} 1775}
1632 1776
@@ -1711,7 +1855,8 @@ static void __trie_free_rcu(struct rcu_head *head)
1711#ifdef CONFIG_IP_FIB_TRIE_STATS 1855#ifdef CONFIG_IP_FIB_TRIE_STATS
1712 struct trie *t = (struct trie *)tb->tb_data; 1856 struct trie *t = (struct trie *)tb->tb_data;
1713 1857
1714 free_percpu(t->stats); 1858 if (tb->tb_data == tb->__data)
1859 free_percpu(t->stats);
1715#endif /* CONFIG_IP_FIB_TRIE_STATS */ 1860#endif /* CONFIG_IP_FIB_TRIE_STATS */
1716 kfree(tb); 1861 kfree(tb);
1717} 1862}
@@ -1738,6 +1883,11 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
1738 continue; 1883 continue;
1739 } 1884 }
1740 1885
1886 if (tb->tb_id != fa->tb_id) {
1887 i++;
1888 continue;
1889 }
1890
1741 if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, 1891 if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
1742 cb->nlh->nlmsg_seq, 1892 cb->nlh->nlmsg_seq,
1743 RTM_NEWROUTE, 1893 RTM_NEWROUTE,
@@ -1804,18 +1954,26 @@ void __init fib_trie_init(void)
1804 0, SLAB_PANIC, NULL); 1954 0, SLAB_PANIC, NULL);
1805} 1955}
1806 1956
1807struct fib_table *fib_trie_table(u32 id) 1957struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
1808{ 1958{
1809 struct fib_table *tb; 1959 struct fib_table *tb;
1810 struct trie *t; 1960 struct trie *t;
1961 size_t sz = sizeof(*tb);
1962
1963 if (!alias)
1964 sz += sizeof(struct trie);
1811 1965
1812 tb = kzalloc(sizeof(*tb) + sizeof(struct trie), GFP_KERNEL); 1966 tb = kzalloc(sz, GFP_KERNEL);
1813 if (tb == NULL) 1967 if (tb == NULL)
1814 return NULL; 1968 return NULL;
1815 1969
1816 tb->tb_id = id; 1970 tb->tb_id = id;
1817 tb->tb_default = -1; 1971 tb->tb_default = -1;
1818 tb->tb_num_default = 0; 1972 tb->tb_num_default = 0;
1973 tb->tb_data = (alias ? alias->__data : tb->__data);
1974
1975 if (alias)
1976 return tb;
1819 1977
1820 t = (struct trie *) tb->tb_data; 1978 t = (struct trie *) tb->tb_data;
1821 t->kv[0].pos = KEYLENGTH; 1979 t->kv[0].pos = KEYLENGTH;