summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorStefano Brivio <sbrivio@redhat.com>2019-06-21 11:45:23 -0400
committerDavid S. Miller <davem@davemloft.net>2019-06-24 13:18:48 -0400
commitee28906fd7a1437ca77a60a99b6b9c6d676220f8 (patch)
treeb35c574c607a2426a9b32591ddabc26d0c8812f7 /net/ipv4
parentd948974ccc6613b30636014f76700de3aad7e9b7 (diff)
ipv4: Dump route exceptions if requested
Since commit 4895c771c7f0 ("ipv4: Add FIB nexthop exceptions."), cached exception routes are stored as a separate entity, so they are not dumped on a FIB dump, even if the RTM_F_CLONED flag is passed.

This implies that the command 'ip route list cache' doesn't return any result anymore.

If the RTM_F_CLONED is passed, and strict checking requested, retrieve nexthop exception routes and dump them. If no strict checking is requested, filtering can't be performed consistently: dump everything in that case.

With this, we need to add an argument to the netlink callback in order to track how many entries were already dumped for the last leaf included in a partial netlink dump.

A single additional argument is sufficient, even if we traverse logically nested structures (nexthop objects, hash table buckets, bucket chains): it doesn't matter if we stop in the middle of any of those, because they are always traversed the same way. As an example, s_i values in [], s_fa values in ():

  node (fa) #1 [1]
    nexthop #1
      bucket #1 -> #0 in chain (1)
      bucket #2 -> #0 in chain (2) -> #1 in chain (3) -> #2 in chain (4)
      bucket #3 -> #0 in chain (5) -> #1 in chain (6)
    nexthop #2
      bucket #1 -> #0 in chain (7) -> #1 in chain (8)
      bucket #2 -> #0 in chain (9)
  --
  node (fa) #2 [2]
    nexthop #1
      bucket #1 -> #0 in chain (1) -> #1 in chain (2)
      bucket #2 -> #0 in chain (3)

it doesn't matter if we stop at (3), (4), (7) for "node #1", or at (2) for "node #2": walking flattens all that.

It would even be possible to drop the distinction between the in-tree (s_i) and in-node (s_fa) counter, but a further improvement might advise against this. This is only as accurate as the existing tracking mechanism for leaves: if a partial dump is restarted after exceptions are removed or expired, we might skip some non-dumped entries.
To improve this, we could attach a 'sernum' attribute (similar to the one used for IPv6) to nexthop entities, and bump this counter whenever exceptions change: having a distinction between the two counters would make this more convenient.

Listing of exception routes (modified routes pre-3.5) was tested against these versions of kernel and iproute2:

                      iproute2
  kernel      4.14.0   4.15.0   4.19.0   5.0.0   5.1.0
  3.5-rc4       +        +        +        +       +
  4.4
  4.9
  4.14
  4.15
  4.19
  5.0
  5.1
  fixed         +        +        +        +       +

v7:
  - Move loop over nexthop objects to route.c, and pass struct fib_info and table ID to it, not a struct fib_alias (suggested by David Ahern)
  - While at it, note that the NULL check on fa->fa_info is redundant, and the check on RTNH_F_DEAD is also not consistent with what's done with regular route listing: just keep it for nhc_flags
  - Rename entry point function for dumping exceptions to fib_dump_info_fnhe(), and rearrange arguments for consistency with fib_dump_info()
  - Rename fnhe_dump_buckets() to fnhe_dump_bucket() and make it handle one bucket at a time
  - Expand commit message to describe why we can have a single "skip" counter for all exceptions stored in bucket chains in nexthop objects (suggested by David Ahern)

v6:
  - Rebased onto net-next
  - Loop over nexthop paths too. Move loop over fnhe buckets to route.c, avoids need to export rt_fill_info() and to touch exceptions from fib_trie.c. Pass NULL as flow to rt_fill_info(), it now allows that (suggested by David Ahern)

Fixes: 4895c771c7f0 ("ipv4: Add FIB nexthop exceptions.")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/fib_trie.c44
-rw-r--r--net/ipv4/route.c73
2 files changed, 104 insertions, 13 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 90f0fc8c87bd..4400f5051977 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2090,22 +2090,26 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
2090{ 2090{
2091 unsigned int flags = NLM_F_MULTI; 2091 unsigned int flags = NLM_F_MULTI;
2092 __be32 xkey = htonl(l->key); 2092 __be32 xkey = htonl(l->key);
2093 int i, s_i, i_fa, s_fa, err;
2093 struct fib_alias *fa; 2094 struct fib_alias *fa;
2094 int i, s_i;
2095 2095
2096 if (filter->filter_set) 2096 if (filter->filter_set ||
2097 !filter->dump_exceptions || !filter->dump_routes)
2097 flags |= NLM_F_DUMP_FILTERED; 2098 flags |= NLM_F_DUMP_FILTERED;
2098 2099
2099 s_i = cb->args[4]; 2100 s_i = cb->args[4];
2101 s_fa = cb->args[5];
2100 i = 0; 2102 i = 0;
2101 2103
2102 /* rcu_read_lock is hold by caller */ 2104 /* rcu_read_lock is hold by caller */
2103 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { 2105 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2104 int err; 2106 struct fib_info *fi = fa->fa_info;
2105 2107
2106 if (i < s_i) 2108 if (i < s_i)
2107 goto next; 2109 goto next;
2108 2110
2111 i_fa = 0;
2112
2109 if (tb->tb_id != fa->tb_id) 2113 if (tb->tb_id != fa->tb_id)
2110 goto next; 2114 goto next;
2111 2115
@@ -2114,29 +2118,43 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
2114 goto next; 2118 goto next;
2115 2119
2116 if ((filter->protocol && 2120 if ((filter->protocol &&
2117 fa->fa_info->fib_protocol != filter->protocol)) 2121 fi->fib_protocol != filter->protocol))
2118 goto next; 2122 goto next;
2119 2123
2120 if (filter->dev && 2124 if (filter->dev &&
2121 !fib_info_nh_uses_dev(fa->fa_info, filter->dev)) 2125 !fib_info_nh_uses_dev(fi, filter->dev))
2122 goto next; 2126 goto next;
2123 } 2127 }
2124 2128
2125 err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid, 2129 if (filter->dump_routes && !s_fa) {
2126 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 2130 err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
2127 tb->tb_id, fa->fa_type, 2131 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
2128 xkey, KEYLENGTH - fa->fa_slen, 2132 tb->tb_id, fa->fa_type,
2129 fa->fa_tos, fa->fa_info, flags); 2133 xkey, KEYLENGTH - fa->fa_slen,
2130 if (err < 0) { 2134 fa->fa_tos, fi, flags);
2131 cb->args[4] = i; 2135 if (err < 0)
2132 return err; 2136 goto stop;
2137 i_fa++;
2133 } 2138 }
2139
2140 if (filter->dump_exceptions) {
2141 err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi,
2142 &i_fa, s_fa);
2143 if (err < 0)
2144 goto stop;
2145 }
2146
2134next: 2147next:
2135 i++; 2148 i++;
2136 } 2149 }
2137 2150
2138 cb->args[4] = i; 2151 cb->args[4] = i;
2139 return skb->len; 2152 return skb->len;
2153
2154stop:
2155 cb->args[4] = i;
2156 cb->args[5] = i_fa;
2157 return err;
2140} 2158}
2141 2159
2142/* rcu_read_lock needs to be hold by caller from readside */ 2160/* rcu_read_lock needs to be hold by caller from readside */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b1628d25e828..6aee412a68bd 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2812,6 +2812,79 @@ nla_put_failure:
2812 return -EMSGSIZE; 2812 return -EMSGSIZE;
2813} 2813}
2814 2814
/*
 * fnhe_dump_bucket - dump the nexthop exceptions reachable from @bucket.
 *
 * Walks bucket[0] .. bucket[FNHE_HASH_SIZE - 1] and, for each, follows the
 * chain through fnhe_next, emitting one message per usable exception via
 * rt_fill_info().
 *
 * @net:       namespace passed through to rt_fill_info()
 * @skb:       buffer the messages are appended to
 * @cb:        netlink callback; supplies the portid and sequence number
 * @table_id:  table ID passed through to rt_fill_info()
 * @bucket:    array of hash buckets indexed 0..FNHE_HASH_SIZE-1
 * @genid:     exceptions whose fnhe_genid differs from this are skipped
 * @fa_index:  running count of chain entries visited; it is advanced for
 *             every entry walked, including entries that are skipped
 * @fa_start:  entries visited while *fa_index < fa_start are not emitted
 *             (presumably they were already sent by an earlier pass of a
 *             partial dump -- confirm against the caller)
 *
 * Returns 0 once every chain has been walked, or the non-zero value
 * returned by rt_fill_info(). On that early return *fa_index is left
 * pointing at the entry that failed, since the increment is not reached.
 *
 * NOTE(review): the chain pointers are read with rcu_dereference(), so the
 * caller is presumably expected to be inside an RCU read-side section.
 */
2815static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
2816 struct netlink_callback *cb, u32 table_id,
2817 struct fnhe_hash_bucket *bucket, int genid,
2818 int *fa_index, int fa_start)
2819{
2820 int i;
2821
2822 for (i = 0; i < FNHE_HASH_SIZE; i++) {
2823 struct fib_nh_exception *fnhe;
2824
2825 for (fnhe = rcu_dereference(bucket[i].chain); fnhe;
2826 fnhe = rcu_dereference(fnhe->fnhe_next)) {
2827 struct rtable *rt;
2828 int err;
2829
/* Below the resume point: count the entry but do not emit it. */
2830 if (*fa_index < fa_start)
2831 goto next;
2832
/* Generation mismatch with the caller-supplied genid: skip. */
2833 if (fnhe->fnhe_genid != genid)
2834 goto next;
2835
/* A zero fnhe_expires is not treated as expired; otherwise skip
 * entries whose expiry time is already behind jiffies.
 */
2836 if (fnhe->fnhe_expires &&
2837 time_after(jiffies, fnhe->fnhe_expires))
2838 goto next;
2839
/* Prefer the input route, fall back to the output route; an
 * exception with neither has nothing to dump.
 */
2840 rt = rcu_dereference(fnhe->fnhe_rth_input);
2841 if (!rt)
2842 rt = rcu_dereference(fnhe->fnhe_rth_output);
2843 if (!rt)
2844 goto next;
2845
2846 err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt,
2847 table_id, NULL, skb,
2848 NETLINK_CB(cb->skb).portid,
2849 cb->nlh->nlmsg_seq);
/* Bail out before the increment so *fa_index still designates
 * the entry that could not be written.
 */
2850 if (err)
2851 return err;
2852next:
2853 (*fa_index)++;
2854 }
2855 }
2856
2857 return 0;
2858}
2859
/*
 * fib_dump_info_fnhe - dump the nexthop exceptions of every path of @fi.
 *
 * Iterates over the fib_info_num_path(fi) nexthops of @fi and hands each
 * nexthop's exception table to fnhe_dump_bucket().
 *
 * @skb:       buffer the messages are appended to
 * @cb:        netlink callback; the namespace is taken from cb->skb->sk
 * @table_id:  table ID forwarded to fnhe_dump_bucket()
 * @fi:        route whose nexthops are scanned
 * @fa_index:  shared running counter, forwarded unchanged so that it keeps
 *             counting across all nexthops of @fi
 * @fa_start:  resume threshold forwarded to fnhe_dump_bucket()
 *
 * Returns 0 when all nexthops were processed, or the first non-zero value
 * returned by fnhe_dump_bucket().
 *
 * NOTE(review): nhc_exceptions is read with rcu_dereference(), so the
 * caller is presumably expected to be inside an RCU read-side section.
 */
2860int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
2861 u32 table_id, struct fib_info *fi,
2862 int *fa_index, int fa_start)
2863{
2864 struct net *net = sock_net(cb->skb->sk);
/* The generation ID is sampled once and used for every nexthop. */
2865 int nhsel, genid = fnhe_genid(net);
2866
2867 for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
2868 struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
2869 struct fnhe_hash_bucket *bucket;
2870 int err;
2871
/* Nexthops flagged RTNH_F_DEAD are not scanned. */
2872 if (nhc->nhc_flags & RTNH_F_DEAD)
2873 continue;
2874
/* No exception table attached to this nexthop: nothing to dump. */
2875 bucket = rcu_dereference(nhc->nhc_exceptions);
2876 if (!bucket)
2877 continue;
2878
2879 err = fnhe_dump_bucket(net, skb, cb, table_id, bucket, genid,
2880 fa_index, fa_start);
2881 if (err)
2882 return err;
2883 }
2884
2885 return 0;
2886}
2887
2815static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, 2888static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
2816 u8 ip_proto, __be16 sport, 2889 u8 ip_proto, __be16 sport,
2817 __be16 dport) 2890 __be16 dport)