summaryrefslogtreecommitdiffstats
path: root/net/ipv6/route.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2017-02-22 13:15:09 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2017-02-22 13:15:09 -0500
commit 3051bf36c25d5153051704291782f8d44e744d36 (patch)
tree 72dfc8a1d12675c6f2981d13102df954b678f11b /net/ipv6/route.c
parent 1e74a2eb1f5cc7f2f2b5aa9c9eeecbcf352220a3 (diff)
parent 005c3490e9db23738d91e02788606c0fe4734723 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Highlights: 1) Support TX_RING in AF_PACKET TPACKET_V3 mode, from Sowmini Varadhan. 2) Simplify classifier state on sk_buff in order to shrink it a bit. From Willem de Bruijn. 3) Introduce SIPHASH and its usage for secure sequence numbers and syncookies. From Jason A. Donenfeld. 4) Reduce CPU usage for ICMP replies we are going to limit or suppress, from Jesper Dangaard Brouer. 5) Introduce Shared Memory Communications socket layer, from Ursula Braun. 6) Add RACK loss detection and allow it to actually trigger fast recovery instead of just assisting after other algorithms have triggered it. From Yuchung Cheng. 7) Add xmit_more and BQL support to mvneta driver, from Simon Guinot. 8) skb_cow_data avoidance in esp4 and esp6, from Steffen Klassert. 9) Export MPLS packet stats via netlink, from Robert Shearman. 10) Significantly improve inet port bind conflict handling, especially when an application is restarted and changes its setting of reuseport. From Josef Bacik. 11) Implement TX batching in vhost_net, from Jason Wang. 12) Extend the dummy device so that VF (virtual function) features, such as configuration, can be more easily tested. From Phil Sutter. 13) Avoid two atomic ops per page on x86 in bnx2x driver, from Eric Dumazet. 14) Add new bpf MAP, implementing a longest prefix match trie. From Daniel Mack. 15) Packet sample offloading support in mlxsw driver, from Yotam Gigi. 16) Add new aquantia driver, from David VomLehn. 17) Add bpf tracepoints, from Daniel Borkmann. 18) Add support for port mirroring to b53 and bcm_sf2 drivers, from Florian Fainelli. 19) Remove custom busy polling in many drivers; it has been done in the core networking code since 4.5. From Eric Dumazet. 20) Support XDP adjust_head in virtio_net, from John Fastabend. 21) Fix several major holes in neighbour entry confirmation, from Julian Anastasov. 22) Add XDP support to bnxt_en driver, from Michael Chan. 
23) VXLAN offloads for enic driver, from Govindarajulu Varadarajan. 24) Add IPVTAP driver (IP-VLAN based tap driver) from Sainath Grandhi. 25) Support GRO in IPSEC protocols, from Steffen Klassert" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1764 commits) Revert "ath10k: Search SMBIOS for OEM board file extension" net: socket: fix recvmmsg not returning error from sock_error bnxt_en: use eth_hw_addr_random() bpf: fix unlocking of jited image when module ronx not set arch: add ARCH_HAS_SET_MEMORY config net: napi_watchdog() can use napi_schedule_irqoff() tcp: Revert "tcp: tcp_probe: use spin_lock_bh()" net/hsr: use eth_hw_addr_random() net: mvpp2: enable building on 64-bit platforms net: mvpp2: switch to build_skb() in the RX path net: mvpp2: simplify MVPP2_PRS_RI_* definitions net: mvpp2: fix indentation of MVPP2_EXT_GLOBAL_CTRL_DEFAULT net: mvpp2: remove unused register definitions net: mvpp2: simplify mvpp2_bm_bufs_add() net: mvpp2: drop useless fields in mvpp2_bm_pool and related code net: mvpp2: remove unused 'tx_skb' field of 'struct mvpp2_tx_queue' net: mvpp2: release reference to txq_cpu[] entry after unmapping net: mvpp2: handle too large value in mvpp2_rx_time_coal_set() net: mvpp2: handle too large value handling in mvpp2_rx_pkts_coal_set() net: mvpp2: remove useless arguments in mvpp2_rx_{pkts, time}_coal_set ...
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r-- net/ipv6/route.c 337
1 files changed, 266 insertions, 71 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7ea85370c11c..f54f4265b37f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -98,6 +98,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb); 98 struct sk_buff *skb);
99static void rt6_dst_from_metrics_check(struct rt6_info *rt); 99static void rt6_dst_from_metrics_check(struct rt6_info *rt);
100static int rt6_score_route(struct rt6_info *rt, int oif, int strict); 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
101static size_t rt6_nlmsg_size(struct rt6_info *rt);
102static int rt6_fill_node(struct net *net,
103 struct sk_buff *skb, struct rt6_info *rt,
104 struct in6_addr *dst, struct in6_addr *src,
105 int iif, int type, u32 portid, u32 seq,
106 unsigned int flags);
101 107
102#ifdef CONFIG_IPV6_ROUTE_INFO 108#ifdef CONFIG_IPV6_ROUTE_INFO
103static struct rt6_info *rt6_add_route_info(struct net *net, 109static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -217,6 +223,21 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
217 return neigh_create(&nd_tbl, daddr, dst->dev); 223 return neigh_create(&nd_tbl, daddr, dst->dev);
218} 224}
219 225
226static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
227{
228 struct net_device *dev = dst->dev;
229 struct rt6_info *rt = (struct rt6_info *)dst;
230
231 daddr = choose_neigh_daddr(rt, NULL, daddr);
232 if (!daddr)
233 return;
234 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
235 return;
236 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
237 return;
238 __ipv6_confirm_neigh(dev, daddr);
239}
240
220static struct dst_ops ip6_dst_ops_template = { 241static struct dst_ops ip6_dst_ops_template = {
221 .family = AF_INET6, 242 .family = AF_INET6,
222 .gc = ip6_dst_gc, 243 .gc = ip6_dst_gc,
@@ -233,6 +254,7 @@ static struct dst_ops ip6_dst_ops_template = {
233 .redirect = rt6_do_redirect, 254 .redirect = rt6_do_redirect,
234 .local_out = __ip6_local_out, 255 .local_out = __ip6_local_out,
235 .neigh_lookup = ip6_neigh_lookup, 256 .neigh_lookup = ip6_neigh_lookup,
257 .confirm_neigh = ip6_confirm_neigh,
236}; 258};
237 259
238static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) 260static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
@@ -1359,6 +1381,7 @@ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1359static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, 1381static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1360 const struct ipv6hdr *iph, u32 mtu) 1382 const struct ipv6hdr *iph, u32 mtu)
1361{ 1383{
1384 const struct in6_addr *daddr, *saddr;
1362 struct rt6_info *rt6 = (struct rt6_info *)dst; 1385 struct rt6_info *rt6 = (struct rt6_info *)dst;
1363 1386
1364 if (rt6->rt6i_flags & RTF_LOCAL) 1387 if (rt6->rt6i_flags & RTF_LOCAL)
@@ -1367,26 +1390,26 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1367 if (dst_metric_locked(dst, RTAX_MTU)) 1390 if (dst_metric_locked(dst, RTAX_MTU))
1368 return; 1391 return;
1369 1392
1370 dst_confirm(dst); 1393 if (iph) {
1394 daddr = &iph->daddr;
1395 saddr = &iph->saddr;
1396 } else if (sk) {
1397 daddr = &sk->sk_v6_daddr;
1398 saddr = &inet6_sk(sk)->saddr;
1399 } else {
1400 daddr = NULL;
1401 saddr = NULL;
1402 }
1403 dst_confirm_neigh(dst, daddr);
1371 mtu = max_t(u32, mtu, IPV6_MIN_MTU); 1404 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1372 if (mtu >= dst_mtu(dst)) 1405 if (mtu >= dst_mtu(dst))
1373 return; 1406 return;
1374 1407
1375 if (!rt6_cache_allowed_for_pmtu(rt6)) { 1408 if (!rt6_cache_allowed_for_pmtu(rt6)) {
1376 rt6_do_update_pmtu(rt6, mtu); 1409 rt6_do_update_pmtu(rt6, mtu);
1377 } else { 1410 } else if (daddr) {
1378 const struct in6_addr *daddr, *saddr;
1379 struct rt6_info *nrt6; 1411 struct rt6_info *nrt6;
1380 1412
1381 if (iph) {
1382 daddr = &iph->daddr;
1383 saddr = &iph->saddr;
1384 } else if (sk) {
1385 daddr = &sk->sk_v6_daddr;
1386 saddr = &inet6_sk(sk)->saddr;
1387 } else {
1388 return;
1389 }
1390 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); 1413 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1391 if (nrt6) { 1414 if (nrt6) {
1392 rt6_do_update_pmtu(nrt6, mtu); 1415 rt6_do_update_pmtu(nrt6, mtu);
@@ -1897,7 +1920,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1897 if (cfg->fc_encap) { 1920 if (cfg->fc_encap) {
1898 struct lwtunnel_state *lwtstate; 1921 struct lwtunnel_state *lwtstate;
1899 1922
1900 err = lwtunnel_build_state(dev, cfg->fc_encap_type, 1923 err = lwtunnel_build_state(cfg->fc_encap_type,
1901 cfg->fc_encap, AF_INET6, cfg, 1924 cfg->fc_encap, AF_INET6, cfg,
1902 &lwtstate); 1925 &lwtstate);
1903 if (err) 1926 if (err)
@@ -2143,6 +2166,54 @@ int ip6_del_rt(struct rt6_info *rt)
2143 return __ip6_del_rt(rt, &info); 2166 return __ip6_del_rt(rt, &info);
2144} 2167}
2145 2168
2169static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2170{
2171 struct nl_info *info = &cfg->fc_nlinfo;
2172 struct sk_buff *skb = NULL;
2173 struct fib6_table *table;
2174 int err;
2175
2176 table = rt->rt6i_table;
2177 write_lock_bh(&table->tb6_lock);
2178
2179 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2180 struct rt6_info *sibling, *next_sibling;
2181
2182 /* prefer to send a single notification with all hops */
2183 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2184 if (skb) {
2185 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2186
2187 if (rt6_fill_node(info->nl_net, skb, rt,
2188 NULL, NULL, 0, RTM_DELROUTE,
2189 info->portid, seq, 0) < 0) {
2190 kfree_skb(skb);
2191 skb = NULL;
2192 } else
2193 info->skip_notify = 1;
2194 }
2195
2196 list_for_each_entry_safe(sibling, next_sibling,
2197 &rt->rt6i_siblings,
2198 rt6i_siblings) {
2199 err = fib6_del(sibling, info);
2200 if (err)
2201 goto out;
2202 }
2203 }
2204
2205 err = fib6_del(rt, info);
2206out:
2207 write_unlock_bh(&table->tb6_lock);
2208 ip6_rt_put(rt);
2209
2210 if (skb) {
2211 rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV6_ROUTE,
2212 info->nlh, gfp_any());
2213 }
2214 return err;
2215}
2216
2146static int ip6_route_del(struct fib6_config *cfg) 2217static int ip6_route_del(struct fib6_config *cfg)
2147{ 2218{
2148 struct fib6_table *table; 2219 struct fib6_table *table;
@@ -2179,7 +2250,11 @@ static int ip6_route_del(struct fib6_config *cfg)
2179 dst_hold(&rt->dst); 2250 dst_hold(&rt->dst);
2180 read_unlock_bh(&table->tb6_lock); 2251 read_unlock_bh(&table->tb6_lock);
2181 2252
2182 return __ip6_del_rt(rt, &cfg->fc_nlinfo); 2253 /* if gateway was specified only delete the one hop */
2254 if (cfg->fc_flags & RTF_GATEWAY)
2255 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2256
2257 return __ip6_del_rt_siblings(rt, cfg);
2183 } 2258 }
2184 } 2259 }
2185 read_unlock_bh(&table->tb6_lock); 2260 read_unlock_bh(&table->tb6_lock);
@@ -2258,7 +2333,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
2258 * Look, redirects are sent only in response to data packets, 2333 * Look, redirects are sent only in response to data packets,
2259 * so that this nexthop apparently is reachable. --ANK 2334 * so that this nexthop apparently is reachable. --ANK
2260 */ 2335 */
2261 dst_confirm(&rt->dst); 2336 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
2262 2337
2263 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1); 2338 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2264 if (!neigh) 2339 if (!neigh)
@@ -2634,6 +2709,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2634 rt->dst.output = ip6_output; 2709 rt->dst.output = ip6_output;
2635 rt->rt6i_idev = idev; 2710 rt->rt6i_idev = idev;
2636 2711
2712 rt->rt6i_protocol = RTPROT_KERNEL;
2637 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 2713 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2638 if (anycast) 2714 if (anycast)
2639 rt->rt6i_flags |= RTF_ANYCAST; 2715 rt->rt6i_flags |= RTF_ANYCAST;
@@ -2711,13 +2787,16 @@ struct arg_dev_net {
2711 struct net *net; 2787 struct net *net;
2712}; 2788};
2713 2789
2790/* called with write lock held for table with rt */
2714static int fib6_ifdown(struct rt6_info *rt, void *arg) 2791static int fib6_ifdown(struct rt6_info *rt, void *arg)
2715{ 2792{
2716 const struct arg_dev_net *adn = arg; 2793 const struct arg_dev_net *adn = arg;
2717 const struct net_device *dev = adn->dev; 2794 const struct net_device *dev = adn->dev;
2718 2795
2719 if ((rt->dst.dev == dev || !dev) && 2796 if ((rt->dst.dev == dev || !dev) &&
2720 rt != adn->net->ipv6.ip6_null_entry) 2797 rt != adn->net->ipv6.ip6_null_entry &&
2798 (rt->rt6i_nsiblings == 0 ||
2799 !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
2721 return -1; 2800 return -1;
2722 2801
2723 return 0; 2802 return 0;
@@ -2948,7 +3027,7 @@ static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2948 struct rt6_nh *nh; 3027 struct rt6_nh *nh;
2949 3028
2950 list_for_each_entry(nh, rt6_nh_list, next) { 3029 list_for_each_entry(nh, rt6_nh_list, next) {
2951 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n", 3030 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
2952 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway, 3031 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2953 nh->r_cfg.fc_ifindex); 3032 nh->r_cfg.fc_ifindex);
2954 } 3033 }
@@ -2987,13 +3066,37 @@ static int ip6_route_info_append(struct list_head *rt6_nh_list,
2987 return 0; 3066 return 0;
2988} 3067}
2989 3068
3069static void ip6_route_mpath_notify(struct rt6_info *rt,
3070 struct rt6_info *rt_last,
3071 struct nl_info *info,
3072 __u16 nlflags)
3073{
3074 /* if this is an APPEND route, then rt points to the first route
3075 * inserted and rt_last points to last route inserted. Userspace
3076 * wants a consistent dump of the route which starts at the first
3077 * nexthop. Since sibling routes are always added at the end of
3078 * the list, find the first sibling of the last route appended
3079 */
3080 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3081 rt = list_first_entry(&rt_last->rt6i_siblings,
3082 struct rt6_info,
3083 rt6i_siblings);
3084 }
3085
3086 if (rt)
3087 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3088}
3089
2990static int ip6_route_multipath_add(struct fib6_config *cfg) 3090static int ip6_route_multipath_add(struct fib6_config *cfg)
2991{ 3091{
3092 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3093 struct nl_info *info = &cfg->fc_nlinfo;
2992 struct fib6_config r_cfg; 3094 struct fib6_config r_cfg;
2993 struct rtnexthop *rtnh; 3095 struct rtnexthop *rtnh;
2994 struct rt6_info *rt; 3096 struct rt6_info *rt;
2995 struct rt6_nh *err_nh; 3097 struct rt6_nh *err_nh;
2996 struct rt6_nh *nh, *nh_safe; 3098 struct rt6_nh *nh, *nh_safe;
3099 __u16 nlflags;
2997 int remaining; 3100 int remaining;
2998 int attrlen; 3101 int attrlen;
2999 int err = 1; 3102 int err = 1;
@@ -3002,6 +3105,10 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
3002 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); 3105 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3003 LIST_HEAD(rt6_nh_list); 3106 LIST_HEAD(rt6_nh_list);
3004 3107
3108 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3109 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3110 nlflags |= NLM_F_APPEND;
3111
3005 remaining = cfg->fc_mp_len; 3112 remaining = cfg->fc_mp_len;
3006 rtnh = (struct rtnexthop *)cfg->fc_mp; 3113 rtnh = (struct rtnexthop *)cfg->fc_mp;
3007 3114
@@ -3044,9 +3151,20 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
3044 rtnh = rtnh_next(rtnh, &remaining); 3151 rtnh = rtnh_next(rtnh, &remaining);
3045 } 3152 }
3046 3153
3154 /* for add and replace send one notification with all nexthops.
3155 * Skip the notification in fib6_add_rt2node and send one with
3156 * the full route when done
3157 */
3158 info->skip_notify = 1;
3159
3047 err_nh = NULL; 3160 err_nh = NULL;
3048 list_for_each_entry(nh, &rt6_nh_list, next) { 3161 list_for_each_entry(nh, &rt6_nh_list, next) {
3049 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc); 3162 rt_last = nh->rt6_info;
3163 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc);
3164 /* save reference to first route for notification */
3165 if (!rt_notif && !err)
3166 rt_notif = nh->rt6_info;
3167
3050 /* nh->rt6_info is used or freed at this point, reset to NULL*/ 3168 /* nh->rt6_info is used or freed at this point, reset to NULL*/
3051 nh->rt6_info = NULL; 3169 nh->rt6_info = NULL;
3052 if (err) { 3170 if (err) {
@@ -3068,9 +3186,18 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
3068 nhn++; 3186 nhn++;
3069 } 3187 }
3070 3188
3189 /* success ... tell user about new route */
3190 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3071 goto cleanup; 3191 goto cleanup;
3072 3192
3073add_errout: 3193add_errout:
3194 /* send notification for routes that were added so that
3195 * the delete notifications sent by ip6_route_del are
3196 * coherent
3197 */
3198 if (rt_notif)
3199 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3200
3074 /* Delete routes that were already added */ 3201 /* Delete routes that were already added */
3075 list_for_each_entry(nh, &rt6_nh_list, next) { 3202 list_for_each_entry(nh, &rt6_nh_list, next) {
3076 if (err_nh == nh) 3203 if (err_nh == nh)
@@ -3138,8 +3265,10 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3138 3265
3139 if (cfg.fc_mp) 3266 if (cfg.fc_mp)
3140 return ip6_route_multipath_del(&cfg); 3267 return ip6_route_multipath_del(&cfg);
3141 else 3268 else {
3269 cfg.fc_delete_all_nh = 1;
3142 return ip6_route_del(&cfg); 3270 return ip6_route_del(&cfg);
3271 }
3143} 3272}
3144 3273
3145static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) 3274static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -3157,8 +3286,20 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3157 return ip6_route_add(&cfg); 3286 return ip6_route_add(&cfg);
3158} 3287}
3159 3288
3160static inline size_t rt6_nlmsg_size(struct rt6_info *rt) 3289static size_t rt6_nlmsg_size(struct rt6_info *rt)
3161{ 3290{
3291 int nexthop_len = 0;
3292
3293 if (rt->rt6i_nsiblings) {
3294 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
3295 + NLA_ALIGN(sizeof(struct rtnexthop))
3296 + nla_total_size(16) /* RTA_GATEWAY */
3297 + nla_total_size(4) /* RTA_OIF */
3298 + lwtunnel_get_encap_size(rt->dst.lwtstate);
3299
3300 nexthop_len *= rt->rt6i_nsiblings;
3301 }
3302
3162 return NLMSG_ALIGN(sizeof(struct rtmsg)) 3303 return NLMSG_ALIGN(sizeof(struct rtmsg))
3163 + nla_total_size(16) /* RTA_SRC */ 3304 + nla_total_size(16) /* RTA_SRC */
3164 + nla_total_size(16) /* RTA_DST */ 3305 + nla_total_size(16) /* RTA_DST */
@@ -3172,14 +3313,69 @@ static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3172 + nla_total_size(sizeof(struct rta_cacheinfo)) 3313 + nla_total_size(sizeof(struct rta_cacheinfo))
3173 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ 3314 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3174 + nla_total_size(1) /* RTA_PREF */ 3315 + nla_total_size(1) /* RTA_PREF */
3175 + lwtunnel_get_encap_size(rt->dst.lwtstate); 3316 + lwtunnel_get_encap_size(rt->dst.lwtstate)
3317 + nexthop_len;
3318}
3319
3320static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
3321 unsigned int *flags)
3322{
3323 if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3324 *flags |= RTNH_F_LINKDOWN;
3325 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3326 *flags |= RTNH_F_DEAD;
3327 }
3328
3329 if (rt->rt6i_flags & RTF_GATEWAY) {
3330 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3331 goto nla_put_failure;
3332 }
3333
3334 if (rt->dst.dev &&
3335 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3336 goto nla_put_failure;
3337
3338 if (rt->dst.lwtstate &&
3339 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3340 goto nla_put_failure;
3341
3342 return 0;
3343
3344nla_put_failure:
3345 return -EMSGSIZE;
3346}
3347
3348static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
3349{
3350 struct rtnexthop *rtnh;
3351 unsigned int flags = 0;
3352
3353 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
3354 if (!rtnh)
3355 goto nla_put_failure;
3356
3357 rtnh->rtnh_hops = 0;
3358 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
3359
3360 if (rt6_nexthop_info(skb, rt, &flags) < 0)
3361 goto nla_put_failure;
3362
3363 rtnh->rtnh_flags = flags;
3364
3365 /* length of rtnetlink header + attributes */
3366 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
3367
3368 return 0;
3369
3370nla_put_failure:
3371 return -EMSGSIZE;
3176} 3372}
3177 3373
3178static int rt6_fill_node(struct net *net, 3374static int rt6_fill_node(struct net *net,
3179 struct sk_buff *skb, struct rt6_info *rt, 3375 struct sk_buff *skb, struct rt6_info *rt,
3180 struct in6_addr *dst, struct in6_addr *src, 3376 struct in6_addr *dst, struct in6_addr *src,
3181 int iif, int type, u32 portid, u32 seq, 3377 int iif, int type, u32 portid, u32 seq,
3182 int prefix, int nowait, unsigned int flags) 3378 unsigned int flags)
3183{ 3379{
3184 u32 metrics[RTAX_MAX]; 3380 u32 metrics[RTAX_MAX];
3185 struct rtmsg *rtm; 3381 struct rtmsg *rtm;
@@ -3187,13 +3383,6 @@ static int rt6_fill_node(struct net *net,
3187 long expires; 3383 long expires;
3188 u32 table; 3384 u32 table;
3189 3385
3190 if (prefix) { /* user wants prefix routes only */
3191 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3192 /* success since this is not a prefix route */
3193 return 1;
3194 }
3195 }
3196
3197 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); 3386 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3198 if (!nlh) 3387 if (!nlh)
3199 return -EMSGSIZE; 3388 return -EMSGSIZE;
@@ -3233,11 +3422,6 @@ static int rt6_fill_node(struct net *net,
3233 else 3422 else
3234 rtm->rtm_type = RTN_UNICAST; 3423 rtm->rtm_type = RTN_UNICAST;
3235 rtm->rtm_flags = 0; 3424 rtm->rtm_flags = 0;
3236 if (!netif_carrier_ok(rt->dst.dev)) {
3237 rtm->rtm_flags |= RTNH_F_LINKDOWN;
3238 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3239 rtm->rtm_flags |= RTNH_F_DEAD;
3240 }
3241 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 3425 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3242 rtm->rtm_protocol = rt->rt6i_protocol; 3426 rtm->rtm_protocol = rt->rt6i_protocol;
3243 if (rt->rt6i_flags & RTF_DYNAMIC) 3427 if (rt->rt6i_flags & RTF_DYNAMIC)
@@ -3271,19 +3455,12 @@ static int rt6_fill_node(struct net *net,
3271 if (iif) { 3455 if (iif) {
3272#ifdef CONFIG_IPV6_MROUTE 3456#ifdef CONFIG_IPV6_MROUTE
3273 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { 3457 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3274 int err = ip6mr_get_route(net, skb, rtm, nowait, 3458 int err = ip6mr_get_route(net, skb, rtm, portid);
3275 portid); 3459
3276 3460 if (err == 0)
3277 if (err <= 0) { 3461 return 0;
3278 if (!nowait) { 3462 if (err < 0)
3279 if (err == 0) 3463 goto nla_put_failure;
3280 return 0;
3281 goto nla_put_failure;
3282 } else {
3283 if (err == -EMSGSIZE)
3284 goto nla_put_failure;
3285 }
3286 }
3287 } else 3464 } else
3288#endif 3465#endif
3289 if (nla_put_u32(skb, RTA_IIF, iif)) 3466 if (nla_put_u32(skb, RTA_IIF, iif))
@@ -3308,17 +3485,35 @@ static int rt6_fill_node(struct net *net,
3308 if (rtnetlink_put_metrics(skb, metrics) < 0) 3485 if (rtnetlink_put_metrics(skb, metrics) < 0)
3309 goto nla_put_failure; 3486 goto nla_put_failure;
3310 3487
3311 if (rt->rt6i_flags & RTF_GATEWAY) {
3312 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3313 goto nla_put_failure;
3314 }
3315
3316 if (rt->dst.dev &&
3317 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3318 goto nla_put_failure;
3319 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) 3488 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3320 goto nla_put_failure; 3489 goto nla_put_failure;
3321 3490
3491 /* For multipath routes, walk the siblings list and add
3492 * each as a nexthop within RTA_MULTIPATH.
3493 */
3494 if (rt->rt6i_nsiblings) {
3495 struct rt6_info *sibling, *next_sibling;
3496 struct nlattr *mp;
3497
3498 mp = nla_nest_start(skb, RTA_MULTIPATH);
3499 if (!mp)
3500 goto nla_put_failure;
3501
3502 if (rt6_add_nexthop(skb, rt) < 0)
3503 goto nla_put_failure;
3504
3505 list_for_each_entry_safe(sibling, next_sibling,
3506 &rt->rt6i_siblings, rt6i_siblings) {
3507 if (rt6_add_nexthop(skb, sibling) < 0)
3508 goto nla_put_failure;
3509 }
3510
3511 nla_nest_end(skb, mp);
3512 } else {
3513 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags) < 0)
3514 goto nla_put_failure;
3515 }
3516
3322 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; 3517 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3323 3518
3324 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) 3519 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
@@ -3327,8 +3522,6 @@ static int rt6_fill_node(struct net *net,
3327 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) 3522 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3328 goto nla_put_failure; 3523 goto nla_put_failure;
3329 3524
3330 if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3331 goto nla_put_failure;
3332 3525
3333 nlmsg_end(skb, nlh); 3526 nlmsg_end(skb, nlh);
3334 return 0; 3527 return 0;
@@ -3341,18 +3534,26 @@ nla_put_failure:
3341int rt6_dump_route(struct rt6_info *rt, void *p_arg) 3534int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3342{ 3535{
3343 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 3536 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3344 int prefix; 3537 struct net *net = arg->net;
3538
3539 if (rt == net->ipv6.ip6_null_entry)
3540 return 0;
3345 3541
3346 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 3542 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3347 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 3543 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3348 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3349 } else
3350 prefix = 0;
3351 3544
3352 return rt6_fill_node(arg->net, 3545 /* user wants prefix routes only */
3546 if (rtm->rtm_flags & RTM_F_PREFIX &&
3547 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
3548 /* success since this is not a prefix route */
3549 return 1;
3550 }
3551 }
3552
3553 return rt6_fill_node(net,
3353 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 3554 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3354 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, 3555 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3355 prefix, 0, NLM_F_MULTI); 3556 NLM_F_MULTI);
3356} 3557}
3357 3558
3358static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) 3559static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
@@ -3433,17 +3634,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3433 goto errout; 3634 goto errout;
3434 } 3635 }
3435 3636
3436 /* Reserve room for dummy headers, this skb can pass
3437 through good chunk of routing engine.
3438 */
3439 skb_reset_mac_header(skb);
3440 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3441
3442 skb_dst_set(skb, &rt->dst); 3637 skb_dst_set(skb, &rt->dst);
3443 3638
3444 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, 3639 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3445 RTM_NEWROUTE, NETLINK_CB(in_skb).portid, 3640 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3446 nlh->nlmsg_seq, 0, 0, 0); 3641 nlh->nlmsg_seq, 0);
3447 if (err < 0) { 3642 if (err < 0) {
3448 kfree_skb(skb); 3643 kfree_skb(skb);
3449 goto errout; 3644 goto errout;
@@ -3470,7 +3665,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3470 goto errout; 3665 goto errout;
3471 3666
3472 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 3667 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3473 event, info->portid, seq, 0, 0, nlm_flags); 3668 event, info->portid, seq, nlm_flags);
3474 if (err < 0) { 3669 if (err < 0) {
3475 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 3670 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3476 WARN_ON(err == -EMSGSIZE); 3671 WARN_ON(err == -EMSGSIZE);