diff options
Diffstat (limited to 'net/ipv4/ipip.c')
-rw-r--r-- | net/ipv4/ipip.c | 748 |
1 files changed, 59 insertions, 689 deletions
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 8f024d41eefa..77bfcce64fe5 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -111,227 +111,21 @@ | |||
111 | #include <net/sock.h> | 111 | #include <net/sock.h> |
112 | #include <net/ip.h> | 112 | #include <net/ip.h> |
113 | #include <net/icmp.h> | 113 | #include <net/icmp.h> |
114 | #include <net/ipip.h> | 114 | #include <net/ip_tunnels.h> |
115 | #include <net/inet_ecn.h> | 115 | #include <net/inet_ecn.h> |
116 | #include <net/xfrm.h> | 116 | #include <net/xfrm.h> |
117 | #include <net/net_namespace.h> | 117 | #include <net/net_namespace.h> |
118 | #include <net/netns/generic.h> | 118 | #include <net/netns/generic.h> |
119 | 119 | ||
120 | #define HASH_SIZE 16 | ||
121 | #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) | ||
122 | |||
123 | static bool log_ecn_error = true; | 120 | static bool log_ecn_error = true; |
124 | module_param(log_ecn_error, bool, 0644); | 121 | module_param(log_ecn_error, bool, 0644); |
125 | MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); | 122 | MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); |
126 | 123 | ||
127 | static int ipip_net_id __read_mostly; | 124 | static int ipip_net_id __read_mostly; |
128 | struct ipip_net { | ||
129 | struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; | ||
130 | struct ip_tunnel __rcu *tunnels_r[HASH_SIZE]; | ||
131 | struct ip_tunnel __rcu *tunnels_l[HASH_SIZE]; | ||
132 | struct ip_tunnel __rcu *tunnels_wc[1]; | ||
133 | struct ip_tunnel __rcu **tunnels[4]; | ||
134 | |||
135 | struct net_device *fb_tunnel_dev; | ||
136 | }; | ||
137 | 125 | ||
138 | static int ipip_tunnel_init(struct net_device *dev); | 126 | static int ipip_tunnel_init(struct net_device *dev); |
139 | static void ipip_tunnel_setup(struct net_device *dev); | ||
140 | static void ipip_dev_free(struct net_device *dev); | ||
141 | static struct rtnl_link_ops ipip_link_ops __read_mostly; | 127 | static struct rtnl_link_ops ipip_link_ops __read_mostly; |
142 | 128 | ||
143 | static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev, | ||
144 | struct rtnl_link_stats64 *tot) | ||
145 | { | ||
146 | int i; | ||
147 | |||
148 | for_each_possible_cpu(i) { | ||
149 | const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); | ||
150 | u64 rx_packets, rx_bytes, tx_packets, tx_bytes; | ||
151 | unsigned int start; | ||
152 | |||
153 | do { | ||
154 | start = u64_stats_fetch_begin_bh(&tstats->syncp); | ||
155 | rx_packets = tstats->rx_packets; | ||
156 | tx_packets = tstats->tx_packets; | ||
157 | rx_bytes = tstats->rx_bytes; | ||
158 | tx_bytes = tstats->tx_bytes; | ||
159 | } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); | ||
160 | |||
161 | tot->rx_packets += rx_packets; | ||
162 | tot->tx_packets += tx_packets; | ||
163 | tot->rx_bytes += rx_bytes; | ||
164 | tot->tx_bytes += tx_bytes; | ||
165 | } | ||
166 | |||
167 | tot->tx_fifo_errors = dev->stats.tx_fifo_errors; | ||
168 | tot->tx_carrier_errors = dev->stats.tx_carrier_errors; | ||
169 | tot->tx_dropped = dev->stats.tx_dropped; | ||
170 | tot->tx_aborted_errors = dev->stats.tx_aborted_errors; | ||
171 | tot->tx_errors = dev->stats.tx_errors; | ||
172 | tot->collisions = dev->stats.collisions; | ||
173 | |||
174 | return tot; | ||
175 | } | ||
176 | |||
177 | static struct ip_tunnel *ipip_tunnel_lookup(struct net *net, | ||
178 | __be32 remote, __be32 local) | ||
179 | { | ||
180 | unsigned int h0 = HASH(remote); | ||
181 | unsigned int h1 = HASH(local); | ||
182 | struct ip_tunnel *t; | ||
183 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
184 | |||
185 | for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1]) | ||
186 | if (local == t->parms.iph.saddr && | ||
187 | remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) | ||
188 | return t; | ||
189 | |||
190 | for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0]) | ||
191 | if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) | ||
192 | return t; | ||
193 | |||
194 | for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1]) | ||
195 | if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) | ||
196 | return t; | ||
197 | |||
198 | t = rcu_dereference(ipn->tunnels_wc[0]); | ||
199 | if (t && (t->dev->flags&IFF_UP)) | ||
200 | return t; | ||
201 | return NULL; | ||
202 | } | ||
203 | |||
204 | static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn, | ||
205 | struct ip_tunnel_parm *parms) | ||
206 | { | ||
207 | __be32 remote = parms->iph.daddr; | ||
208 | __be32 local = parms->iph.saddr; | ||
209 | unsigned int h = 0; | ||
210 | int prio = 0; | ||
211 | |||
212 | if (remote) { | ||
213 | prio |= 2; | ||
214 | h ^= HASH(remote); | ||
215 | } | ||
216 | if (local) { | ||
217 | prio |= 1; | ||
218 | h ^= HASH(local); | ||
219 | } | ||
220 | return &ipn->tunnels[prio][h]; | ||
221 | } | ||
222 | |||
223 | static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn, | ||
224 | struct ip_tunnel *t) | ||
225 | { | ||
226 | return __ipip_bucket(ipn, &t->parms); | ||
227 | } | ||
228 | |||
229 | static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) | ||
230 | { | ||
231 | struct ip_tunnel __rcu **tp; | ||
232 | struct ip_tunnel *iter; | ||
233 | |||
234 | for (tp = ipip_bucket(ipn, t); | ||
235 | (iter = rtnl_dereference(*tp)) != NULL; | ||
236 | tp = &iter->next) { | ||
237 | if (t == iter) { | ||
238 | rcu_assign_pointer(*tp, t->next); | ||
239 | break; | ||
240 | } | ||
241 | } | ||
242 | } | ||
243 | |||
244 | static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) | ||
245 | { | ||
246 | struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t); | ||
247 | |||
248 | rcu_assign_pointer(t->next, rtnl_dereference(*tp)); | ||
249 | rcu_assign_pointer(*tp, t); | ||
250 | } | ||
251 | |||
252 | static int ipip_tunnel_create(struct net_device *dev) | ||
253 | { | ||
254 | struct ip_tunnel *t = netdev_priv(dev); | ||
255 | struct net *net = dev_net(dev); | ||
256 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
257 | int err; | ||
258 | |||
259 | err = ipip_tunnel_init(dev); | ||
260 | if (err < 0) | ||
261 | goto out; | ||
262 | |||
263 | err = register_netdevice(dev); | ||
264 | if (err < 0) | ||
265 | goto out; | ||
266 | |||
267 | strcpy(t->parms.name, dev->name); | ||
268 | dev->rtnl_link_ops = &ipip_link_ops; | ||
269 | |||
270 | dev_hold(dev); | ||
271 | ipip_tunnel_link(ipn, t); | ||
272 | return 0; | ||
273 | |||
274 | out: | ||
275 | return err; | ||
276 | } | ||
277 | |||
278 | static struct ip_tunnel *ipip_tunnel_locate(struct net *net, | ||
279 | struct ip_tunnel_parm *parms, int create) | ||
280 | { | ||
281 | __be32 remote = parms->iph.daddr; | ||
282 | __be32 local = parms->iph.saddr; | ||
283 | struct ip_tunnel *t, *nt; | ||
284 | struct ip_tunnel __rcu **tp; | ||
285 | struct net_device *dev; | ||
286 | char name[IFNAMSIZ]; | ||
287 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
288 | |||
289 | for (tp = __ipip_bucket(ipn, parms); | ||
290 | (t = rtnl_dereference(*tp)) != NULL; | ||
291 | tp = &t->next) { | ||
292 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) | ||
293 | return t; | ||
294 | } | ||
295 | if (!create) | ||
296 | return NULL; | ||
297 | |||
298 | if (parms->name[0]) | ||
299 | strlcpy(name, parms->name, IFNAMSIZ); | ||
300 | else | ||
301 | strcpy(name, "tunl%d"); | ||
302 | |||
303 | dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup); | ||
304 | if (dev == NULL) | ||
305 | return NULL; | ||
306 | |||
307 | dev_net_set(dev, net); | ||
308 | |||
309 | nt = netdev_priv(dev); | ||
310 | nt->parms = *parms; | ||
311 | |||
312 | if (ipip_tunnel_create(dev) < 0) | ||
313 | goto failed_free; | ||
314 | |||
315 | return nt; | ||
316 | |||
317 | failed_free: | ||
318 | ipip_dev_free(dev); | ||
319 | return NULL; | ||
320 | } | ||
321 | |||
322 | /* called with RTNL */ | ||
323 | static void ipip_tunnel_uninit(struct net_device *dev) | ||
324 | { | ||
325 | struct net *net = dev_net(dev); | ||
326 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
327 | |||
328 | if (dev == ipn->fb_tunnel_dev) | ||
329 | RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL); | ||
330 | else | ||
331 | ipip_tunnel_unlink(ipn, netdev_priv(dev)); | ||
332 | dev_put(dev); | ||
333 | } | ||
334 | |||
335 | static int ipip_err(struct sk_buff *skb, u32 info) | 129 | static int ipip_err(struct sk_buff *skb, u32 info) |
336 | { | 130 | { |
337 | 131 | ||
@@ -339,41 +133,17 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
339 | 8 bytes of packet payload. It means, that precise relaying of | 133 | 8 bytes of packet payload. It means, that precise relaying of |
340 | ICMP in the real Internet is absolutely infeasible. | 134 | ICMP in the real Internet is absolutely infeasible. |
341 | */ | 135 | */ |
136 | struct net *net = dev_net(skb->dev); | ||
137 | struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); | ||
342 | const struct iphdr *iph = (const struct iphdr *)skb->data; | 138 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
343 | const int type = icmp_hdr(skb)->type; | ||
344 | const int code = icmp_hdr(skb)->code; | ||
345 | struct ip_tunnel *t; | 139 | struct ip_tunnel *t; |
346 | int err; | 140 | int err; |
347 | 141 | const int type = icmp_hdr(skb)->type; | |
348 | switch (type) { | 142 | const int code = icmp_hdr(skb)->code; |
349 | default: | ||
350 | case ICMP_PARAMETERPROB: | ||
351 | return 0; | ||
352 | |||
353 | case ICMP_DEST_UNREACH: | ||
354 | switch (code) { | ||
355 | case ICMP_SR_FAILED: | ||
356 | case ICMP_PORT_UNREACH: | ||
357 | /* Impossible event. */ | ||
358 | return 0; | ||
359 | default: | ||
360 | /* All others are translated to HOST_UNREACH. | ||
361 | rfc2003 contains "deep thoughts" about NET_UNREACH, | ||
362 | I believe they are just ether pollution. --ANK | ||
363 | */ | ||
364 | break; | ||
365 | } | ||
366 | break; | ||
367 | case ICMP_TIME_EXCEEDED: | ||
368 | if (code != ICMP_EXC_TTL) | ||
369 | return 0; | ||
370 | break; | ||
371 | case ICMP_REDIRECT: | ||
372 | break; | ||
373 | } | ||
374 | 143 | ||
375 | err = -ENOENT; | 144 | err = -ENOENT; |
376 | t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); | 145 | t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, |
146 | iph->daddr, iph->saddr, 0); | ||
377 | if (t == NULL) | 147 | if (t == NULL) |
378 | goto out; | 148 | goto out; |
379 | 149 | ||
@@ -403,53 +173,29 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
403 | else | 173 | else |
404 | t->err_count = 1; | 174 | t->err_count = 1; |
405 | t->err_time = jiffies; | 175 | t->err_time = jiffies; |
406 | out: | ||
407 | 176 | ||
177 | out: | ||
408 | return err; | 178 | return err; |
409 | } | 179 | } |
410 | 180 | ||
181 | static const struct tnl_ptk_info tpi = { | ||
182 | /* no tunnel info required for ipip. */ | ||
183 | .proto = htons(ETH_P_IP), | ||
184 | }; | ||
185 | |||
411 | static int ipip_rcv(struct sk_buff *skb) | 186 | static int ipip_rcv(struct sk_buff *skb) |
412 | { | 187 | { |
188 | struct net *net = dev_net(skb->dev); | ||
189 | struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); | ||
413 | struct ip_tunnel *tunnel; | 190 | struct ip_tunnel *tunnel; |
414 | const struct iphdr *iph = ip_hdr(skb); | 191 | const struct iphdr *iph = ip_hdr(skb); |
415 | int err; | ||
416 | |||
417 | tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); | ||
418 | if (tunnel != NULL) { | ||
419 | struct pcpu_tstats *tstats; | ||
420 | 192 | ||
193 | tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, | ||
194 | iph->saddr, iph->daddr, 0); | ||
195 | if (tunnel) { | ||
421 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) | 196 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) |
422 | goto drop; | 197 | goto drop; |
423 | 198 | return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); | |
424 | secpath_reset(skb); | ||
425 | |||
426 | skb->mac_header = skb->network_header; | ||
427 | skb_reset_network_header(skb); | ||
428 | skb->protocol = htons(ETH_P_IP); | ||
429 | skb->pkt_type = PACKET_HOST; | ||
430 | |||
431 | __skb_tunnel_rx(skb, tunnel->dev); | ||
432 | |||
433 | err = IP_ECN_decapsulate(iph, skb); | ||
434 | if (unlikely(err)) { | ||
435 | if (log_ecn_error) | ||
436 | net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", | ||
437 | &iph->saddr, iph->tos); | ||
438 | if (err > 1) { | ||
439 | ++tunnel->dev->stats.rx_frame_errors; | ||
440 | ++tunnel->dev->stats.rx_errors; | ||
441 | goto drop; | ||
442 | } | ||
443 | } | ||
444 | |||
445 | tstats = this_cpu_ptr(tunnel->dev->tstats); | ||
446 | u64_stats_update_begin(&tstats->syncp); | ||
447 | tstats->rx_packets++; | ||
448 | tstats->rx_bytes += skb->len; | ||
449 | u64_stats_update_end(&tstats->syncp); | ||
450 | |||
451 | netif_rx(skb); | ||
452 | return 0; | ||
453 | } | 199 | } |
454 | 200 | ||
455 | return -1; | 201 | return -1; |
@@ -463,329 +209,64 @@ drop: | |||
463 | * This function assumes it is being called from dev_queue_xmit() | 209 | * This function assumes it is being called from dev_queue_xmit() |
464 | * and that skb is filled properly by that function. | 210 | * and that skb is filled properly by that function. |
465 | */ | 211 | */ |
466 | |||
467 | static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | 212 | static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) |
468 | { | 213 | { |
469 | struct ip_tunnel *tunnel = netdev_priv(dev); | 214 | struct ip_tunnel *tunnel = netdev_priv(dev); |
470 | const struct iphdr *tiph = &tunnel->parms.iph; | 215 | const struct iphdr *tiph = &tunnel->parms.iph; |
471 | u8 tos = tunnel->parms.iph.tos; | ||
472 | __be16 df = tiph->frag_off; | ||
473 | struct rtable *rt; /* Route to the other host */ | ||
474 | struct net_device *tdev; /* Device to other host */ | ||
475 | const struct iphdr *old_iph; | ||
476 | struct iphdr *iph; /* Our new IP header */ | ||
477 | unsigned int max_headroom; /* The extra header space needed */ | ||
478 | __be32 dst = tiph->daddr; | ||
479 | struct flowi4 fl4; | ||
480 | int mtu; | ||
481 | |||
482 | if (skb->protocol != htons(ETH_P_IP)) | ||
483 | goto tx_error; | ||
484 | 216 | ||
485 | if (skb->ip_summed == CHECKSUM_PARTIAL && | 217 | if (unlikely(skb->protocol != htons(ETH_P_IP))) |
486 | skb_checksum_help(skb)) | ||
487 | goto tx_error; | 218 | goto tx_error; |
488 | 219 | ||
489 | old_iph = ip_hdr(skb); | 220 | if (likely(!skb->encapsulation)) { |
490 | 221 | skb_reset_inner_headers(skb); | |
491 | if (tos & 1) | 222 | skb->encapsulation = 1; |
492 | tos = old_iph->tos; | ||
493 | |||
494 | if (!dst) { | ||
495 | /* NBMA tunnel */ | ||
496 | if ((rt = skb_rtable(skb)) == NULL) { | ||
497 | dev->stats.tx_fifo_errors++; | ||
498 | goto tx_error; | ||
499 | } | ||
500 | dst = rt_nexthop(rt, old_iph->daddr); | ||
501 | } | 223 | } |
502 | 224 | ||
503 | rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, | 225 | ip_tunnel_xmit(skb, dev, tiph); |
504 | dst, tiph->saddr, | ||
505 | 0, 0, | ||
506 | IPPROTO_IPIP, RT_TOS(tos), | ||
507 | tunnel->parms.link); | ||
508 | if (IS_ERR(rt)) { | ||
509 | dev->stats.tx_carrier_errors++; | ||
510 | goto tx_error_icmp; | ||
511 | } | ||
512 | tdev = rt->dst.dev; | ||
513 | |||
514 | if (tdev == dev) { | ||
515 | ip_rt_put(rt); | ||
516 | dev->stats.collisions++; | ||
517 | goto tx_error; | ||
518 | } | ||
519 | |||
520 | df |= old_iph->frag_off & htons(IP_DF); | ||
521 | |||
522 | if (df) { | ||
523 | mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); | ||
524 | |||
525 | if (mtu < 68) { | ||
526 | dev->stats.collisions++; | ||
527 | ip_rt_put(rt); | ||
528 | goto tx_error; | ||
529 | } | ||
530 | |||
531 | if (skb_dst(skb)) | ||
532 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); | ||
533 | |||
534 | if ((old_iph->frag_off & htons(IP_DF)) && | ||
535 | mtu < ntohs(old_iph->tot_len)) { | ||
536 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | ||
537 | htonl(mtu)); | ||
538 | ip_rt_put(rt); | ||
539 | goto tx_error; | ||
540 | } | ||
541 | } | ||
542 | |||
543 | if (tunnel->err_count > 0) { | ||
544 | if (time_before(jiffies, | ||
545 | tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { | ||
546 | tunnel->err_count--; | ||
547 | dst_link_failure(skb); | ||
548 | } else | ||
549 | tunnel->err_count = 0; | ||
550 | } | ||
551 | |||
552 | /* | ||
553 | * Okay, now see if we can stuff it in the buffer as-is. | ||
554 | */ | ||
555 | max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr)); | ||
556 | |||
557 | if (skb_headroom(skb) < max_headroom || skb_shared(skb) || | ||
558 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { | ||
559 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); | ||
560 | if (!new_skb) { | ||
561 | ip_rt_put(rt); | ||
562 | dev->stats.tx_dropped++; | ||
563 | dev_kfree_skb(skb); | ||
564 | return NETDEV_TX_OK; | ||
565 | } | ||
566 | if (skb->sk) | ||
567 | skb_set_owner_w(new_skb, skb->sk); | ||
568 | dev_kfree_skb(skb); | ||
569 | skb = new_skb; | ||
570 | old_iph = ip_hdr(skb); | ||
571 | } | ||
572 | |||
573 | skb->transport_header = skb->network_header; | ||
574 | skb_push(skb, sizeof(struct iphdr)); | ||
575 | skb_reset_network_header(skb); | ||
576 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | ||
577 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | | ||
578 | IPSKB_REROUTED); | ||
579 | skb_dst_drop(skb); | ||
580 | skb_dst_set(skb, &rt->dst); | ||
581 | |||
582 | /* | ||
583 | * Push down and install the IPIP header. | ||
584 | */ | ||
585 | |||
586 | iph = ip_hdr(skb); | ||
587 | iph->version = 4; | ||
588 | iph->ihl = sizeof(struct iphdr)>>2; | ||
589 | iph->frag_off = df; | ||
590 | iph->protocol = IPPROTO_IPIP; | ||
591 | iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); | ||
592 | iph->daddr = fl4.daddr; | ||
593 | iph->saddr = fl4.saddr; | ||
594 | |||
595 | if ((iph->ttl = tiph->ttl) == 0) | ||
596 | iph->ttl = old_iph->ttl; | ||
597 | |||
598 | iptunnel_xmit(skb, dev); | ||
599 | return NETDEV_TX_OK; | 226 | return NETDEV_TX_OK; |
600 | 227 | ||
601 | tx_error_icmp: | ||
602 | dst_link_failure(skb); | ||
603 | tx_error: | 228 | tx_error: |
604 | dev->stats.tx_errors++; | 229 | dev->stats.tx_errors++; |
605 | dev_kfree_skb(skb); | 230 | dev_kfree_skb(skb); |
606 | return NETDEV_TX_OK; | 231 | return NETDEV_TX_OK; |
607 | } | 232 | } |
608 | 233 | ||
609 | static void ipip_tunnel_bind_dev(struct net_device *dev) | ||
610 | { | ||
611 | struct net_device *tdev = NULL; | ||
612 | struct ip_tunnel *tunnel; | ||
613 | const struct iphdr *iph; | ||
614 | |||
615 | tunnel = netdev_priv(dev); | ||
616 | iph = &tunnel->parms.iph; | ||
617 | |||
618 | if (iph->daddr) { | ||
619 | struct rtable *rt; | ||
620 | struct flowi4 fl4; | ||
621 | |||
622 | rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, | ||
623 | iph->daddr, iph->saddr, | ||
624 | 0, 0, | ||
625 | IPPROTO_IPIP, | ||
626 | RT_TOS(iph->tos), | ||
627 | tunnel->parms.link); | ||
628 | if (!IS_ERR(rt)) { | ||
629 | tdev = rt->dst.dev; | ||
630 | ip_rt_put(rt); | ||
631 | } | ||
632 | dev->flags |= IFF_POINTOPOINT; | ||
633 | } | ||
634 | |||
635 | if (!tdev && tunnel->parms.link) | ||
636 | tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); | ||
637 | |||
638 | if (tdev) { | ||
639 | dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); | ||
640 | dev->mtu = tdev->mtu - sizeof(struct iphdr); | ||
641 | } | ||
642 | dev->iflink = tunnel->parms.link; | ||
643 | } | ||
644 | |||
645 | static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) | ||
646 | { | ||
647 | struct net *net = dev_net(t->dev); | ||
648 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
649 | |||
650 | ipip_tunnel_unlink(ipn, t); | ||
651 | synchronize_net(); | ||
652 | t->parms.iph.saddr = p->iph.saddr; | ||
653 | t->parms.iph.daddr = p->iph.daddr; | ||
654 | memcpy(t->dev->dev_addr, &p->iph.saddr, 4); | ||
655 | memcpy(t->dev->broadcast, &p->iph.daddr, 4); | ||
656 | ipip_tunnel_link(ipn, t); | ||
657 | t->parms.iph.ttl = p->iph.ttl; | ||
658 | t->parms.iph.tos = p->iph.tos; | ||
659 | t->parms.iph.frag_off = p->iph.frag_off; | ||
660 | if (t->parms.link != p->link) { | ||
661 | t->parms.link = p->link; | ||
662 | ipip_tunnel_bind_dev(t->dev); | ||
663 | } | ||
664 | netdev_state_change(t->dev); | ||
665 | } | ||
666 | |||
667 | static int | 234 | static int |
668 | ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | 235 | ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) |
669 | { | 236 | { |
670 | int err = 0; | 237 | int err = 0; |
671 | struct ip_tunnel_parm p; | 238 | struct ip_tunnel_parm p; |
672 | struct ip_tunnel *t; | ||
673 | struct net *net = dev_net(dev); | ||
674 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
675 | |||
676 | switch (cmd) { | ||
677 | case SIOCGETTUNNEL: | ||
678 | t = NULL; | ||
679 | if (dev == ipn->fb_tunnel_dev) { | ||
680 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { | ||
681 | err = -EFAULT; | ||
682 | break; | ||
683 | } | ||
684 | t = ipip_tunnel_locate(net, &p, 0); | ||
685 | } | ||
686 | if (t == NULL) | ||
687 | t = netdev_priv(dev); | ||
688 | memcpy(&p, &t->parms, sizeof(p)); | ||
689 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) | ||
690 | err = -EFAULT; | ||
691 | break; | ||
692 | |||
693 | case SIOCADDTUNNEL: | ||
694 | case SIOCCHGTUNNEL: | ||
695 | err = -EPERM; | ||
696 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) | ||
697 | goto done; | ||
698 | |||
699 | err = -EFAULT; | ||
700 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) | ||
701 | goto done; | ||
702 | |||
703 | err = -EINVAL; | ||
704 | if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || | ||
705 | p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) | ||
706 | goto done; | ||
707 | if (p.iph.ttl) | ||
708 | p.iph.frag_off |= htons(IP_DF); | ||
709 | |||
710 | t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); | ||
711 | |||
712 | if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { | ||
713 | if (t != NULL) { | ||
714 | if (t->dev != dev) { | ||
715 | err = -EEXIST; | ||
716 | break; | ||
717 | } | ||
718 | } else { | ||
719 | if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || | ||
720 | (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { | ||
721 | err = -EINVAL; | ||
722 | break; | ||
723 | } | ||
724 | t = netdev_priv(dev); | ||
725 | } | ||
726 | |||
727 | ipip_tunnel_update(t, &p); | ||
728 | } | ||
729 | |||
730 | if (t) { | ||
731 | err = 0; | ||
732 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) | ||
733 | err = -EFAULT; | ||
734 | } else | ||
735 | err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); | ||
736 | break; | ||
737 | |||
738 | case SIOCDELTUNNEL: | ||
739 | err = -EPERM; | ||
740 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) | ||
741 | goto done; | ||
742 | |||
743 | if (dev == ipn->fb_tunnel_dev) { | ||
744 | err = -EFAULT; | ||
745 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) | ||
746 | goto done; | ||
747 | err = -ENOENT; | ||
748 | if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL) | ||
749 | goto done; | ||
750 | err = -EPERM; | ||
751 | if (t->dev == ipn->fb_tunnel_dev) | ||
752 | goto done; | ||
753 | dev = t->dev; | ||
754 | } | ||
755 | unregister_netdevice(dev); | ||
756 | err = 0; | ||
757 | break; | ||
758 | 239 | ||
759 | default: | 240 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) |
760 | err = -EINVAL; | 241 | return -EFAULT; |
761 | } | ||
762 | |||
763 | done: | ||
764 | return err; | ||
765 | } | ||
766 | 242 | ||
767 | static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) | 243 | if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || |
768 | { | 244 | p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) |
769 | if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) | 245 | return -EINVAL; |
246 | if (p.i_key || p.o_key || p.i_flags || p.o_flags) | ||
770 | return -EINVAL; | 247 | return -EINVAL; |
771 | dev->mtu = new_mtu; | 248 | if (p.iph.ttl) |
249 | p.iph.frag_off |= htons(IP_DF); | ||
250 | |||
251 | err = ip_tunnel_ioctl(dev, &p, cmd); | ||
252 | if (err) | ||
253 | return err; | ||
254 | |||
255 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) | ||
256 | return -EFAULT; | ||
257 | |||
772 | return 0; | 258 | return 0; |
773 | } | 259 | } |
774 | 260 | ||
775 | static const struct net_device_ops ipip_netdev_ops = { | 261 | static const struct net_device_ops ipip_netdev_ops = { |
776 | .ndo_uninit = ipip_tunnel_uninit, | 262 | .ndo_init = ipip_tunnel_init, |
263 | .ndo_uninit = ip_tunnel_uninit, | ||
777 | .ndo_start_xmit = ipip_tunnel_xmit, | 264 | .ndo_start_xmit = ipip_tunnel_xmit, |
778 | .ndo_do_ioctl = ipip_tunnel_ioctl, | 265 | .ndo_do_ioctl = ipip_tunnel_ioctl, |
779 | .ndo_change_mtu = ipip_tunnel_change_mtu, | 266 | .ndo_change_mtu = ip_tunnel_change_mtu, |
780 | .ndo_get_stats64 = ipip_get_stats64, | 267 | .ndo_get_stats64 = ip_tunnel_get_stats64, |
781 | }; | 268 | }; |
782 | 269 | ||
783 | static void ipip_dev_free(struct net_device *dev) | ||
784 | { | ||
785 | free_percpu(dev->tstats); | ||
786 | free_netdev(dev); | ||
787 | } | ||
788 | |||
789 | #define IPIP_FEATURES (NETIF_F_SG | \ | 270 | #define IPIP_FEATURES (NETIF_F_SG | \ |
790 | NETIF_F_FRAGLIST | \ | 271 | NETIF_F_FRAGLIST | \ |
791 | NETIF_F_HIGHDMA | \ | 272 | NETIF_F_HIGHDMA | \ |
@@ -794,11 +275,8 @@ static void ipip_dev_free(struct net_device *dev) | |||
794 | static void ipip_tunnel_setup(struct net_device *dev) | 275 | static void ipip_tunnel_setup(struct net_device *dev) |
795 | { | 276 | { |
796 | dev->netdev_ops = &ipip_netdev_ops; | 277 | dev->netdev_ops = &ipip_netdev_ops; |
797 | dev->destructor = ipip_dev_free; | ||
798 | 278 | ||
799 | dev->type = ARPHRD_TUNNEL; | 279 | dev->type = ARPHRD_TUNNEL; |
800 | dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); | ||
801 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); | ||
802 | dev->flags = IFF_NOARP; | 280 | dev->flags = IFF_NOARP; |
803 | dev->iflink = 0; | 281 | dev->iflink = 0; |
804 | dev->addr_len = 4; | 282 | dev->addr_len = 4; |
@@ -808,46 +286,19 @@ static void ipip_tunnel_setup(struct net_device *dev) | |||
808 | 286 | ||
809 | dev->features |= IPIP_FEATURES; | 287 | dev->features |= IPIP_FEATURES; |
810 | dev->hw_features |= IPIP_FEATURES; | 288 | dev->hw_features |= IPIP_FEATURES; |
289 | ip_tunnel_setup(dev, ipip_net_id); | ||
811 | } | 290 | } |
812 | 291 | ||
813 | static int ipip_tunnel_init(struct net_device *dev) | 292 | static int ipip_tunnel_init(struct net_device *dev) |
814 | { | 293 | { |
815 | struct ip_tunnel *tunnel = netdev_priv(dev); | 294 | struct ip_tunnel *tunnel = netdev_priv(dev); |
816 | 295 | ||
817 | tunnel->dev = dev; | ||
818 | |||
819 | memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); | 296 | memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); |
820 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); | 297 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); |
821 | 298 | ||
822 | ipip_tunnel_bind_dev(dev); | 299 | tunnel->hlen = 0; |
823 | 300 | tunnel->parms.iph.protocol = IPPROTO_IPIP; | |
824 | dev->tstats = alloc_percpu(struct pcpu_tstats); | 301 | return ip_tunnel_init(dev); |
825 | if (!dev->tstats) | ||
826 | return -ENOMEM; | ||
827 | |||
828 | return 0; | ||
829 | } | ||
830 | |||
831 | static int __net_init ipip_fb_tunnel_init(struct net_device *dev) | ||
832 | { | ||
833 | struct ip_tunnel *tunnel = netdev_priv(dev); | ||
834 | struct iphdr *iph = &tunnel->parms.iph; | ||
835 | struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id); | ||
836 | |||
837 | tunnel->dev = dev; | ||
838 | strcpy(tunnel->parms.name, dev->name); | ||
839 | |||
840 | iph->version = 4; | ||
841 | iph->protocol = IPPROTO_IPIP; | ||
842 | iph->ihl = 5; | ||
843 | |||
844 | dev->tstats = alloc_percpu(struct pcpu_tstats); | ||
845 | if (!dev->tstats) | ||
846 | return -ENOMEM; | ||
847 | |||
848 | dev_hold(dev); | ||
849 | rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); | ||
850 | return 0; | ||
851 | } | 302 | } |
852 | 303 | ||
853 | static void ipip_netlink_parms(struct nlattr *data[], | 304 | static void ipip_netlink_parms(struct nlattr *data[], |
@@ -887,28 +338,16 @@ static void ipip_netlink_parms(struct nlattr *data[], | |||
887 | static int ipip_newlink(struct net *src_net, struct net_device *dev, | 338 | static int ipip_newlink(struct net *src_net, struct net_device *dev, |
888 | struct nlattr *tb[], struct nlattr *data[]) | 339 | struct nlattr *tb[], struct nlattr *data[]) |
889 | { | 340 | { |
890 | struct net *net = dev_net(dev); | 341 | struct ip_tunnel_parm p; |
891 | struct ip_tunnel *nt; | ||
892 | |||
893 | nt = netdev_priv(dev); | ||
894 | ipip_netlink_parms(data, &nt->parms); | ||
895 | |||
896 | if (ipip_tunnel_locate(net, &nt->parms, 0)) | ||
897 | return -EEXIST; | ||
898 | 342 | ||
899 | return ipip_tunnel_create(dev); | 343 | ipip_netlink_parms(data, &p); |
344 | return ip_tunnel_newlink(dev, tb, &p); | ||
900 | } | 345 | } |
901 | 346 | ||
902 | static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], | 347 | static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], |
903 | struct nlattr *data[]) | 348 | struct nlattr *data[]) |
904 | { | 349 | { |
905 | struct ip_tunnel *t; | ||
906 | struct ip_tunnel_parm p; | 350 | struct ip_tunnel_parm p; |
907 | struct net *net = dev_net(dev); | ||
908 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
909 | |||
910 | if (dev == ipn->fb_tunnel_dev) | ||
911 | return -EINVAL; | ||
912 | 351 | ||
913 | ipip_netlink_parms(data, &p); | 352 | ipip_netlink_parms(data, &p); |
914 | 353 | ||
@@ -916,16 +355,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], | |||
916 | (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) | 355 | (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) |
917 | return -EINVAL; | 356 | return -EINVAL; |
918 | 357 | ||
919 | t = ipip_tunnel_locate(net, &p, 0); | 358 | return ip_tunnel_changelink(dev, tb, &p); |
920 | |||
921 | if (t) { | ||
922 | if (t->dev != dev) | ||
923 | return -EEXIST; | ||
924 | } else | ||
925 | t = netdev_priv(dev); | ||
926 | |||
927 | ipip_tunnel_update(t, &p); | ||
928 | return 0; | ||
929 | } | 359 | } |
930 | 360 | ||
931 | static size_t ipip_get_size(const struct net_device *dev) | 361 | static size_t ipip_get_size(const struct net_device *dev) |
@@ -982,6 +412,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = { | |||
982 | .setup = ipip_tunnel_setup, | 412 | .setup = ipip_tunnel_setup, |
983 | .newlink = ipip_newlink, | 413 | .newlink = ipip_newlink, |
984 | .changelink = ipip_changelink, | 414 | .changelink = ipip_changelink, |
415 | .dellink = ip_tunnel_dellink, | ||
985 | .get_size = ipip_get_size, | 416 | .get_size = ipip_get_size, |
986 | .fill_info = ipip_fill_info, | 417 | .fill_info = ipip_fill_info, |
987 | }; | 418 | }; |
@@ -992,90 +423,29 @@ static struct xfrm_tunnel ipip_handler __read_mostly = { | |||
992 | .priority = 1, | 423 | .priority = 1, |
993 | }; | 424 | }; |
994 | 425 | ||
995 | static const char banner[] __initconst = | ||
996 | KERN_INFO "IPv4 over IPv4 tunneling driver\n"; | ||
997 | |||
998 | static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) | ||
999 | { | ||
1000 | int prio; | ||
1001 | |||
1002 | for (prio = 1; prio < 4; prio++) { | ||
1003 | int h; | ||
1004 | for (h = 0; h < HASH_SIZE; h++) { | ||
1005 | struct ip_tunnel *t; | ||
1006 | |||
1007 | t = rtnl_dereference(ipn->tunnels[prio][h]); | ||
1008 | while (t != NULL) { | ||
1009 | unregister_netdevice_queue(t->dev, head); | ||
1010 | t = rtnl_dereference(t->next); | ||
1011 | } | ||
1012 | } | ||
1013 | } | ||
1014 | } | ||
1015 | |||
1016 | static int __net_init ipip_init_net(struct net *net) | 426 | static int __net_init ipip_init_net(struct net *net) |
1017 | { | 427 | { |
1018 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | 428 | return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0"); |
1019 | struct ip_tunnel *t; | ||
1020 | int err; | ||
1021 | |||
1022 | ipn->tunnels[0] = ipn->tunnels_wc; | ||
1023 | ipn->tunnels[1] = ipn->tunnels_l; | ||
1024 | ipn->tunnels[2] = ipn->tunnels_r; | ||
1025 | ipn->tunnels[3] = ipn->tunnels_r_l; | ||
1026 | |||
1027 | ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), | ||
1028 | "tunl0", | ||
1029 | ipip_tunnel_setup); | ||
1030 | if (!ipn->fb_tunnel_dev) { | ||
1031 | err = -ENOMEM; | ||
1032 | goto err_alloc_dev; | ||
1033 | } | ||
1034 | dev_net_set(ipn->fb_tunnel_dev, net); | ||
1035 | |||
1036 | err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev); | ||
1037 | if (err) | ||
1038 | goto err_reg_dev; | ||
1039 | |||
1040 | if ((err = register_netdev(ipn->fb_tunnel_dev))) | ||
1041 | goto err_reg_dev; | ||
1042 | |||
1043 | t = netdev_priv(ipn->fb_tunnel_dev); | ||
1044 | |||
1045 | strcpy(t->parms.name, ipn->fb_tunnel_dev->name); | ||
1046 | return 0; | ||
1047 | |||
1048 | err_reg_dev: | ||
1049 | ipip_dev_free(ipn->fb_tunnel_dev); | ||
1050 | err_alloc_dev: | ||
1051 | /* nothing */ | ||
1052 | return err; | ||
1053 | } | 429 | } |
1054 | 430 | ||
1055 | static void __net_exit ipip_exit_net(struct net *net) | 431 | static void __net_exit ipip_exit_net(struct net *net) |
1056 | { | 432 | { |
1057 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | 433 | struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); |
1058 | LIST_HEAD(list); | 434 | ip_tunnel_delete_net(itn); |
1059 | |||
1060 | rtnl_lock(); | ||
1061 | ipip_destroy_tunnels(ipn, &list); | ||
1062 | unregister_netdevice_queue(ipn->fb_tunnel_dev, &list); | ||
1063 | unregister_netdevice_many(&list); | ||
1064 | rtnl_unlock(); | ||
1065 | } | 435 | } |
1066 | 436 | ||
1067 | static struct pernet_operations ipip_net_ops = { | 437 | static struct pernet_operations ipip_net_ops = { |
1068 | .init = ipip_init_net, | 438 | .init = ipip_init_net, |
1069 | .exit = ipip_exit_net, | 439 | .exit = ipip_exit_net, |
1070 | .id = &ipip_net_id, | 440 | .id = &ipip_net_id, |
1071 | .size = sizeof(struct ipip_net), | 441 | .size = sizeof(struct ip_tunnel_net), |
1072 | }; | 442 | }; |
1073 | 443 | ||
1074 | static int __init ipip_init(void) | 444 | static int __init ipip_init(void) |
1075 | { | 445 | { |
1076 | int err; | 446 | int err; |
1077 | 447 | ||
1078 | printk(banner); | 448 | pr_info("ipip: IPv4 over IPv4 tunneling driver\n"); |
1079 | 449 | ||
1080 | err = register_pernet_device(&ipip_net_ops); | 450 | err = register_pernet_device(&ipip_net_ops); |
1081 | if (err < 0) | 451 | if (err < 0) |