diff options
author | Pravin B Shelar <pshelar@nicira.com> | 2013-03-25 10:49:41 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-03-26 12:27:18 -0400 |
commit | fd58156e456d9f68fe04486be378d0bc93641532 (patch) | |
tree | 50dd9ea7c5b048dad0f9b39e95f3c0299448d86c /net/ipv4/ipip.c | |
parent | c54419321455631079c7d6e60bc732dd0c5914c5 (diff) |
IPIP: Use ip-tunneling code.
Reuse the common IP tunneling code that was refactored out of the GRE
module.
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/ipip.c')
-rw-r--r-- | net/ipv4/ipip.c | 746 |
1 files changed, 56 insertions, 690 deletions
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index a557d6ab127a..77bfcce64fe5 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -117,221 +117,15 @@ | |||
117 | #include <net/net_namespace.h> | 117 | #include <net/net_namespace.h> |
118 | #include <net/netns/generic.h> | 118 | #include <net/netns/generic.h> |
119 | 119 | ||
120 | #define HASH_SIZE 16 | ||
121 | #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) | ||
122 | |||
123 | static bool log_ecn_error = true; | 120 | static bool log_ecn_error = true; |
124 | module_param(log_ecn_error, bool, 0644); | 121 | module_param(log_ecn_error, bool, 0644); |
125 | MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); | 122 | MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); |
126 | 123 | ||
127 | static int ipip_net_id __read_mostly; | 124 | static int ipip_net_id __read_mostly; |
128 | struct ipip_net { | ||
129 | struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; | ||
130 | struct ip_tunnel __rcu *tunnels_r[HASH_SIZE]; | ||
131 | struct ip_tunnel __rcu *tunnels_l[HASH_SIZE]; | ||
132 | struct ip_tunnel __rcu *tunnels_wc[1]; | ||
133 | struct ip_tunnel __rcu **tunnels[4]; | ||
134 | |||
135 | struct net_device *fb_tunnel_dev; | ||
136 | }; | ||
137 | 125 | ||
138 | static int ipip_tunnel_init(struct net_device *dev); | 126 | static int ipip_tunnel_init(struct net_device *dev); |
139 | static void ipip_tunnel_setup(struct net_device *dev); | ||
140 | static void ipip_dev_free(struct net_device *dev); | ||
141 | static struct rtnl_link_ops ipip_link_ops __read_mostly; | 127 | static struct rtnl_link_ops ipip_link_ops __read_mostly; |
142 | 128 | ||
143 | static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev, | ||
144 | struct rtnl_link_stats64 *tot) | ||
145 | { | ||
146 | int i; | ||
147 | |||
148 | for_each_possible_cpu(i) { | ||
149 | const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); | ||
150 | u64 rx_packets, rx_bytes, tx_packets, tx_bytes; | ||
151 | unsigned int start; | ||
152 | |||
153 | do { | ||
154 | start = u64_stats_fetch_begin_bh(&tstats->syncp); | ||
155 | rx_packets = tstats->rx_packets; | ||
156 | tx_packets = tstats->tx_packets; | ||
157 | rx_bytes = tstats->rx_bytes; | ||
158 | tx_bytes = tstats->tx_bytes; | ||
159 | } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); | ||
160 | |||
161 | tot->rx_packets += rx_packets; | ||
162 | tot->tx_packets += tx_packets; | ||
163 | tot->rx_bytes += rx_bytes; | ||
164 | tot->tx_bytes += tx_bytes; | ||
165 | } | ||
166 | |||
167 | tot->tx_fifo_errors = dev->stats.tx_fifo_errors; | ||
168 | tot->tx_carrier_errors = dev->stats.tx_carrier_errors; | ||
169 | tot->tx_dropped = dev->stats.tx_dropped; | ||
170 | tot->tx_aborted_errors = dev->stats.tx_aborted_errors; | ||
171 | tot->tx_errors = dev->stats.tx_errors; | ||
172 | tot->collisions = dev->stats.collisions; | ||
173 | |||
174 | return tot; | ||
175 | } | ||
176 | |||
177 | static struct ip_tunnel *ipip_tunnel_lookup(struct net *net, | ||
178 | __be32 remote, __be32 local) | ||
179 | { | ||
180 | unsigned int h0 = HASH(remote); | ||
181 | unsigned int h1 = HASH(local); | ||
182 | struct ip_tunnel *t; | ||
183 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
184 | |||
185 | for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1]) | ||
186 | if (local == t->parms.iph.saddr && | ||
187 | remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) | ||
188 | return t; | ||
189 | |||
190 | for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0]) | ||
191 | if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) | ||
192 | return t; | ||
193 | |||
194 | for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1]) | ||
195 | if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) | ||
196 | return t; | ||
197 | |||
198 | t = rcu_dereference(ipn->tunnels_wc[0]); | ||
199 | if (t && (t->dev->flags&IFF_UP)) | ||
200 | return t; | ||
201 | return NULL; | ||
202 | } | ||
203 | |||
204 | static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn, | ||
205 | struct ip_tunnel_parm *parms) | ||
206 | { | ||
207 | __be32 remote = parms->iph.daddr; | ||
208 | __be32 local = parms->iph.saddr; | ||
209 | unsigned int h = 0; | ||
210 | int prio = 0; | ||
211 | |||
212 | if (remote) { | ||
213 | prio |= 2; | ||
214 | h ^= HASH(remote); | ||
215 | } | ||
216 | if (local) { | ||
217 | prio |= 1; | ||
218 | h ^= HASH(local); | ||
219 | } | ||
220 | return &ipn->tunnels[prio][h]; | ||
221 | } | ||
222 | |||
223 | static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn, | ||
224 | struct ip_tunnel *t) | ||
225 | { | ||
226 | return __ipip_bucket(ipn, &t->parms); | ||
227 | } | ||
228 | |||
229 | static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) | ||
230 | { | ||
231 | struct ip_tunnel __rcu **tp; | ||
232 | struct ip_tunnel *iter; | ||
233 | |||
234 | for (tp = ipip_bucket(ipn, t); | ||
235 | (iter = rtnl_dereference(*tp)) != NULL; | ||
236 | tp = &iter->next) { | ||
237 | if (t == iter) { | ||
238 | rcu_assign_pointer(*tp, t->next); | ||
239 | break; | ||
240 | } | ||
241 | } | ||
242 | } | ||
243 | |||
244 | static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) | ||
245 | { | ||
246 | struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t); | ||
247 | |||
248 | rcu_assign_pointer(t->next, rtnl_dereference(*tp)); | ||
249 | rcu_assign_pointer(*tp, t); | ||
250 | } | ||
251 | |||
252 | static int ipip_tunnel_create(struct net_device *dev) | ||
253 | { | ||
254 | struct ip_tunnel *t = netdev_priv(dev); | ||
255 | struct net *net = dev_net(dev); | ||
256 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
257 | int err; | ||
258 | |||
259 | err = ipip_tunnel_init(dev); | ||
260 | if (err < 0) | ||
261 | goto out; | ||
262 | |||
263 | err = register_netdevice(dev); | ||
264 | if (err < 0) | ||
265 | goto out; | ||
266 | |||
267 | strcpy(t->parms.name, dev->name); | ||
268 | dev->rtnl_link_ops = &ipip_link_ops; | ||
269 | |||
270 | dev_hold(dev); | ||
271 | ipip_tunnel_link(ipn, t); | ||
272 | return 0; | ||
273 | |||
274 | out: | ||
275 | return err; | ||
276 | } | ||
277 | |||
278 | static struct ip_tunnel *ipip_tunnel_locate(struct net *net, | ||
279 | struct ip_tunnel_parm *parms, int create) | ||
280 | { | ||
281 | __be32 remote = parms->iph.daddr; | ||
282 | __be32 local = parms->iph.saddr; | ||
283 | struct ip_tunnel *t, *nt; | ||
284 | struct ip_tunnel __rcu **tp; | ||
285 | struct net_device *dev; | ||
286 | char name[IFNAMSIZ]; | ||
287 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
288 | |||
289 | for (tp = __ipip_bucket(ipn, parms); | ||
290 | (t = rtnl_dereference(*tp)) != NULL; | ||
291 | tp = &t->next) { | ||
292 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) | ||
293 | return t; | ||
294 | } | ||
295 | if (!create) | ||
296 | return NULL; | ||
297 | |||
298 | if (parms->name[0]) | ||
299 | strlcpy(name, parms->name, IFNAMSIZ); | ||
300 | else | ||
301 | strcpy(name, "tunl%d"); | ||
302 | |||
303 | dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup); | ||
304 | if (dev == NULL) | ||
305 | return NULL; | ||
306 | |||
307 | dev_net_set(dev, net); | ||
308 | |||
309 | nt = netdev_priv(dev); | ||
310 | nt->parms = *parms; | ||
311 | |||
312 | if (ipip_tunnel_create(dev) < 0) | ||
313 | goto failed_free; | ||
314 | |||
315 | return nt; | ||
316 | |||
317 | failed_free: | ||
318 | ipip_dev_free(dev); | ||
319 | return NULL; | ||
320 | } | ||
321 | |||
322 | /* called with RTNL */ | ||
323 | static void ipip_tunnel_uninit(struct net_device *dev) | ||
324 | { | ||
325 | struct net *net = dev_net(dev); | ||
326 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
327 | |||
328 | if (dev == ipn->fb_tunnel_dev) | ||
329 | RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL); | ||
330 | else | ||
331 | ipip_tunnel_unlink(ipn, netdev_priv(dev)); | ||
332 | dev_put(dev); | ||
333 | } | ||
334 | |||
335 | static int ipip_err(struct sk_buff *skb, u32 info) | 129 | static int ipip_err(struct sk_buff *skb, u32 info) |
336 | { | 130 | { |
337 | 131 | ||
@@ -339,41 +133,17 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
339 | 8 bytes of packet payload. It means, that precise relaying of | 133 | 8 bytes of packet payload. It means, that precise relaying of |
340 | ICMP in the real Internet is absolutely infeasible. | 134 | ICMP in the real Internet is absolutely infeasible. |
341 | */ | 135 | */ |
136 | struct net *net = dev_net(skb->dev); | ||
137 | struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); | ||
342 | const struct iphdr *iph = (const struct iphdr *)skb->data; | 138 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
343 | const int type = icmp_hdr(skb)->type; | ||
344 | const int code = icmp_hdr(skb)->code; | ||
345 | struct ip_tunnel *t; | 139 | struct ip_tunnel *t; |
346 | int err; | 140 | int err; |
347 | 141 | const int type = icmp_hdr(skb)->type; | |
348 | switch (type) { | 142 | const int code = icmp_hdr(skb)->code; |
349 | default: | ||
350 | case ICMP_PARAMETERPROB: | ||
351 | return 0; | ||
352 | |||
353 | case ICMP_DEST_UNREACH: | ||
354 | switch (code) { | ||
355 | case ICMP_SR_FAILED: | ||
356 | case ICMP_PORT_UNREACH: | ||
357 | /* Impossible event. */ | ||
358 | return 0; | ||
359 | default: | ||
360 | /* All others are translated to HOST_UNREACH. | ||
361 | rfc2003 contains "deep thoughts" about NET_UNREACH, | ||
362 | I believe they are just ether pollution. --ANK | ||
363 | */ | ||
364 | break; | ||
365 | } | ||
366 | break; | ||
367 | case ICMP_TIME_EXCEEDED: | ||
368 | if (code != ICMP_EXC_TTL) | ||
369 | return 0; | ||
370 | break; | ||
371 | case ICMP_REDIRECT: | ||
372 | break; | ||
373 | } | ||
374 | 143 | ||
375 | err = -ENOENT; | 144 | err = -ENOENT; |
376 | t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); | 145 | t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, |
146 | iph->daddr, iph->saddr, 0); | ||
377 | if (t == NULL) | 147 | if (t == NULL) |
378 | goto out; | 148 | goto out; |
379 | 149 | ||
@@ -403,53 +173,29 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
403 | else | 173 | else |
404 | t->err_count = 1; | 174 | t->err_count = 1; |
405 | t->err_time = jiffies; | 175 | t->err_time = jiffies; |
406 | out: | ||
407 | 176 | ||
177 | out: | ||
408 | return err; | 178 | return err; |
409 | } | 179 | } |
410 | 180 | ||
181 | static const struct tnl_ptk_info tpi = { | ||
182 | /* no tunnel info required for ipip. */ | ||
183 | .proto = htons(ETH_P_IP), | ||
184 | }; | ||
185 | |||
411 | static int ipip_rcv(struct sk_buff *skb) | 186 | static int ipip_rcv(struct sk_buff *skb) |
412 | { | 187 | { |
188 | struct net *net = dev_net(skb->dev); | ||
189 | struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); | ||
413 | struct ip_tunnel *tunnel; | 190 | struct ip_tunnel *tunnel; |
414 | const struct iphdr *iph = ip_hdr(skb); | 191 | const struct iphdr *iph = ip_hdr(skb); |
415 | int err; | ||
416 | |||
417 | tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); | ||
418 | if (tunnel != NULL) { | ||
419 | struct pcpu_tstats *tstats; | ||
420 | 192 | ||
193 | tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, | ||
194 | iph->saddr, iph->daddr, 0); | ||
195 | if (tunnel) { | ||
421 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) | 196 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) |
422 | goto drop; | 197 | goto drop; |
423 | 198 | return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); | |
424 | secpath_reset(skb); | ||
425 | |||
426 | skb->mac_header = skb->network_header; | ||
427 | skb_reset_network_header(skb); | ||
428 | skb->protocol = htons(ETH_P_IP); | ||
429 | skb->pkt_type = PACKET_HOST; | ||
430 | |||
431 | __skb_tunnel_rx(skb, tunnel->dev); | ||
432 | |||
433 | err = IP_ECN_decapsulate(iph, skb); | ||
434 | if (unlikely(err)) { | ||
435 | if (log_ecn_error) | ||
436 | net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", | ||
437 | &iph->saddr, iph->tos); | ||
438 | if (err > 1) { | ||
439 | ++tunnel->dev->stats.rx_frame_errors; | ||
440 | ++tunnel->dev->stats.rx_errors; | ||
441 | goto drop; | ||
442 | } | ||
443 | } | ||
444 | |||
445 | tstats = this_cpu_ptr(tunnel->dev->tstats); | ||
446 | u64_stats_update_begin(&tstats->syncp); | ||
447 | tstats->rx_packets++; | ||
448 | tstats->rx_bytes += skb->len; | ||
449 | u64_stats_update_end(&tstats->syncp); | ||
450 | |||
451 | netif_rx(skb); | ||
452 | return 0; | ||
453 | } | 199 | } |
454 | 200 | ||
455 | return -1; | 201 | return -1; |
@@ -463,333 +209,64 @@ drop: | |||
463 | * This function assumes it is being called from dev_queue_xmit() | 209 | * This function assumes it is being called from dev_queue_xmit() |
464 | * and that skb is filled properly by that function. | 210 | * and that skb is filled properly by that function. |
465 | */ | 211 | */ |
466 | |||
467 | static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | 212 | static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) |
468 | { | 213 | { |
469 | struct ip_tunnel *tunnel = netdev_priv(dev); | 214 | struct ip_tunnel *tunnel = netdev_priv(dev); |
470 | const struct iphdr *tiph = &tunnel->parms.iph; | 215 | const struct iphdr *tiph = &tunnel->parms.iph; |
471 | u8 tos = tunnel->parms.iph.tos; | ||
472 | __be16 df = tiph->frag_off; | ||
473 | struct rtable *rt; /* Route to the other host */ | ||
474 | struct net_device *tdev; /* Device to other host */ | ||
475 | const struct iphdr *old_iph; | ||
476 | struct iphdr *iph; /* Our new IP header */ | ||
477 | unsigned int max_headroom; /* The extra header space needed */ | ||
478 | __be32 dst = tiph->daddr; | ||
479 | struct flowi4 fl4; | ||
480 | int mtu; | ||
481 | |||
482 | if (skb->protocol != htons(ETH_P_IP)) | ||
483 | goto tx_error; | ||
484 | old_iph = ip_hdr(skb); | ||
485 | |||
486 | if (tos & 1) | ||
487 | tos = old_iph->tos; | ||
488 | |||
489 | if (!dst) { | ||
490 | /* NBMA tunnel */ | ||
491 | if ((rt = skb_rtable(skb)) == NULL) { | ||
492 | dev->stats.tx_fifo_errors++; | ||
493 | goto tx_error; | ||
494 | } | ||
495 | dst = rt_nexthop(rt, old_iph->daddr); | ||
496 | } | ||
497 | 216 | ||
498 | rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, | 217 | if (unlikely(skb->protocol != htons(ETH_P_IP))) |
499 | dst, tiph->saddr, | ||
500 | 0, 0, | ||
501 | IPPROTO_IPIP, RT_TOS(tos), | ||
502 | tunnel->parms.link); | ||
503 | if (IS_ERR(rt)) { | ||
504 | dev->stats.tx_carrier_errors++; | ||
505 | goto tx_error_icmp; | ||
506 | } | ||
507 | tdev = rt->dst.dev; | ||
508 | |||
509 | if (tdev == dev) { | ||
510 | ip_rt_put(rt); | ||
511 | dev->stats.collisions++; | ||
512 | goto tx_error; | 218 | goto tx_error; |
513 | } | ||
514 | |||
515 | df |= old_iph->frag_off & htons(IP_DF); | ||
516 | |||
517 | if (df) { | ||
518 | mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); | ||
519 | |||
520 | if (mtu < 68) { | ||
521 | dev->stats.collisions++; | ||
522 | ip_rt_put(rt); | ||
523 | goto tx_error; | ||
524 | } | ||
525 | |||
526 | if (skb_dst(skb)) | ||
527 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); | ||
528 | |||
529 | if ((old_iph->frag_off & htons(IP_DF)) && | ||
530 | mtu < ntohs(old_iph->tot_len)) { | ||
531 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | ||
532 | htonl(mtu)); | ||
533 | ip_rt_put(rt); | ||
534 | goto tx_error; | ||
535 | } | ||
536 | } | ||
537 | |||
538 | if (tunnel->err_count > 0) { | ||
539 | if (time_before(jiffies, | ||
540 | tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { | ||
541 | tunnel->err_count--; | ||
542 | dst_link_failure(skb); | ||
543 | } else | ||
544 | tunnel->err_count = 0; | ||
545 | } | ||
546 | 219 | ||
547 | /* | 220 | if (likely(!skb->encapsulation)) { |
548 | * Okay, now see if we can stuff it in the buffer as-is. | ||
549 | */ | ||
550 | max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr)); | ||
551 | |||
552 | if (skb_headroom(skb) < max_headroom || skb_shared(skb) || | ||
553 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { | ||
554 | struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); | ||
555 | if (!new_skb) { | ||
556 | ip_rt_put(rt); | ||
557 | dev->stats.tx_dropped++; | ||
558 | dev_kfree_skb(skb); | ||
559 | return NETDEV_TX_OK; | ||
560 | } | ||
561 | if (skb->sk) | ||
562 | skb_set_owner_w(new_skb, skb->sk); | ||
563 | dev_kfree_skb(skb); | ||
564 | skb = new_skb; | ||
565 | old_iph = ip_hdr(skb); | ||
566 | } | ||
567 | |||
568 | if (!skb->encapsulation) { | ||
569 | skb_reset_inner_headers(skb); | 221 | skb_reset_inner_headers(skb); |
570 | skb->encapsulation = 1; | 222 | skb->encapsulation = 1; |
571 | } | 223 | } |
572 | if (skb->ip_summed != CHECKSUM_PARTIAL) | ||
573 | skb->ip_summed = CHECKSUM_NONE; | ||
574 | |||
575 | skb->transport_header = skb->network_header; | ||
576 | skb_push(skb, sizeof(struct iphdr)); | ||
577 | skb_reset_network_header(skb); | ||
578 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | ||
579 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | | ||
580 | IPSKB_REROUTED); | ||
581 | skb_dst_drop(skb); | ||
582 | skb_dst_set(skb, &rt->dst); | ||
583 | |||
584 | /* | ||
585 | * Push down and install the IPIP header. | ||
586 | */ | ||
587 | |||
588 | iph = ip_hdr(skb); | ||
589 | iph->version = 4; | ||
590 | iph->ihl = sizeof(struct iphdr)>>2; | ||
591 | iph->frag_off = df; | ||
592 | iph->protocol = IPPROTO_IPIP; | ||
593 | iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); | ||
594 | iph->daddr = fl4.daddr; | ||
595 | iph->saddr = fl4.saddr; | ||
596 | tunnel_ip_select_ident(skb, old_iph, &rt->dst); | ||
597 | |||
598 | if ((iph->ttl = tiph->ttl) == 0) | ||
599 | iph->ttl = old_iph->ttl; | ||
600 | |||
601 | iptunnel_xmit(skb, dev); | ||
602 | 224 | ||
225 | ip_tunnel_xmit(skb, dev, tiph); | ||
603 | return NETDEV_TX_OK; | 226 | return NETDEV_TX_OK; |
604 | 227 | ||
605 | tx_error_icmp: | ||
606 | dst_link_failure(skb); | ||
607 | tx_error: | 228 | tx_error: |
608 | dev->stats.tx_errors++; | 229 | dev->stats.tx_errors++; |
609 | dev_kfree_skb(skb); | 230 | dev_kfree_skb(skb); |
610 | return NETDEV_TX_OK; | 231 | return NETDEV_TX_OK; |
611 | } | 232 | } |
612 | 233 | ||
613 | static void ipip_tunnel_bind_dev(struct net_device *dev) | ||
614 | { | ||
615 | struct net_device *tdev = NULL; | ||
616 | struct ip_tunnel *tunnel; | ||
617 | const struct iphdr *iph; | ||
618 | |||
619 | tunnel = netdev_priv(dev); | ||
620 | iph = &tunnel->parms.iph; | ||
621 | |||
622 | if (iph->daddr) { | ||
623 | struct rtable *rt; | ||
624 | struct flowi4 fl4; | ||
625 | |||
626 | rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, | ||
627 | iph->daddr, iph->saddr, | ||
628 | 0, 0, | ||
629 | IPPROTO_IPIP, | ||
630 | RT_TOS(iph->tos), | ||
631 | tunnel->parms.link); | ||
632 | if (!IS_ERR(rt)) { | ||
633 | tdev = rt->dst.dev; | ||
634 | ip_rt_put(rt); | ||
635 | } | ||
636 | dev->flags |= IFF_POINTOPOINT; | ||
637 | } | ||
638 | |||
639 | if (!tdev && tunnel->parms.link) | ||
640 | tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); | ||
641 | |||
642 | if (tdev) { | ||
643 | dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); | ||
644 | dev->mtu = tdev->mtu - sizeof(struct iphdr); | ||
645 | } | ||
646 | dev->iflink = tunnel->parms.link; | ||
647 | } | ||
648 | |||
649 | static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) | ||
650 | { | ||
651 | struct net *net = dev_net(t->dev); | ||
652 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
653 | |||
654 | ipip_tunnel_unlink(ipn, t); | ||
655 | synchronize_net(); | ||
656 | t->parms.iph.saddr = p->iph.saddr; | ||
657 | t->parms.iph.daddr = p->iph.daddr; | ||
658 | memcpy(t->dev->dev_addr, &p->iph.saddr, 4); | ||
659 | memcpy(t->dev->broadcast, &p->iph.daddr, 4); | ||
660 | ipip_tunnel_link(ipn, t); | ||
661 | t->parms.iph.ttl = p->iph.ttl; | ||
662 | t->parms.iph.tos = p->iph.tos; | ||
663 | t->parms.iph.frag_off = p->iph.frag_off; | ||
664 | if (t->parms.link != p->link) { | ||
665 | t->parms.link = p->link; | ||
666 | ipip_tunnel_bind_dev(t->dev); | ||
667 | } | ||
668 | netdev_state_change(t->dev); | ||
669 | } | ||
670 | |||
671 | static int | 234 | static int |
672 | ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | 235 | ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) |
673 | { | 236 | { |
674 | int err = 0; | 237 | int err = 0; |
675 | struct ip_tunnel_parm p; | 238 | struct ip_tunnel_parm p; |
676 | struct ip_tunnel *t; | ||
677 | struct net *net = dev_net(dev); | ||
678 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
679 | |||
680 | switch (cmd) { | ||
681 | case SIOCGETTUNNEL: | ||
682 | t = NULL; | ||
683 | if (dev == ipn->fb_tunnel_dev) { | ||
684 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { | ||
685 | err = -EFAULT; | ||
686 | break; | ||
687 | } | ||
688 | t = ipip_tunnel_locate(net, &p, 0); | ||
689 | } | ||
690 | if (t == NULL) | ||
691 | t = netdev_priv(dev); | ||
692 | memcpy(&p, &t->parms, sizeof(p)); | ||
693 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) | ||
694 | err = -EFAULT; | ||
695 | break; | ||
696 | |||
697 | case SIOCADDTUNNEL: | ||
698 | case SIOCCHGTUNNEL: | ||
699 | err = -EPERM; | ||
700 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) | ||
701 | goto done; | ||
702 | |||
703 | err = -EFAULT; | ||
704 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) | ||
705 | goto done; | ||
706 | |||
707 | err = -EINVAL; | ||
708 | if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || | ||
709 | p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) | ||
710 | goto done; | ||
711 | if (p.iph.ttl) | ||
712 | p.iph.frag_off |= htons(IP_DF); | ||
713 | |||
714 | t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); | ||
715 | |||
716 | if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { | ||
717 | if (t != NULL) { | ||
718 | if (t->dev != dev) { | ||
719 | err = -EEXIST; | ||
720 | break; | ||
721 | } | ||
722 | } else { | ||
723 | if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || | ||
724 | (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { | ||
725 | err = -EINVAL; | ||
726 | break; | ||
727 | } | ||
728 | t = netdev_priv(dev); | ||
729 | } | ||
730 | |||
731 | ipip_tunnel_update(t, &p); | ||
732 | } | ||
733 | |||
734 | if (t) { | ||
735 | err = 0; | ||
736 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) | ||
737 | err = -EFAULT; | ||
738 | } else | ||
739 | err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); | ||
740 | break; | ||
741 | |||
742 | case SIOCDELTUNNEL: | ||
743 | err = -EPERM; | ||
744 | if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) | ||
745 | goto done; | ||
746 | |||
747 | if (dev == ipn->fb_tunnel_dev) { | ||
748 | err = -EFAULT; | ||
749 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) | ||
750 | goto done; | ||
751 | err = -ENOENT; | ||
752 | if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL) | ||
753 | goto done; | ||
754 | err = -EPERM; | ||
755 | if (t->dev == ipn->fb_tunnel_dev) | ||
756 | goto done; | ||
757 | dev = t->dev; | ||
758 | } | ||
759 | unregister_netdevice(dev); | ||
760 | err = 0; | ||
761 | break; | ||
762 | |||
763 | default: | ||
764 | err = -EINVAL; | ||
765 | } | ||
766 | 239 | ||
767 | done: | 240 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) |
768 | return err; | 241 | return -EFAULT; |
769 | } | ||
770 | 242 | ||
771 | static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) | 243 | if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || |
772 | { | 244 | p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) |
773 | if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) | 245 | return -EINVAL; |
246 | if (p.i_key || p.o_key || p.i_flags || p.o_flags) | ||
774 | return -EINVAL; | 247 | return -EINVAL; |
775 | dev->mtu = new_mtu; | 248 | if (p.iph.ttl) |
249 | p.iph.frag_off |= htons(IP_DF); | ||
250 | |||
251 | err = ip_tunnel_ioctl(dev, &p, cmd); | ||
252 | if (err) | ||
253 | return err; | ||
254 | |||
255 | if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) | ||
256 | return -EFAULT; | ||
257 | |||
776 | return 0; | 258 | return 0; |
777 | } | 259 | } |
778 | 260 | ||
779 | static const struct net_device_ops ipip_netdev_ops = { | 261 | static const struct net_device_ops ipip_netdev_ops = { |
780 | .ndo_uninit = ipip_tunnel_uninit, | 262 | .ndo_init = ipip_tunnel_init, |
263 | .ndo_uninit = ip_tunnel_uninit, | ||
781 | .ndo_start_xmit = ipip_tunnel_xmit, | 264 | .ndo_start_xmit = ipip_tunnel_xmit, |
782 | .ndo_do_ioctl = ipip_tunnel_ioctl, | 265 | .ndo_do_ioctl = ipip_tunnel_ioctl, |
783 | .ndo_change_mtu = ipip_tunnel_change_mtu, | 266 | .ndo_change_mtu = ip_tunnel_change_mtu, |
784 | .ndo_get_stats64 = ipip_get_stats64, | 267 | .ndo_get_stats64 = ip_tunnel_get_stats64, |
785 | }; | 268 | }; |
786 | 269 | ||
787 | static void ipip_dev_free(struct net_device *dev) | ||
788 | { | ||
789 | free_percpu(dev->tstats); | ||
790 | free_netdev(dev); | ||
791 | } | ||
792 | |||
793 | #define IPIP_FEATURES (NETIF_F_SG | \ | 270 | #define IPIP_FEATURES (NETIF_F_SG | \ |
794 | NETIF_F_FRAGLIST | \ | 271 | NETIF_F_FRAGLIST | \ |
795 | NETIF_F_HIGHDMA | \ | 272 | NETIF_F_HIGHDMA | \ |
@@ -798,11 +275,8 @@ static void ipip_dev_free(struct net_device *dev) | |||
798 | static void ipip_tunnel_setup(struct net_device *dev) | 275 | static void ipip_tunnel_setup(struct net_device *dev) |
799 | { | 276 | { |
800 | dev->netdev_ops = &ipip_netdev_ops; | 277 | dev->netdev_ops = &ipip_netdev_ops; |
801 | dev->destructor = ipip_dev_free; | ||
802 | 278 | ||
803 | dev->type = ARPHRD_TUNNEL; | 279 | dev->type = ARPHRD_TUNNEL; |
804 | dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); | ||
805 | dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); | ||
806 | dev->flags = IFF_NOARP; | 280 | dev->flags = IFF_NOARP; |
807 | dev->iflink = 0; | 281 | dev->iflink = 0; |
808 | dev->addr_len = 4; | 282 | dev->addr_len = 4; |
@@ -812,46 +286,19 @@ static void ipip_tunnel_setup(struct net_device *dev) | |||
812 | 286 | ||
813 | dev->features |= IPIP_FEATURES; | 287 | dev->features |= IPIP_FEATURES; |
814 | dev->hw_features |= IPIP_FEATURES; | 288 | dev->hw_features |= IPIP_FEATURES; |
289 | ip_tunnel_setup(dev, ipip_net_id); | ||
815 | } | 290 | } |
816 | 291 | ||
817 | static int ipip_tunnel_init(struct net_device *dev) | 292 | static int ipip_tunnel_init(struct net_device *dev) |
818 | { | 293 | { |
819 | struct ip_tunnel *tunnel = netdev_priv(dev); | 294 | struct ip_tunnel *tunnel = netdev_priv(dev); |
820 | 295 | ||
821 | tunnel->dev = dev; | ||
822 | |||
823 | memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); | 296 | memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); |
824 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); | 297 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); |
825 | 298 | ||
826 | ipip_tunnel_bind_dev(dev); | 299 | tunnel->hlen = 0; |
827 | 300 | tunnel->parms.iph.protocol = IPPROTO_IPIP; | |
828 | dev->tstats = alloc_percpu(struct pcpu_tstats); | 301 | return ip_tunnel_init(dev); |
829 | if (!dev->tstats) | ||
830 | return -ENOMEM; | ||
831 | |||
832 | return 0; | ||
833 | } | ||
834 | |||
835 | static int __net_init ipip_fb_tunnel_init(struct net_device *dev) | ||
836 | { | ||
837 | struct ip_tunnel *tunnel = netdev_priv(dev); | ||
838 | struct iphdr *iph = &tunnel->parms.iph; | ||
839 | struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id); | ||
840 | |||
841 | tunnel->dev = dev; | ||
842 | strcpy(tunnel->parms.name, dev->name); | ||
843 | |||
844 | iph->version = 4; | ||
845 | iph->protocol = IPPROTO_IPIP; | ||
846 | iph->ihl = 5; | ||
847 | |||
848 | dev->tstats = alloc_percpu(struct pcpu_tstats); | ||
849 | if (!dev->tstats) | ||
850 | return -ENOMEM; | ||
851 | |||
852 | dev_hold(dev); | ||
853 | rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); | ||
854 | return 0; | ||
855 | } | 302 | } |
856 | 303 | ||
857 | static void ipip_netlink_parms(struct nlattr *data[], | 304 | static void ipip_netlink_parms(struct nlattr *data[], |
@@ -891,28 +338,16 @@ static void ipip_netlink_parms(struct nlattr *data[], | |||
891 | static int ipip_newlink(struct net *src_net, struct net_device *dev, | 338 | static int ipip_newlink(struct net *src_net, struct net_device *dev, |
892 | struct nlattr *tb[], struct nlattr *data[]) | 339 | struct nlattr *tb[], struct nlattr *data[]) |
893 | { | 340 | { |
894 | struct net *net = dev_net(dev); | 341 | struct ip_tunnel_parm p; |
895 | struct ip_tunnel *nt; | ||
896 | |||
897 | nt = netdev_priv(dev); | ||
898 | ipip_netlink_parms(data, &nt->parms); | ||
899 | |||
900 | if (ipip_tunnel_locate(net, &nt->parms, 0)) | ||
901 | return -EEXIST; | ||
902 | 342 | ||
903 | return ipip_tunnel_create(dev); | 343 | ipip_netlink_parms(data, &p); |
344 | return ip_tunnel_newlink(dev, tb, &p); | ||
904 | } | 345 | } |
905 | 346 | ||
906 | static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], | 347 | static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], |
907 | struct nlattr *data[]) | 348 | struct nlattr *data[]) |
908 | { | 349 | { |
909 | struct ip_tunnel *t; | ||
910 | struct ip_tunnel_parm p; | 350 | struct ip_tunnel_parm p; |
911 | struct net *net = dev_net(dev); | ||
912 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
913 | |||
914 | if (dev == ipn->fb_tunnel_dev) | ||
915 | return -EINVAL; | ||
916 | 351 | ||
917 | ipip_netlink_parms(data, &p); | 352 | ipip_netlink_parms(data, &p); |
918 | 353 | ||
@@ -920,16 +355,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], | |||
920 | (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) | 355 | (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) |
921 | return -EINVAL; | 356 | return -EINVAL; |
922 | 357 | ||
923 | t = ipip_tunnel_locate(net, &p, 0); | 358 | return ip_tunnel_changelink(dev, tb, &p); |
924 | |||
925 | if (t) { | ||
926 | if (t->dev != dev) | ||
927 | return -EEXIST; | ||
928 | } else | ||
929 | t = netdev_priv(dev); | ||
930 | |||
931 | ipip_tunnel_update(t, &p); | ||
932 | return 0; | ||
933 | } | 359 | } |
934 | 360 | ||
935 | static size_t ipip_get_size(const struct net_device *dev) | 361 | static size_t ipip_get_size(const struct net_device *dev) |
@@ -986,6 +412,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = { | |||
986 | .setup = ipip_tunnel_setup, | 412 | .setup = ipip_tunnel_setup, |
987 | .newlink = ipip_newlink, | 413 | .newlink = ipip_newlink, |
988 | .changelink = ipip_changelink, | 414 | .changelink = ipip_changelink, |
415 | .dellink = ip_tunnel_dellink, | ||
989 | .get_size = ipip_get_size, | 416 | .get_size = ipip_get_size, |
990 | .fill_info = ipip_fill_info, | 417 | .fill_info = ipip_fill_info, |
991 | }; | 418 | }; |
@@ -996,90 +423,29 @@ static struct xfrm_tunnel ipip_handler __read_mostly = { | |||
996 | .priority = 1, | 423 | .priority = 1, |
997 | }; | 424 | }; |
998 | 425 | ||
999 | static const char banner[] __initconst = | ||
1000 | KERN_INFO "IPv4 over IPv4 tunneling driver\n"; | ||
1001 | |||
1002 | static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) | ||
1003 | { | ||
1004 | int prio; | ||
1005 | |||
1006 | for (prio = 1; prio < 4; prio++) { | ||
1007 | int h; | ||
1008 | for (h = 0; h < HASH_SIZE; h++) { | ||
1009 | struct ip_tunnel *t; | ||
1010 | |||
1011 | t = rtnl_dereference(ipn->tunnels[prio][h]); | ||
1012 | while (t != NULL) { | ||
1013 | unregister_netdevice_queue(t->dev, head); | ||
1014 | t = rtnl_dereference(t->next); | ||
1015 | } | ||
1016 | } | ||
1017 | } | ||
1018 | } | ||
1019 | |||
1020 | static int __net_init ipip_init_net(struct net *net) | 426 | static int __net_init ipip_init_net(struct net *net) |
1021 | { | 427 | { |
1022 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | 428 | return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0"); |
1023 | struct ip_tunnel *t; | ||
1024 | int err; | ||
1025 | |||
1026 | ipn->tunnels[0] = ipn->tunnels_wc; | ||
1027 | ipn->tunnels[1] = ipn->tunnels_l; | ||
1028 | ipn->tunnels[2] = ipn->tunnels_r; | ||
1029 | ipn->tunnels[3] = ipn->tunnels_r_l; | ||
1030 | |||
1031 | ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), | ||
1032 | "tunl0", | ||
1033 | ipip_tunnel_setup); | ||
1034 | if (!ipn->fb_tunnel_dev) { | ||
1035 | err = -ENOMEM; | ||
1036 | goto err_alloc_dev; | ||
1037 | } | ||
1038 | dev_net_set(ipn->fb_tunnel_dev, net); | ||
1039 | |||
1040 | err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev); | ||
1041 | if (err) | ||
1042 | goto err_reg_dev; | ||
1043 | |||
1044 | if ((err = register_netdev(ipn->fb_tunnel_dev))) | ||
1045 | goto err_reg_dev; | ||
1046 | |||
1047 | t = netdev_priv(ipn->fb_tunnel_dev); | ||
1048 | |||
1049 | strcpy(t->parms.name, ipn->fb_tunnel_dev->name); | ||
1050 | return 0; | ||
1051 | |||
1052 | err_reg_dev: | ||
1053 | ipip_dev_free(ipn->fb_tunnel_dev); | ||
1054 | err_alloc_dev: | ||
1055 | /* nothing */ | ||
1056 | return err; | ||
1057 | } | 429 | } |
1058 | 430 | ||
1059 | static void __net_exit ipip_exit_net(struct net *net) | 431 | static void __net_exit ipip_exit_net(struct net *net) |
1060 | { | 432 | { |
1061 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | 433 | struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); |
1062 | LIST_HEAD(list); | 434 | ip_tunnel_delete_net(itn); |
1063 | |||
1064 | rtnl_lock(); | ||
1065 | ipip_destroy_tunnels(ipn, &list); | ||
1066 | unregister_netdevice_queue(ipn->fb_tunnel_dev, &list); | ||
1067 | unregister_netdevice_many(&list); | ||
1068 | rtnl_unlock(); | ||
1069 | } | 435 | } |
1070 | 436 | ||
1071 | static struct pernet_operations ipip_net_ops = { | 437 | static struct pernet_operations ipip_net_ops = { |
1072 | .init = ipip_init_net, | 438 | .init = ipip_init_net, |
1073 | .exit = ipip_exit_net, | 439 | .exit = ipip_exit_net, |
1074 | .id = &ipip_net_id, | 440 | .id = &ipip_net_id, |
1075 | .size = sizeof(struct ipip_net), | 441 | .size = sizeof(struct ip_tunnel_net), |
1076 | }; | 442 | }; |
1077 | 443 | ||
1078 | static int __init ipip_init(void) | 444 | static int __init ipip_init(void) |
1079 | { | 445 | { |
1080 | int err; | 446 | int err; |
1081 | 447 | ||
1082 | printk(banner); | 448 | pr_info("ipip: IPv4 over IPv4 tunneling driver\n"); |
1083 | 449 | ||
1084 | err = register_pernet_device(&ipip_net_ops); | 450 | err = register_pernet_device(&ipip_net_ops); |
1085 | if (err < 0) | 451 | if (err < 0) |