aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c2
-rw-r--r--net/8021q/vlan_dev.c12
-rw-r--r--net/batman-adv/Kconfig10
-rw-r--r--net/batman-adv/Makefile1
-rw-r--r--net/batman-adv/bat_iv_ogm.c51
-rw-r--r--net/batman-adv/bitarray.c23
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c36
-rw-r--r--net/batman-adv/debugfs.c26
-rw-r--r--net/batman-adv/distributed-arp-table.c1066
-rw-r--r--net/batman-adv/distributed-arp-table.h167
-rw-r--r--net/batman-adv/gateway_client.c19
-rw-r--r--net/batman-adv/hard-interface.c7
-rw-r--r--net/batman-adv/icmp_socket.c16
-rw-r--r--net/batman-adv/main.c55
-rw-r--r--net/batman-adv/main.h32
-rw-r--r--net/batman-adv/originator.c21
-rw-r--r--net/batman-adv/packet.h68
-rw-r--r--net/batman-adv/routing.c105
-rw-r--r--net/batman-adv/send.c4
-rw-r--r--net/batman-adv/soft-interface.c84
-rw-r--r--net/batman-adv/sysfs.c56
-rw-r--r--net/batman-adv/translation-table.c97
-rw-r--r--net/batman-adv/translation-table.h6
-rw-r--r--net/batman-adv/types.h89
-rw-r--r--net/batman-adv/unicast.c135
-rw-r--r--net/batman-adv/unicast.h36
-rw-r--r--net/batman-adv/vis.c9
-rw-r--r--net/bridge/br_device.c4
-rw-r--r--net/bridge/br_input.c17
-rw-r--r--net/bridge/br_netlink.c75
-rw-r--r--net/bridge/br_private.h16
-rw-r--r--net/bridge/br_sysfs_br.c12
-rw-r--r--net/core/dev.c1
-rw-r--r--net/core/filter.c139
-rw-r--r--net/core/netprio_cgroup.c2
-rw-r--r--net/core/pktgen.c47
-rw-r--r--net/core/rtnetlink.c214
-rw-r--r--net/core/skbuff.c24
-rw-r--r--net/core/sock.c17
-rw-r--r--net/dccp/minisocks.c3
-rw-r--r--net/ipv4/devinet.c188
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/inet_connection_sock.c25
-rw-r--r--net/ipv4/inet_diag.c5
-rw-r--r--net/ipv4/ipconfig.c6
-rw-r--r--net/ipv4/ipip.c69
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/tcp.c5
-rw-r--r--net/ipv4/tcp_input.c45
-rw-r--r--net/ipv4/tcp_ipv4.c25
-rw-r--r--net/ipv4/tcp_minisocks.c8
-rw-r--r--net/ipv4/tcp_timer.c8
-rw-r--r--net/ipv6/addrconf.c177
-rw-r--r--net/ipv6/ah6.c10
-rw-r--r--net/ipv6/anycast.c2
-rw-r--r--net/ipv6/datagram.c2
-rw-r--r--net/ipv6/exthdrs.c18
-rw-r--r--net/ipv6/fib6_rules.c2
-rw-r--r--net/ipv6/icmp.c2
-rw-r--r--net/ipv6/ip6_fib.c57
-rw-r--r--net/ipv6/ip6_gre.c2
-rw-r--r--net/ipv6/ip6_output.c9
-rw-r--r--net/ipv6/ip6_tunnel.c62
-rw-r--r--net/ipv6/ipv6_sockglue.c2
-rw-r--r--net/ipv6/mcast.c4
-rw-r--r--net/ipv6/ndisc.c9
-rw-r--r--net/ipv6/netfilter/ip6_tables.c6
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c4
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c6
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c2
-rw-r--r--net/ipv6/raw.c6
-rw-r--r--net/ipv6/route.c174
-rw-r--r--net/ipv6/sit.c57
-rw-r--r--net/ipv6/syncookies.c2
-rw-r--r--net/ipv6/tcp_ipv6.c15
-rw-r--r--net/ipv6/xfrm6_policy.c4
-rw-r--r--net/ipv6/xfrm6_state.c4
-rw-r--r--net/l2tp/l2tp_netlink.c2
-rw-r--r--net/netfilter/ipvs/Kconfig7
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c15
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c404
-rw-r--r--net/netfilter/ipvs/ip_vs_dh.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c18
-rw-r--r--net/netfilter/ipvs/ip_vs_proto.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_ah_esp.c9
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c42
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c40
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c41
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c73
-rw-r--r--net/netfilter/xt_ipvs.c4
-rw-r--r--net/packet/af_packet.c48
-rw-r--r--net/packet/internal.h1
-rw-r--r--net/sched/cls_cgroup.c24
-rw-r--r--net/sched/sch_api.c9
-rw-r--r--net/sched/sch_cbq.c3
-rw-r--r--net/sched/sch_htb.c139
-rw-r--r--net/sctp/Kconfig39
-rw-r--r--net/sctp/protocol.c9
-rw-r--r--net/sctp/sm_sideeffect.c45
-rw-r--r--net/sctp/socket.c11
-rw-r--r--net/sctp/sysctl.c59
-rw-r--r--net/sctp/ulpqueue.c3
-rw-r--r--net/socket.c8
-rw-r--r--net/unix/diag.c3
110 files changed, 3871 insertions, 1037 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index ee070722a3a3..30ee4bc0f7cc 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -294,7 +294,7 @@ static void vlan_transfer_features(struct net_device *dev,
294 else 294 else
295 vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN; 295 vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN;
296 296
297#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) 297#if IS_ENABLED(CONFIG_FCOE)
298 vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid; 298 vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid;
299#endif 299#endif
300 300
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 402442402af7..4a6d31a082b9 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -409,7 +409,7 @@ static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa)
409 return err; 409 return err;
410} 410}
411 411
412#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) 412#if IS_ENABLED(CONFIG_FCOE)
413static int vlan_dev_fcoe_ddp_setup(struct net_device *dev, u16 xid, 413static int vlan_dev_fcoe_ddp_setup(struct net_device *dev, u16 xid,
414 struct scatterlist *sgl, unsigned int sgc) 414 struct scatterlist *sgl, unsigned int sgc)
415{ 415{
@@ -531,6 +531,10 @@ static const struct header_ops vlan_header_ops = {
531 .parse = eth_header_parse, 531 .parse = eth_header_parse,
532}; 532};
533 533
534static struct device_type vlan_type = {
535 .name = "vlan",
536};
537
534static const struct net_device_ops vlan_netdev_ops; 538static const struct net_device_ops vlan_netdev_ops;
535 539
536static int vlan_dev_init(struct net_device *dev) 540static int vlan_dev_init(struct net_device *dev)
@@ -564,7 +568,7 @@ static int vlan_dev_init(struct net_device *dev)
564 if (is_zero_ether_addr(dev->broadcast)) 568 if (is_zero_ether_addr(dev->broadcast))
565 memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); 569 memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len);
566 570
567#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) 571#if IS_ENABLED(CONFIG_FCOE)
568 dev->fcoe_ddp_xid = real_dev->fcoe_ddp_xid; 572 dev->fcoe_ddp_xid = real_dev->fcoe_ddp_xid;
569#endif 573#endif
570 574
@@ -579,6 +583,8 @@ static int vlan_dev_init(struct net_device *dev)
579 583
580 dev->netdev_ops = &vlan_netdev_ops; 584 dev->netdev_ops = &vlan_netdev_ops;
581 585
586 SET_NETDEV_DEVTYPE(dev, &vlan_type);
587
582 if (is_vlan_dev(real_dev)) 588 if (is_vlan_dev(real_dev))
583 subclass = 1; 589 subclass = 1;
584 590
@@ -741,7 +747,7 @@ static const struct net_device_ops vlan_netdev_ops = {
741 .ndo_do_ioctl = vlan_dev_ioctl, 747 .ndo_do_ioctl = vlan_dev_ioctl,
742 .ndo_neigh_setup = vlan_dev_neigh_setup, 748 .ndo_neigh_setup = vlan_dev_neigh_setup,
743 .ndo_get_stats64 = vlan_dev_get_stats64, 749 .ndo_get_stats64 = vlan_dev_get_stats64,
744#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) 750#if IS_ENABLED(CONFIG_FCOE)
745 .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, 751 .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup,
746 .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, 752 .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done,
747 .ndo_fcoe_enable = vlan_dev_fcoe_enable, 753 .ndo_fcoe_enable = vlan_dev_fcoe_enable,
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index 53f5244e28f8..250e0b58109c 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -25,6 +25,16 @@ config BATMAN_ADV_BLA
25 more than one mesh node in the same LAN, you can safely remove 25 more than one mesh node in the same LAN, you can safely remove
26 this feature and save some space. 26 this feature and save some space.
27 27
28config BATMAN_ADV_DAT
29 bool "Distributed ARP Table"
30 depends on BATMAN_ADV && INET
31 default n
32 help
33 This option enables DAT (Distributed ARP Table), a DHT based
34 mechanism that increases ARP reliability on sparse wireless
35 mesh networks. If you think that your network does not need
36 this option you can safely remove it and save some space.
37
28config BATMAN_ADV_DEBUG 38config BATMAN_ADV_DEBUG
29 bool "B.A.T.M.A.N. debugging" 39 bool "B.A.T.M.A.N. debugging"
30 depends on BATMAN_ADV 40 depends on BATMAN_ADV
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 8676d2b1d574..e45e3b4e32e3 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -23,6 +23,7 @@ batman-adv-y += bat_iv_ogm.o
23batman-adv-y += bitarray.o 23batman-adv-y += bitarray.o
24batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o 24batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o
25batman-adv-y += debugfs.o 25batman-adv-y += debugfs.o
26batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o
26batman-adv-y += gateway_client.o 27batman-adv-y += gateway_client.o
27batman-adv-y += gateway_common.o 28batman-adv-y += gateway_common.o
28batman-adv-y += hard-interface.o 29batman-adv-y += hard-interface.o
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index b02b75dae3a8..9f3925a85aab 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -57,20 +57,22 @@ out:
57static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) 57static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
58{ 58{
59 struct batadv_ogm_packet *batadv_ogm_packet; 59 struct batadv_ogm_packet *batadv_ogm_packet;
60 unsigned char *ogm_buff;
60 uint32_t random_seqno; 61 uint32_t random_seqno;
61 int res = -ENOMEM; 62 int res = -ENOMEM;
62 63
63 /* randomize initial seqno to avoid collision */ 64 /* randomize initial seqno to avoid collision */
64 get_random_bytes(&random_seqno, sizeof(random_seqno)); 65 get_random_bytes(&random_seqno, sizeof(random_seqno));
65 atomic_set(&hard_iface->seqno, random_seqno); 66 atomic_set(&hard_iface->bat_iv.ogm_seqno, random_seqno);
66 67
67 hard_iface->packet_len = BATADV_OGM_HLEN; 68 hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN;
68 hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC); 69 ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC);
69 70 if (!ogm_buff)
70 if (!hard_iface->packet_buff)
71 goto out; 71 goto out;
72 72
73 batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff; 73 hard_iface->bat_iv.ogm_buff = ogm_buff;
74
75 batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
74 batadv_ogm_packet->header.packet_type = BATADV_IV_OGM; 76 batadv_ogm_packet->header.packet_type = BATADV_IV_OGM;
75 batadv_ogm_packet->header.version = BATADV_COMPAT_VERSION; 77 batadv_ogm_packet->header.version = BATADV_COMPAT_VERSION;
76 batadv_ogm_packet->header.ttl = 2; 78 batadv_ogm_packet->header.ttl = 2;
@@ -87,15 +89,16 @@ out:
87 89
88static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface) 90static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface)
89{ 91{
90 kfree(hard_iface->packet_buff); 92 kfree(hard_iface->bat_iv.ogm_buff);
91 hard_iface->packet_buff = NULL; 93 hard_iface->bat_iv.ogm_buff = NULL;
92} 94}
93 95
94static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface) 96static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface)
95{ 97{
96 struct batadv_ogm_packet *batadv_ogm_packet; 98 struct batadv_ogm_packet *batadv_ogm_packet;
99 unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff;
97 100
98 batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff; 101 batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
99 memcpy(batadv_ogm_packet->orig, 102 memcpy(batadv_ogm_packet->orig,
100 hard_iface->net_dev->dev_addr, ETH_ALEN); 103 hard_iface->net_dev->dev_addr, ETH_ALEN);
101 memcpy(batadv_ogm_packet->prev_sender, 104 memcpy(batadv_ogm_packet->prev_sender,
@@ -106,8 +109,9 @@ static void
106batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface) 109batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface)
107{ 110{
108 struct batadv_ogm_packet *batadv_ogm_packet; 111 struct batadv_ogm_packet *batadv_ogm_packet;
112 unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff;
109 113
110 batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff; 114 batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
111 batadv_ogm_packet->flags = BATADV_PRIMARIES_FIRST_HOP; 115 batadv_ogm_packet->flags = BATADV_PRIMARIES_FIRST_HOP;
112 batadv_ogm_packet->header.ttl = BATADV_TTL; 116 batadv_ogm_packet->header.ttl = BATADV_TTL;
113} 117}
@@ -407,9 +411,11 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
407 411
408 if ((atomic_read(&bat_priv->aggregated_ogms)) && 412 if ((atomic_read(&bat_priv->aggregated_ogms)) &&
409 (packet_len < BATADV_MAX_AGGREGATION_BYTES)) 413 (packet_len < BATADV_MAX_AGGREGATION_BYTES))
410 skb_size = BATADV_MAX_AGGREGATION_BYTES + ETH_HLEN; 414 skb_size = BATADV_MAX_AGGREGATION_BYTES;
411 else 415 else
412 skb_size = packet_len + ETH_HLEN; 416 skb_size = packet_len;
417
418 skb_size += ETH_HLEN + NET_IP_ALIGN;
413 419
414 forw_packet_aggr->skb = dev_alloc_skb(skb_size); 420 forw_packet_aggr->skb = dev_alloc_skb(skb_size);
415 if (!forw_packet_aggr->skb) { 421 if (!forw_packet_aggr->skb) {
@@ -418,7 +424,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
418 kfree(forw_packet_aggr); 424 kfree(forw_packet_aggr);
419 goto out; 425 goto out;
420 } 426 }
421 skb_reserve(forw_packet_aggr->skb, ETH_HLEN); 427 skb_reserve(forw_packet_aggr->skb, ETH_HLEN + NET_IP_ALIGN);
422 428
423 INIT_HLIST_NODE(&forw_packet_aggr->list); 429 INIT_HLIST_NODE(&forw_packet_aggr->list);
424 430
@@ -590,8 +596,10 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node,
590static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) 596static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
591{ 597{
592 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); 598 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
599 unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff;
593 struct batadv_ogm_packet *batadv_ogm_packet; 600 struct batadv_ogm_packet *batadv_ogm_packet;
594 struct batadv_hard_iface *primary_if; 601 struct batadv_hard_iface *primary_if;
602 int *ogm_buff_len = &hard_iface->bat_iv.ogm_buff_len;
595 int vis_server, tt_num_changes = 0; 603 int vis_server, tt_num_changes = 0;
596 uint32_t seqno; 604 uint32_t seqno;
597 uint8_t bandwidth; 605 uint8_t bandwidth;
@@ -600,17 +608,16 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
600 primary_if = batadv_primary_if_get_selected(bat_priv); 608 primary_if = batadv_primary_if_get_selected(bat_priv);
601 609
602 if (hard_iface == primary_if) 610 if (hard_iface == primary_if)
603 tt_num_changes = batadv_tt_append_diff(bat_priv, 611 tt_num_changes = batadv_tt_append_diff(bat_priv, ogm_buff,
604 &hard_iface->packet_buff, 612 ogm_buff_len,
605 &hard_iface->packet_len,
606 BATADV_OGM_HLEN); 613 BATADV_OGM_HLEN);
607 614
608 batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff; 615 batadv_ogm_packet = (struct batadv_ogm_packet *)(*ogm_buff);
609 616
610 /* change sequence number to network order */ 617 /* change sequence number to network order */
611 seqno = (uint32_t)atomic_read(&hard_iface->seqno); 618 seqno = (uint32_t)atomic_read(&hard_iface->bat_iv.ogm_seqno);
612 batadv_ogm_packet->seqno = htonl(seqno); 619 batadv_ogm_packet->seqno = htonl(seqno);
613 atomic_inc(&hard_iface->seqno); 620 atomic_inc(&hard_iface->bat_iv.ogm_seqno);
614 621
615 batadv_ogm_packet->ttvn = atomic_read(&bat_priv->tt.vn); 622 batadv_ogm_packet->ttvn = atomic_read(&bat_priv->tt.vn);
616 batadv_ogm_packet->tt_crc = htons(bat_priv->tt.local_crc); 623 batadv_ogm_packet->tt_crc = htons(bat_priv->tt.local_crc);
@@ -631,8 +638,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
631 } 638 }
632 639
633 batadv_slide_own_bcast_window(hard_iface); 640 batadv_slide_own_bcast_window(hard_iface);
634 batadv_iv_ogm_queue_add(bat_priv, hard_iface->packet_buff, 641 batadv_iv_ogm_queue_add(bat_priv, hard_iface->bat_iv.ogm_buff,
635 hard_iface->packet_len, hard_iface, 1, 642 hard_iface->bat_iv.ogm_buff_len, hard_iface, 1,
636 batadv_iv_ogm_emit_send_time(bat_priv)); 643 batadv_iv_ogm_emit_send_time(bat_priv));
637 644
638 if (primary_if) 645 if (primary_if)
@@ -1015,7 +1022,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
1015 return; 1022 return;
1016 1023
1017 /* could be changed by schedule_own_packet() */ 1024 /* could be changed by schedule_own_packet() */
1018 if_incoming_seqno = atomic_read(&if_incoming->seqno); 1025 if_incoming_seqno = atomic_read(&if_incoming->bat_iv.ogm_seqno);
1019 1026
1020 if (batadv_ogm_packet->flags & BATADV_DIRECTLINK) 1027 if (batadv_ogm_packet->flags & BATADV_DIRECTLINK)
1021 has_directlink_flag = 1; 1028 has_directlink_flag = 1;
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index aea174cdbfbd..5453b17d8df2 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -79,20 +79,17 @@ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits,
79 * or the old packet got delayed somewhere in the network. The 79 * or the old packet got delayed somewhere in the network. The
80 * packet should be dropped without calling this function if the 80 * packet should be dropped without calling this function if the
81 * seqno window is protected. 81 * seqno window is protected.
82 *
83 * seq_num_diff <= -BATADV_TQ_LOCAL_WINDOW_SIZE
84 * or
85 * seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE
82 */ 86 */
83 if (seq_num_diff <= -BATADV_TQ_LOCAL_WINDOW_SIZE || 87 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
84 seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE) { 88 "Other host probably restarted!\n");
85 89
86 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 90 bitmap_zero(seq_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
87 "Other host probably restarted!\n"); 91 if (set_mark)
88 92 batadv_set_bit(seq_bits, 0);
89 bitmap_zero(seq_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
90 if (set_mark)
91 batadv_set_bit(seq_bits, 0);
92
93 return 1;
94 }
95 93
96 /* never reached */ 94 return 1;
97 return 0;
98} 95}
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index fd8d5afec0dd..29a5542aac75 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1585,23 +1585,11 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
1585 struct hlist_head *head; 1585 struct hlist_head *head;
1586 uint32_t i; 1586 uint32_t i;
1587 bool is_own; 1587 bool is_own;
1588 int ret = 0;
1589 uint8_t *primary_addr; 1588 uint8_t *primary_addr;
1590 1589
1591 primary_if = batadv_primary_if_get_selected(bat_priv); 1590 primary_if = batadv_seq_print_text_primary_if_get(seq);
1592 if (!primary_if) { 1591 if (!primary_if)
1593 ret = seq_printf(seq,
1594 "BATMAN mesh %s disabled - please specify interfaces to enable it\n",
1595 net_dev->name);
1596 goto out;
1597 }
1598
1599 if (primary_if->if_status != BATADV_IF_ACTIVE) {
1600 ret = seq_printf(seq,
1601 "BATMAN mesh %s disabled - primary interface not active\n",
1602 net_dev->name);
1603 goto out; 1592 goto out;
1604 }
1605 1593
1606 primary_addr = primary_if->net_dev->dev_addr; 1594 primary_addr = primary_if->net_dev->dev_addr;
1607 seq_printf(seq, 1595 seq_printf(seq,
@@ -1628,7 +1616,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
1628out: 1616out:
1629 if (primary_if) 1617 if (primary_if)
1630 batadv_hardif_free_ref(primary_if); 1618 batadv_hardif_free_ref(primary_if);
1631 return ret; 1619 return 0;
1632} 1620}
1633 1621
1634int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) 1622int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
@@ -1643,23 +1631,11 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
1643 int secs, msecs; 1631 int secs, msecs;
1644 uint32_t i; 1632 uint32_t i;
1645 bool is_own; 1633 bool is_own;
1646 int ret = 0;
1647 uint8_t *primary_addr; 1634 uint8_t *primary_addr;
1648 1635
1649 primary_if = batadv_primary_if_get_selected(bat_priv); 1636 primary_if = batadv_seq_print_text_primary_if_get(seq);
1650 if (!primary_if) { 1637 if (!primary_if)
1651 ret = seq_printf(seq,
1652 "BATMAN mesh %s disabled - please specify interfaces to enable it\n",
1653 net_dev->name);
1654 goto out;
1655 }
1656
1657 if (primary_if->if_status != BATADV_IF_ACTIVE) {
1658 ret = seq_printf(seq,
1659 "BATMAN mesh %s disabled - primary interface not active\n",
1660 net_dev->name);
1661 goto out; 1638 goto out;
1662 }
1663 1639
1664 primary_addr = primary_if->net_dev->dev_addr; 1640 primary_addr = primary_if->net_dev->dev_addr;
1665 seq_printf(seq, 1641 seq_printf(seq,
@@ -1693,5 +1669,5 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
1693out: 1669out:
1694 if (primary_if) 1670 if (primary_if)
1695 batadv_hardif_free_ref(primary_if); 1671 batadv_hardif_free_ref(primary_if);
1696 return ret; 1672 return 0;
1697} 1673}
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 391d4fb2026f..3f679cb2d0e2 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -31,6 +31,7 @@
31#include "vis.h" 31#include "vis.h"
32#include "icmp_socket.h" 32#include "icmp_socket.h"
33#include "bridge_loop_avoidance.h" 33#include "bridge_loop_avoidance.h"
34#include "distributed-arp-table.h"
34 35
35static struct dentry *batadv_debugfs; 36static struct dentry *batadv_debugfs;
36 37
@@ -99,15 +100,17 @@ int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
99 100
100static int batadv_log_open(struct inode *inode, struct file *file) 101static int batadv_log_open(struct inode *inode, struct file *file)
101{ 102{
103 if (!try_module_get(THIS_MODULE))
104 return -EBUSY;
105
102 nonseekable_open(inode, file); 106 nonseekable_open(inode, file);
103 file->private_data = inode->i_private; 107 file->private_data = inode->i_private;
104 batadv_inc_module_count();
105 return 0; 108 return 0;
106} 109}
107 110
108static int batadv_log_release(struct inode *inode, struct file *file) 111static int batadv_log_release(struct inode *inode, struct file *file)
109{ 112{
110 batadv_dec_module_count(); 113 module_put(THIS_MODULE);
111 return 0; 114 return 0;
112} 115}
113 116
@@ -278,6 +281,19 @@ static int batadv_bla_backbone_table_open(struct inode *inode,
278 281
279#endif 282#endif
280 283
284#ifdef CONFIG_BATMAN_ADV_DAT
285/**
286 * batadv_dat_cache_open - Prepare file handler for reads from dat_chache
287 * @inode: inode which was opened
288 * @file: file handle to be initialized
289 */
290static int batadv_dat_cache_open(struct inode *inode, struct file *file)
291{
292 struct net_device *net_dev = (struct net_device *)inode->i_private;
293 return single_open(file, batadv_dat_cache_seq_print_text, net_dev);
294}
295#endif
296
281static int batadv_transtable_local_open(struct inode *inode, struct file *file) 297static int batadv_transtable_local_open(struct inode *inode, struct file *file)
282{ 298{
283 struct net_device *net_dev = (struct net_device *)inode->i_private; 299 struct net_device *net_dev = (struct net_device *)inode->i_private;
@@ -317,6 +333,9 @@ static BATADV_DEBUGINFO(bla_claim_table, S_IRUGO, batadv_bla_claim_table_open);
317static BATADV_DEBUGINFO(bla_backbone_table, S_IRUGO, 333static BATADV_DEBUGINFO(bla_backbone_table, S_IRUGO,
318 batadv_bla_backbone_table_open); 334 batadv_bla_backbone_table_open);
319#endif 335#endif
336#ifdef CONFIG_BATMAN_ADV_DAT
337static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open);
338#endif
320static BATADV_DEBUGINFO(transtable_local, S_IRUGO, 339static BATADV_DEBUGINFO(transtable_local, S_IRUGO,
321 batadv_transtable_local_open); 340 batadv_transtable_local_open);
322static BATADV_DEBUGINFO(vis_data, S_IRUGO, batadv_vis_data_open); 341static BATADV_DEBUGINFO(vis_data, S_IRUGO, batadv_vis_data_open);
@@ -329,6 +348,9 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
329 &batadv_debuginfo_bla_claim_table, 348 &batadv_debuginfo_bla_claim_table,
330 &batadv_debuginfo_bla_backbone_table, 349 &batadv_debuginfo_bla_backbone_table,
331#endif 350#endif
351#ifdef CONFIG_BATMAN_ADV_DAT
352 &batadv_debuginfo_dat_cache,
353#endif
332 &batadv_debuginfo_transtable_local, 354 &batadv_debuginfo_transtable_local,
333 &batadv_debuginfo_vis_data, 355 &batadv_debuginfo_vis_data,
334 NULL, 356 NULL,
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
new file mode 100644
index 000000000000..8e1d89d2b1c1
--- /dev/null
+++ b/net/batman-adv/distributed-arp-table.c
@@ -0,0 +1,1066 @@
1/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors:
2 *
3 * Antonio Quartulli
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#include <linux/if_ether.h>
21#include <linux/if_arp.h>
22#include <net/arp.h>
23
24#include "main.h"
25#include "hash.h"
26#include "distributed-arp-table.h"
27#include "hard-interface.h"
28#include "originator.h"
29#include "send.h"
30#include "types.h"
31#include "translation-table.h"
32#include "unicast.h"
33
34static void batadv_dat_purge(struct work_struct *work);
35
36/**
37 * batadv_dat_start_timer - initialise the DAT periodic worker
38 * @bat_priv: the bat priv with all the soft interface information
39 */
40static void batadv_dat_start_timer(struct batadv_priv *bat_priv)
41{
42 INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge);
43 queue_delayed_work(batadv_event_workqueue, &bat_priv->dat.work,
44 msecs_to_jiffies(10000));
45}
46
47/**
48 * batadv_dat_entry_free_ref - decrements the dat_entry refcounter and possibly
49 * free it
50 * @dat_entry: the oentry to free
51 */
52static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry)
53{
54 if (atomic_dec_and_test(&dat_entry->refcount))
55 kfree_rcu(dat_entry, rcu);
56}
57
58/**
59 * batadv_dat_to_purge - checks whether a dat_entry has to be purged or not
60 * @dat_entry: the entry to check
61 *
62 * Returns true if the entry has to be purged now, false otherwise
63 */
64static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry)
65{
66 return batadv_has_timed_out(dat_entry->last_update,
67 BATADV_DAT_ENTRY_TIMEOUT);
68}
69
70/**
71 * __batadv_dat_purge - delete entries from the DAT local storage
72 * @bat_priv: the bat priv with all the soft interface information
73 * @to_purge: function in charge to decide whether an entry has to be purged or
74 * not. This function takes the dat_entry as argument and has to
75 * returns a boolean value: true is the entry has to be deleted,
76 * false otherwise
77 *
78 * Loops over each entry in the DAT local storage and delete it if and only if
79 * the to_purge function passed as argument returns true
80 */
81static void __batadv_dat_purge(struct batadv_priv *bat_priv,
82 bool (*to_purge)(struct batadv_dat_entry *))
83{
84 spinlock_t *list_lock; /* protects write access to the hash lists */
85 struct batadv_dat_entry *dat_entry;
86 struct hlist_node *node, *node_tmp;
87 struct hlist_head *head;
88 uint32_t i;
89
90 if (!bat_priv->dat.hash)
91 return;
92
93 for (i = 0; i < bat_priv->dat.hash->size; i++) {
94 head = &bat_priv->dat.hash->table[i];
95 list_lock = &bat_priv->dat.hash->list_locks[i];
96
97 spin_lock_bh(list_lock);
98 hlist_for_each_entry_safe(dat_entry, node, node_tmp, head,
99 hash_entry) {
100 /* if an helper function has been passed as parameter,
101 * ask it if the entry has to be purged or not
102 */
103 if (to_purge && !to_purge(dat_entry))
104 continue;
105
106 hlist_del_rcu(node);
107 batadv_dat_entry_free_ref(dat_entry);
108 }
109 spin_unlock_bh(list_lock);
110 }
111}
112
113/**
114 * batadv_dat_purge - periodic task that deletes old entries from the local DAT
115 * hash table
116 * @work: kernel work struct
117 */
118static void batadv_dat_purge(struct work_struct *work)
119{
120 struct delayed_work *delayed_work;
121 struct batadv_priv_dat *priv_dat;
122 struct batadv_priv *bat_priv;
123
124 delayed_work = container_of(work, struct delayed_work, work);
125 priv_dat = container_of(delayed_work, struct batadv_priv_dat, work);
126 bat_priv = container_of(priv_dat, struct batadv_priv, dat);
127
128 __batadv_dat_purge(bat_priv, batadv_dat_to_purge);
129 batadv_dat_start_timer(bat_priv);
130}
131
132/**
133 * batadv_compare_dat - comparing function used in the local DAT hash table
134 * @node: node in the local table
135 * @data2: second object to compare the node to
136 *
137 * Returns 1 if the two entry are the same, 0 otherwise
138 */
139static int batadv_compare_dat(const struct hlist_node *node, const void *data2)
140{
141 const void *data1 = container_of(node, struct batadv_dat_entry,
142 hash_entry);
143
144 return (memcmp(data1, data2, sizeof(__be32)) == 0 ? 1 : 0);
145}
146
147/**
148 * batadv_arp_hw_src - extract the hw_src field from an ARP packet
149 * @skb: ARP packet
150 * @hdr_size: size of the possible header before the ARP packet
151 *
152 * Returns the value of the hw_src field in the ARP packet
153 */
154static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
155{
156 uint8_t *addr;
157
158 addr = (uint8_t *)(skb->data + hdr_size);
159 addr += ETH_HLEN + sizeof(struct arphdr);
160
161 return addr;
162}
163
164/**
165 * batadv_arp_ip_src - extract the ip_src field from an ARP packet
166 * @skb: ARP packet
167 * @hdr_size: size of the possible header before the ARP packet
168 *
169 * Returns the value of the ip_src field in the ARP packet
170 */
171static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size)
172{
173 return *(__be32 *)(batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN);
174}
175
176/**
177 * batadv_arp_hw_dst - extract the hw_dst field from an ARP packet
178 * @skb: ARP packet
179 * @hdr_size: size of the possible header before the ARP packet
180 *
181 * Returns the value of the hw_dst field in the ARP packet
182 */
183static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
184{
185 return batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN + 4;
186}
187
188/**
189 * batadv_arp_ip_dst - extract the ip_dst field from an ARP packet
190 * @skb: ARP packet
191 * @hdr_size: size of the possible header before the ARP packet
192 *
193 * Returns the value of the ip_dst field in the ARP packet
194 */
195static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
196{
197 return *(__be32 *)(batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN * 2 + 4);
198}
199
200/**
201 * batadv_hash_dat - compute the hash value for an IP address
202 * @data: data to hash
203 * @size: size of the hash table
204 *
205 * Returns the selected index in the hash table for the given data
206 */
207static uint32_t batadv_hash_dat(const void *data, uint32_t size)
208{
209 const unsigned char *key = data;
210 uint32_t hash = 0;
211 size_t i;
212
213 for (i = 0; i < 4; i++) {
214 hash += key[i];
215 hash += (hash << 10);
216 hash ^= (hash >> 6);
217 }
218
219 hash += (hash << 3);
220 hash ^= (hash >> 11);
221 hash += (hash << 15);
222
223 return hash % size;
224}
225
226/**
227 * batadv_dat_entry_hash_find - looks for a given dat_entry in the local hash
228 * table
229 * @bat_priv: the bat priv with all the soft interface information
230 * @ip: search key
231 *
232 * Returns the dat_entry if found, NULL otherwise
233 */
234static struct batadv_dat_entry *
235batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip)
236{
237 struct hlist_head *head;
238 struct hlist_node *node;
239 struct batadv_dat_entry *dat_entry, *dat_entry_tmp = NULL;
240 struct batadv_hashtable *hash = bat_priv->dat.hash;
241 uint32_t index;
242
243 if (!hash)
244 return NULL;
245
246 index = batadv_hash_dat(&ip, hash->size);
247 head = &hash->table[index];
248
249 rcu_read_lock();
250 hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) {
251 if (dat_entry->ip != ip)
252 continue;
253
254 if (!atomic_inc_not_zero(&dat_entry->refcount))
255 continue;
256
257 dat_entry_tmp = dat_entry;
258 break;
259 }
260 rcu_read_unlock();
261
262 return dat_entry_tmp;
263}
264
265/**
266 * batadv_dat_entry_add - add a new dat entry or update it if already exists
267 * @bat_priv: the bat priv with all the soft interface information
268 * @ip: ipv4 to add/edit
269 * @mac_addr: mac address to assign to the given ipv4
270 */
271static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
272 uint8_t *mac_addr)
273{
274 struct batadv_dat_entry *dat_entry;
275 int hash_added;
276
277 dat_entry = batadv_dat_entry_hash_find(bat_priv, ip);
278 /* if this entry is already known, just update it */
279 if (dat_entry) {
280 if (!batadv_compare_eth(dat_entry->mac_addr, mac_addr))
281 memcpy(dat_entry->mac_addr, mac_addr, ETH_ALEN);
282 dat_entry->last_update = jiffies;
283 batadv_dbg(BATADV_DBG_DAT, bat_priv,
284 "Entry updated: %pI4 %pM\n", &dat_entry->ip,
285 dat_entry->mac_addr);
286 goto out;
287 }
288
289 dat_entry = kmalloc(sizeof(*dat_entry), GFP_ATOMIC);
290 if (!dat_entry)
291 goto out;
292
293 dat_entry->ip = ip;
294 memcpy(dat_entry->mac_addr, mac_addr, ETH_ALEN);
295 dat_entry->last_update = jiffies;
296 atomic_set(&dat_entry->refcount, 2);
297
298 hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat,
299 batadv_hash_dat, &dat_entry->ip,
300 &dat_entry->hash_entry);
301
302 if (unlikely(hash_added != 0)) {
303 /* remove the reference for the hash */
304 batadv_dat_entry_free_ref(dat_entry);
305 goto out;
306 }
307
308 batadv_dbg(BATADV_DBG_DAT, bat_priv, "New entry added: %pI4 %pM\n",
309 &dat_entry->ip, dat_entry->mac_addr);
310
311out:
312 if (dat_entry)
313 batadv_dat_entry_free_ref(dat_entry);
314}
315
316#ifdef CONFIG_BATMAN_ADV_DEBUG
317
318/**
319 * batadv_dbg_arp - print a debug message containing all the ARP packet details
320 * @bat_priv: the bat priv with all the soft interface information
321 * @skb: ARP packet
322 * @type: ARP type
323 * @hdr_size: size of the possible header before the ARP packet
324 * @msg: message to print together with the debugging information
325 */
326static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
327 uint16_t type, int hdr_size, char *msg)
328{
329 struct batadv_unicast_4addr_packet *unicast_4addr_packet;
330 struct batadv_bcast_packet *bcast_pkt;
331 uint8_t *orig_addr;
332 __be32 ip_src, ip_dst;
333
334 if (msg)
335 batadv_dbg(BATADV_DBG_DAT, bat_priv, "%s\n", msg);
336
337 ip_src = batadv_arp_ip_src(skb, hdr_size);
338 ip_dst = batadv_arp_ip_dst(skb, hdr_size);
339 batadv_dbg(BATADV_DBG_DAT, bat_priv,
340 "ARP MSG = [src: %pM-%pI4 dst: %pM-%pI4]\n",
341 batadv_arp_hw_src(skb, hdr_size), &ip_src,
342 batadv_arp_hw_dst(skb, hdr_size), &ip_dst);
343
344 if (hdr_size == 0)
345 return;
346
347 /* if the ARP packet is encapsulated in a batman packet, let's print
348 * some debug messages
349 */
350 unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
351
352 switch (unicast_4addr_packet->u.header.packet_type) {
353 case BATADV_UNICAST:
354 batadv_dbg(BATADV_DBG_DAT, bat_priv,
355 "* encapsulated within a UNICAST packet\n");
356 break;
357 case BATADV_UNICAST_4ADDR:
358 batadv_dbg(BATADV_DBG_DAT, bat_priv,
359 "* encapsulated within a UNICAST_4ADDR packet (src: %pM)\n",
360 unicast_4addr_packet->src);
361 switch (unicast_4addr_packet->subtype) {
362 case BATADV_P_DAT_DHT_PUT:
363 batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: DAT_DHT_PUT\n");
364 break;
365 case BATADV_P_DAT_DHT_GET:
366 batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: DAT_DHT_GET\n");
367 break;
368 case BATADV_P_DAT_CACHE_REPLY:
369 batadv_dbg(BATADV_DBG_DAT, bat_priv,
370 "* type: DAT_CACHE_REPLY\n");
371 break;
372 case BATADV_P_DATA:
373 batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: DATA\n");
374 break;
375 default:
376 batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: Unknown (%u)!\n",
377 unicast_4addr_packet->u.header.packet_type);
378 }
379 break;
380 case BATADV_BCAST:
381 bcast_pkt = (struct batadv_bcast_packet *)unicast_4addr_packet;
382 orig_addr = bcast_pkt->orig;
383 batadv_dbg(BATADV_DBG_DAT, bat_priv,
384 "* encapsulated within a BCAST packet (src: %pM)\n",
385 orig_addr);
386 break;
387 default:
388 batadv_dbg(BATADV_DBG_DAT, bat_priv,
389 "* encapsulated within an unknown packet type (0x%x)\n",
390 unicast_4addr_packet->u.header.packet_type);
391 }
392}
393
394#else
395
/* no-op stub used when CONFIG_BATMAN_ADV_DEBUG is not set */
static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
			   uint16_t type, int hdr_size, char *msg)
{
}
400
401#endif /* CONFIG_BATMAN_ADV_DEBUG */
402
403/**
404 * batadv_is_orig_node_eligible - check whether a node can be a DHT candidate
405 * @res: the array with the already selected candidates
406 * @select: number of already selected candidates
407 * @tmp_max: address of the currently evaluated node
408 * @max: current round max address
409 * @last_max: address of the last selected candidate
410 * @candidate: orig_node under evaluation
411 * @max_orig_node: last selected candidate
412 *
413 * Returns true if the node has been elected as next candidate or false othrwise
414 */
415static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
416 int select, batadv_dat_addr_t tmp_max,
417 batadv_dat_addr_t max,
418 batadv_dat_addr_t last_max,
419 struct batadv_orig_node *candidate,
420 struct batadv_orig_node *max_orig_node)
421{
422 bool ret = false;
423 int j;
424
425 /* Check if this node has already been selected... */
426 for (j = 0; j < select; j++)
427 if (res[j].orig_node == candidate)
428 break;
429 /* ..and possibly skip it */
430 if (j < select)
431 goto out;
432 /* sanity check: has it already been selected? This should not happen */
433 if (tmp_max > last_max)
434 goto out;
435 /* check if during this iteration an originator with a closer dht
436 * address has already been found
437 */
438 if (tmp_max < max)
439 goto out;
440 /* this is an hash collision with the temporary selected node. Choose
441 * the one with the lowest address
442 */
443 if ((tmp_max == max) &&
444 (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0))
445 goto out;
446
447 ret = true;
448out:
449 return ret;
450}
451
452/**
453 * batadv_choose_next_candidate - select the next DHT candidate
454 * @bat_priv: the bat priv with all the soft interface information
455 * @cands: candidates array
456 * @select: number of candidates already present in the array
457 * @ip_key: key to look up in the DHT
458 * @last_max: pointer where the address of the selected candidate will be saved
459 */
460static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
461 struct batadv_dat_candidate *cands,
462 int select, batadv_dat_addr_t ip_key,
463 batadv_dat_addr_t *last_max)
464{
465 batadv_dat_addr_t max = 0, tmp_max = 0;
466 struct batadv_orig_node *orig_node, *max_orig_node = NULL;
467 struct batadv_hashtable *hash = bat_priv->orig_hash;
468 struct hlist_node *node;
469 struct hlist_head *head;
470 int i;
471
472 /* if no node is eligible as candidate, leave the candidate type as
473 * NOT_FOUND
474 */
475 cands[select].type = BATADV_DAT_CANDIDATE_NOT_FOUND;
476
477 /* iterate over the originator list and find the node with closest
478 * dat_address which has not been selected yet
479 */
480 for (i = 0; i < hash->size; i++) {
481 head = &hash->table[i];
482
483 rcu_read_lock();
484 hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
485 /* the dht space is a ring and addresses are unsigned */
486 tmp_max = BATADV_DAT_ADDR_MAX - orig_node->dat_addr +
487 ip_key;
488
489 if (!batadv_is_orig_node_eligible(cands, select,
490 tmp_max, max,
491 *last_max, orig_node,
492 max_orig_node))
493 continue;
494
495 if (!atomic_inc_not_zero(&orig_node->refcount))
496 continue;
497
498 max = tmp_max;
499 if (max_orig_node)
500 batadv_orig_node_free_ref(max_orig_node);
501 max_orig_node = orig_node;
502 }
503 rcu_read_unlock();
504 }
505 if (max_orig_node) {
506 cands[select].type = BATADV_DAT_CANDIDATE_ORIG;
507 cands[select].orig_node = max_orig_node;
508 batadv_dbg(BATADV_DBG_DAT, bat_priv,
509 "dat_select_candidates() %d: selected %pM addr=%u dist=%u\n",
510 select, max_orig_node->orig, max_orig_node->dat_addr,
511 max);
512 }
513 *last_max = max;
514}
515
516/**
517 * batadv_dat_select_candidates - selects the nodes which the DHT message has to
518 * be sent to
519 * @bat_priv: the bat priv with all the soft interface information
520 * @ip_dst: ipv4 to look up in the DHT
521 *
522 * An originator O is selected if and only if its DHT_ID value is one of three
523 * closest values (from the LEFT, with wrap around if needed) then the hash
524 * value of the key. ip_dst is the key.
525 *
526 * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM
527 */
528static struct batadv_dat_candidate *
529batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
530{
531 int select;
532 batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key;
533 struct batadv_dat_candidate *res;
534
535 if (!bat_priv->orig_hash)
536 return NULL;
537
538 res = kmalloc(BATADV_DAT_CANDIDATES_NUM * sizeof(*res), GFP_ATOMIC);
539 if (!res)
540 return NULL;
541
542 ip_key = (batadv_dat_addr_t)batadv_hash_dat(&ip_dst,
543 BATADV_DAT_ADDR_MAX);
544
545 batadv_dbg(BATADV_DBG_DAT, bat_priv,
546 "dat_select_candidates(): IP=%pI4 hash(IP)=%u\n", &ip_dst,
547 ip_key);
548
549 for (select = 0; select < BATADV_DAT_CANDIDATES_NUM; select++)
550 batadv_choose_next_candidate(bat_priv, res, select, ip_key,
551 &last_max);
552
553 return res;
554}
555
556/**
557 * batadv_dat_send_data - send a payload to the selected candidates
558 * @bat_priv: the bat priv with all the soft interface information
559 * @skb: payload to send
560 * @ip: the DHT key
561 * @packet_subtype: unicast4addr packet subtype to use
562 *
563 * In this function the skb is copied by means of pskb_copy() and is sent as
564 * unicast packet to each of the selected candidates
565 *
566 * Returns true if the packet is sent to at least one candidate, false otherwise
567 */
568static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
569 struct sk_buff *skb, __be32 ip,
570 int packet_subtype)
571{
572 int i;
573 bool ret = false;
574 int send_status;
575 struct batadv_neigh_node *neigh_node = NULL;
576 struct sk_buff *tmp_skb;
577 struct batadv_dat_candidate *cand;
578
579 cand = batadv_dat_select_candidates(bat_priv, ip);
580 if (!cand)
581 goto out;
582
583 batadv_dbg(BATADV_DBG_DAT, bat_priv, "DHT_SEND for %pI4\n", &ip);
584
585 for (i = 0; i < BATADV_DAT_CANDIDATES_NUM; i++) {
586 if (cand[i].type == BATADV_DAT_CANDIDATE_NOT_FOUND)
587 continue;
588
589 neigh_node = batadv_orig_node_get_router(cand[i].orig_node);
590 if (!neigh_node)
591 goto free_orig;
592
593 tmp_skb = pskb_copy(skb, GFP_ATOMIC);
594 if (!batadv_unicast_4addr_prepare_skb(bat_priv, tmp_skb,
595 cand[i].orig_node,
596 packet_subtype)) {
597 kfree_skb(tmp_skb);
598 goto free_neigh;
599 }
600
601 send_status = batadv_send_skb_packet(tmp_skb,
602 neigh_node->if_incoming,
603 neigh_node->addr);
604 if (send_status == NET_XMIT_SUCCESS) {
605 /* count the sent packet */
606 switch (packet_subtype) {
607 case BATADV_P_DAT_DHT_GET:
608 batadv_inc_counter(bat_priv,
609 BATADV_CNT_DAT_GET_TX);
610 break;
611 case BATADV_P_DAT_DHT_PUT:
612 batadv_inc_counter(bat_priv,
613 BATADV_CNT_DAT_PUT_TX);
614 break;
615 }
616
617 /* packet sent to a candidate: return true */
618 ret = true;
619 }
620free_neigh:
621 batadv_neigh_node_free_ref(neigh_node);
622free_orig:
623 batadv_orig_node_free_ref(cand[i].orig_node);
624 }
625
626out:
627 kfree(cand);
628 return ret;
629}
630
631/**
632 * batadv_dat_hash_free - free the local DAT hash table
633 * @bat_priv: the bat priv with all the soft interface information
634 */
635static void batadv_dat_hash_free(struct batadv_priv *bat_priv)
636{
637 if (!bat_priv->dat.hash)
638 return;
639
640 __batadv_dat_purge(bat_priv, NULL);
641
642 batadv_hash_destroy(bat_priv->dat.hash);
643
644 bat_priv->dat.hash = NULL;
645}
646
647/**
648 * batadv_dat_init - initialise the DAT internals
649 * @bat_priv: the bat priv with all the soft interface information
650 */
651int batadv_dat_init(struct batadv_priv *bat_priv)
652{
653 if (bat_priv->dat.hash)
654 return 0;
655
656 bat_priv->dat.hash = batadv_hash_new(1024);
657
658 if (!bat_priv->dat.hash)
659 return -ENOMEM;
660
661 batadv_dat_start_timer(bat_priv);
662
663 return 0;
664}
665
666/**
667 * batadv_dat_free - free the DAT internals
668 * @bat_priv: the bat priv with all the soft interface information
669 */
670void batadv_dat_free(struct batadv_priv *bat_priv)
671{
672 cancel_delayed_work_sync(&bat_priv->dat.work);
673
674 batadv_dat_hash_free(bat_priv);
675}
676
677/**
678 * batadv_dat_cache_seq_print_text - print the local DAT hash table
679 * @seq: seq file to print on
680 * @offset: not used
681 */
682int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
683{
684 struct net_device *net_dev = (struct net_device *)seq->private;
685 struct batadv_priv *bat_priv = netdev_priv(net_dev);
686 struct batadv_hashtable *hash = bat_priv->dat.hash;
687 struct batadv_dat_entry *dat_entry;
688 struct batadv_hard_iface *primary_if;
689 struct hlist_node *node;
690 struct hlist_head *head;
691 unsigned long last_seen_jiffies;
692 int last_seen_msecs, last_seen_secs, last_seen_mins;
693 uint32_t i;
694
695 primary_if = batadv_seq_print_text_primary_if_get(seq);
696 if (!primary_if)
697 goto out;
698
699 seq_printf(seq, "Distributed ARP Table (%s):\n", net_dev->name);
700 seq_printf(seq, " %-7s %-13s %5s\n", "IPv4", "MAC",
701 "last-seen");
702
703 for (i = 0; i < hash->size; i++) {
704 head = &hash->table[i];
705
706 rcu_read_lock();
707 hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) {
708 last_seen_jiffies = jiffies - dat_entry->last_update;
709 last_seen_msecs = jiffies_to_msecs(last_seen_jiffies);
710 last_seen_mins = last_seen_msecs / 60000;
711 last_seen_msecs = last_seen_msecs % 60000;
712 last_seen_secs = last_seen_msecs / 1000;
713
714 seq_printf(seq, " * %15pI4 %14pM %6i:%02i\n",
715 &dat_entry->ip, dat_entry->mac_addr,
716 last_seen_mins, last_seen_secs);
717 }
718 rcu_read_unlock();
719 }
720
721out:
722 if (primary_if)
723 batadv_hardif_free_ref(primary_if);
724 return 0;
725}
726
727/**
728 * batadv_arp_get_type - parse an ARP packet and gets the type
729 * @bat_priv: the bat priv with all the soft interface information
730 * @skb: packet to analyse
731 * @hdr_size: size of the possible header before the ARP packet in the skb
732 *
733 * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise
734 */
735static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv,
736 struct sk_buff *skb, int hdr_size)
737{
738 struct arphdr *arphdr;
739 struct ethhdr *ethhdr;
740 __be32 ip_src, ip_dst;
741 uint16_t type = 0;
742
743 /* pull the ethernet header */
744 if (unlikely(!pskb_may_pull(skb, hdr_size + ETH_HLEN)))
745 goto out;
746
747 ethhdr = (struct ethhdr *)(skb->data + hdr_size);
748
749 if (ethhdr->h_proto != htons(ETH_P_ARP))
750 goto out;
751
752 /* pull the ARP payload */
753 if (unlikely(!pskb_may_pull(skb, hdr_size + ETH_HLEN +
754 arp_hdr_len(skb->dev))))
755 goto out;
756
757 arphdr = (struct arphdr *)(skb->data + hdr_size + ETH_HLEN);
758
759 /* Check whether the ARP packet carries a valid
760 * IP information
761 */
762 if (arphdr->ar_hrd != htons(ARPHRD_ETHER))
763 goto out;
764
765 if (arphdr->ar_pro != htons(ETH_P_IP))
766 goto out;
767
768 if (arphdr->ar_hln != ETH_ALEN)
769 goto out;
770
771 if (arphdr->ar_pln != 4)
772 goto out;
773
774 /* Check for bad reply/request. If the ARP message is not sane, DAT
775 * will simply ignore it
776 */
777 ip_src = batadv_arp_ip_src(skb, hdr_size);
778 ip_dst = batadv_arp_ip_dst(skb, hdr_size);
779 if (ipv4_is_loopback(ip_src) || ipv4_is_multicast(ip_src) ||
780 ipv4_is_loopback(ip_dst) || ipv4_is_multicast(ip_dst))
781 goto out;
782
783 type = ntohs(arphdr->ar_op);
784out:
785 return type;
786}
787
788/**
789 * batadv_dat_snoop_outgoing_arp_request - snoop the ARP request and try to
790 * answer using DAT
791 * @bat_priv: the bat priv with all the soft interface information
792 * @skb: packet to check
793 *
794 * Returns true if the message has been sent to the dht candidates, false
795 * otherwise. In case of true the message has to be enqueued to permit the
796 * fallback
797 */
798bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
799 struct sk_buff *skb)
800{
801 uint16_t type = 0;
802 __be32 ip_dst, ip_src;
803 uint8_t *hw_src;
804 bool ret = false;
805 struct batadv_dat_entry *dat_entry = NULL;
806 struct sk_buff *skb_new;
807 struct batadv_hard_iface *primary_if = NULL;
808
809 if (!atomic_read(&bat_priv->distributed_arp_table))
810 goto out;
811
812 type = batadv_arp_get_type(bat_priv, skb, 0);
813 /* If the node gets an ARP_REQUEST it has to send a DHT_GET unicast
814 * message to the selected DHT candidates
815 */
816 if (type != ARPOP_REQUEST)
817 goto out;
818
819 batadv_dbg_arp(bat_priv, skb, type, 0, "Parsing outgoing ARP REQUEST");
820
821 ip_src = batadv_arp_ip_src(skb, 0);
822 hw_src = batadv_arp_hw_src(skb, 0);
823 ip_dst = batadv_arp_ip_dst(skb, 0);
824
825 batadv_dat_entry_add(bat_priv, ip_src, hw_src);
826
827 dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst);
828 if (dat_entry) {
829 primary_if = batadv_primary_if_get_selected(bat_priv);
830 if (!primary_if)
831 goto out;
832
833 skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
834 primary_if->soft_iface, ip_dst, hw_src,
835 dat_entry->mac_addr, hw_src);
836 if (!skb_new)
837 goto out;
838
839 skb_reset_mac_header(skb_new);
840 skb_new->protocol = eth_type_trans(skb_new,
841 primary_if->soft_iface);
842 bat_priv->stats.rx_packets++;
843 bat_priv->stats.rx_bytes += skb->len + ETH_HLEN;
844 primary_if->soft_iface->last_rx = jiffies;
845
846 netif_rx(skb_new);
847 batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n");
848 ret = true;
849 } else {
850 /* Send the request on the DHT */
851 ret = batadv_dat_send_data(bat_priv, skb, ip_dst,
852 BATADV_P_DAT_DHT_GET);
853 }
854out:
855 if (dat_entry)
856 batadv_dat_entry_free_ref(dat_entry);
857 if (primary_if)
858 batadv_hardif_free_ref(primary_if);
859 return ret;
860}
861
862/**
863 * batadv_dat_snoop_incoming_arp_request - snoop the ARP request and try to
864 * answer using the local DAT storage
865 * @bat_priv: the bat priv with all the soft interface information
866 * @skb: packet to check
867 * @hdr_size: size of the encapsulation header
868 *
869 * Returns true if the request has been answered, false otherwise
870 */
871bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
872 struct sk_buff *skb, int hdr_size)
873{
874 uint16_t type;
875 __be32 ip_src, ip_dst;
876 uint8_t *hw_src;
877 struct sk_buff *skb_new;
878 struct batadv_hard_iface *primary_if = NULL;
879 struct batadv_dat_entry *dat_entry = NULL;
880 bool ret = false;
881 int err;
882
883 if (!atomic_read(&bat_priv->distributed_arp_table))
884 goto out;
885
886 type = batadv_arp_get_type(bat_priv, skb, hdr_size);
887 if (type != ARPOP_REQUEST)
888 goto out;
889
890 hw_src = batadv_arp_hw_src(skb, hdr_size);
891 ip_src = batadv_arp_ip_src(skb, hdr_size);
892 ip_dst = batadv_arp_ip_dst(skb, hdr_size);
893
894 batadv_dbg_arp(bat_priv, skb, type, hdr_size,
895 "Parsing incoming ARP REQUEST");
896
897 batadv_dat_entry_add(bat_priv, ip_src, hw_src);
898
899 dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst);
900 if (!dat_entry)
901 goto out;
902
903 primary_if = batadv_primary_if_get_selected(bat_priv);
904 if (!primary_if)
905 goto out;
906
907 skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
908 primary_if->soft_iface, ip_dst, hw_src,
909 dat_entry->mac_addr, hw_src);
910
911 if (!skb_new)
912 goto out;
913
914 /* to preserve backwards compatibility, here the node has to answer
915 * using the same packet type it received for the request. This is due
916 * to that if a node is not using the 4addr packet format it may not
917 * support it.
918 */
919 if (hdr_size == sizeof(struct batadv_unicast_4addr_packet))
920 err = batadv_unicast_4addr_send_skb(bat_priv, skb_new,
921 BATADV_P_DAT_CACHE_REPLY);
922 else
923 err = batadv_unicast_send_skb(bat_priv, skb_new);
924
925 if (!err) {
926 batadv_inc_counter(bat_priv, BATADV_CNT_DAT_CACHED_REPLY_TX);
927 ret = true;
928 }
929out:
930 if (dat_entry)
931 batadv_dat_entry_free_ref(dat_entry);
932 if (primary_if)
933 batadv_hardif_free_ref(primary_if);
934 if (ret)
935 kfree_skb(skb);
936 return ret;
937}
938
939/**
940 * batadv_dat_snoop_outgoing_arp_reply - snoop the ARP reply and fill the DHT
941 * @bat_priv: the bat priv with all the soft interface information
942 * @skb: packet to check
943 */
944void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
945 struct sk_buff *skb)
946{
947 uint16_t type;
948 __be32 ip_src, ip_dst;
949 uint8_t *hw_src, *hw_dst;
950
951 if (!atomic_read(&bat_priv->distributed_arp_table))
952 return;
953
954 type = batadv_arp_get_type(bat_priv, skb, 0);
955 if (type != ARPOP_REPLY)
956 return;
957
958 batadv_dbg_arp(bat_priv, skb, type, 0, "Parsing outgoing ARP REPLY");
959
960 hw_src = batadv_arp_hw_src(skb, 0);
961 ip_src = batadv_arp_ip_src(skb, 0);
962 hw_dst = batadv_arp_hw_dst(skb, 0);
963 ip_dst = batadv_arp_ip_dst(skb, 0);
964
965 batadv_dat_entry_add(bat_priv, ip_src, hw_src);
966 batadv_dat_entry_add(bat_priv, ip_dst, hw_dst);
967
968 /* Send the ARP reply to the candidates for both the IP addresses that
969 * the node got within the ARP reply
970 */
971 batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT);
972 batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT);
973}
974/**
975 * batadv_dat_snoop_incoming_arp_reply - snoop the ARP reply and fill the local
976 * DAT storage only
977 * @bat_priv: the bat priv with all the soft interface information
978 * @skb: packet to check
979 * @hdr_size: siaze of the encapsulation header
980 */
981bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
982 struct sk_buff *skb, int hdr_size)
983{
984 uint16_t type;
985 __be32 ip_src, ip_dst;
986 uint8_t *hw_src, *hw_dst;
987 bool ret = false;
988
989 if (!atomic_read(&bat_priv->distributed_arp_table))
990 goto out;
991
992 type = batadv_arp_get_type(bat_priv, skb, hdr_size);
993 if (type != ARPOP_REPLY)
994 goto out;
995
996 batadv_dbg_arp(bat_priv, skb, type, hdr_size,
997 "Parsing incoming ARP REPLY");
998
999 hw_src = batadv_arp_hw_src(skb, hdr_size);
1000 ip_src = batadv_arp_ip_src(skb, hdr_size);
1001 hw_dst = batadv_arp_hw_dst(skb, hdr_size);
1002 ip_dst = batadv_arp_ip_dst(skb, hdr_size);
1003
1004 /* Update our internal cache with both the IP addresses the node got
1005 * within the ARP reply
1006 */
1007 batadv_dat_entry_add(bat_priv, ip_src, hw_src);
1008 batadv_dat_entry_add(bat_priv, ip_dst, hw_dst);
1009
1010 /* if this REPLY is directed to a client of mine, let's deliver the
1011 * packet to the interface
1012 */
1013 ret = !batadv_is_my_client(bat_priv, hw_dst);
1014out:
1015 /* if ret == false -> packet has to be delivered to the interface */
1016 return ret;
1017}
1018
1019/**
1020 * batadv_dat_drop_broadcast_packet - check if an ARP request has to be dropped
1021 * (because the node has already got the reply via DAT) or not
1022 * @bat_priv: the bat priv with all the soft interface information
1023 * @forw_packet: the broadcast packet
1024 *
1025 * Returns true if the node can drop the packet, false otherwise
1026 */
1027bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
1028 struct batadv_forw_packet *forw_packet)
1029{
1030 uint16_t type;
1031 __be32 ip_dst;
1032 struct batadv_dat_entry *dat_entry = NULL;
1033 bool ret = false;
1034 const size_t bcast_len = sizeof(struct batadv_bcast_packet);
1035
1036 if (!atomic_read(&bat_priv->distributed_arp_table))
1037 goto out;
1038
1039 /* If this packet is an ARP_REQUEST and the node already has the
1040 * information that it is going to ask, then the packet can be dropped
1041 */
1042 if (forw_packet->num_packets)
1043 goto out;
1044
1045 type = batadv_arp_get_type(bat_priv, forw_packet->skb, bcast_len);
1046 if (type != ARPOP_REQUEST)
1047 goto out;
1048
1049 ip_dst = batadv_arp_ip_dst(forw_packet->skb, bcast_len);
1050 dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst);
1051 /* check if the node already got this entry */
1052 if (!dat_entry) {
1053 batadv_dbg(BATADV_DBG_DAT, bat_priv,
1054 "ARP Request for %pI4: fallback\n", &ip_dst);
1055 goto out;
1056 }
1057
1058 batadv_dbg(BATADV_DBG_DAT, bat_priv,
1059 "ARP Request for %pI4: fallback prevented\n", &ip_dst);
1060 ret = true;
1061
1062out:
1063 if (dat_entry)
1064 batadv_dat_entry_free_ref(dat_entry);
1065 return ret;
1066}
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
new file mode 100644
index 000000000000..d060c033e7de
--- /dev/null
+++ b/net/batman-adv/distributed-arp-table.h
@@ -0,0 +1,167 @@
1/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors:
2 *
3 * Antonio Quartulli
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#ifndef _NET_BATMAN_ADV_ARP_H_
21#define _NET_BATMAN_ADV_ARP_H_
22
23#ifdef CONFIG_BATMAN_ADV_DAT
24
25#include "types.h"
26#include "originator.h"
27
28#include <linux/if_arp.h>
29
30#define BATADV_DAT_ADDR_MAX ((batadv_dat_addr_t)~(batadv_dat_addr_t)0)
31
32bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
33 struct sk_buff *skb);
34bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
35 struct sk_buff *skb, int hdr_size);
36void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
37 struct sk_buff *skb);
38bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
39 struct sk_buff *skb, int hdr_size);
40bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
41 struct batadv_forw_packet *forw_packet);
42
43/**
44 * batadv_dat_init_orig_node_addr - assign a DAT address to the orig_node
45 * @orig_node: the node to assign the DAT address to
46 */
47static inline void
48batadv_dat_init_orig_node_addr(struct batadv_orig_node *orig_node)
49{
50 uint32_t addr;
51
52 addr = batadv_choose_orig(orig_node->orig, BATADV_DAT_ADDR_MAX);
53 orig_node->dat_addr = (batadv_dat_addr_t)addr;
54}
55
56/**
57 * batadv_dat_init_own_addr - assign a DAT address to the node itself
58 * @bat_priv: the bat priv with all the soft interface information
59 * @primary_if: a pointer to the primary interface
60 */
61static inline void
62batadv_dat_init_own_addr(struct batadv_priv *bat_priv,
63 struct batadv_hard_iface *primary_if)
64{
65 uint32_t addr;
66
67 addr = batadv_choose_orig(primary_if->net_dev->dev_addr,
68 BATADV_DAT_ADDR_MAX);
69
70 bat_priv->dat.addr = (batadv_dat_addr_t)addr;
71}
72
73int batadv_dat_init(struct batadv_priv *bat_priv);
74void batadv_dat_free(struct batadv_priv *bat_priv);
75int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset);
76
77/**
78 * batadv_dat_inc_counter - increment the correct DAT packet counter
79 * @bat_priv: the bat priv with all the soft interface information
80 * @subtype: the 4addr subtype of the packet to be counted
81 *
82 * Updates the ethtool statistics for the received packet if it is a DAT subtype
83 */
84static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv,
85 uint8_t subtype)
86{
87 switch (subtype) {
88 case BATADV_P_DAT_DHT_GET:
89 batadv_inc_counter(bat_priv,
90 BATADV_CNT_DAT_GET_RX);
91 break;
92 case BATADV_P_DAT_DHT_PUT:
93 batadv_inc_counter(bat_priv,
94 BATADV_CNT_DAT_PUT_RX);
95 break;
96 }
97}
98
99#else
100
/* no-op stubs used when CONFIG_BATMAN_ADV_DAT is not set */
static inline bool
batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
				      struct sk_buff *skb)
{
	return false;
}

static inline bool
batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
				      struct sk_buff *skb, int hdr_size)
{
	return false;
}
114
/* return type must match the CONFIG_BATMAN_ADV_DAT=y prototype, which is
 * void (see the declaration above)
 */
static inline void
batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
				    struct sk_buff *skb)
{
}
121
/* no-op stubs used when CONFIG_BATMAN_ADV_DAT is not set */
static inline bool
batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
				    struct sk_buff *skb, int hdr_size)
{
	return false;
}

static inline bool
batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
				 struct batadv_forw_packet *forw_packet)
{
	return false;
}

static inline void
batadv_dat_init_orig_node_addr(struct batadv_orig_node *orig_node)
{
}

static inline void batadv_dat_init_own_addr(struct batadv_priv *bat_priv,
					    struct batadv_hard_iface *iface)
{
}

static inline void batadv_arp_change_timeout(struct net_device *soft_iface,
					     const char *name)
{
}

static inline int batadv_dat_init(struct batadv_priv *bat_priv)
{
	return 0;
}

static inline void batadv_dat_free(struct batadv_priv *bat_priv)
{
}

static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv,
					  uint8_t subtype)
{
}
164
165#endif /* CONFIG_BATMAN_ADV_DAT */
166
167#endif /* _NET_BATMAN_ADV_ARP_H_ */
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 15d67abc10a4..dd07c7e3654f 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -477,22 +477,11 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
477 struct batadv_hard_iface *primary_if; 477 struct batadv_hard_iface *primary_if;
478 struct batadv_gw_node *gw_node; 478 struct batadv_gw_node *gw_node;
479 struct hlist_node *node; 479 struct hlist_node *node;
480 int gw_count = 0, ret = 0; 480 int gw_count = 0;
481 481
482 primary_if = batadv_primary_if_get_selected(bat_priv); 482 primary_if = batadv_seq_print_text_primary_if_get(seq);
483 if (!primary_if) { 483 if (!primary_if)
484 ret = seq_printf(seq,
485 "BATMAN mesh %s disabled - please specify interfaces to enable it\n",
486 net_dev->name);
487 goto out; 484 goto out;
488 }
489
490 if (primary_if->if_status != BATADV_IF_ACTIVE) {
491 ret = seq_printf(seq,
492 "BATMAN mesh %s disabled - primary interface not active\n",
493 net_dev->name);
494 goto out;
495 }
496 485
497 seq_printf(seq, 486 seq_printf(seq,
498 " %-12s (%s/%i) %17s [%10s]: gw_class ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", 487 " %-12s (%s/%i) %17s [%10s]: gw_class ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n",
@@ -519,7 +508,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
519out: 508out:
520 if (primary_if) 509 if (primary_if)
521 batadv_hardif_free_ref(primary_if); 510 batadv_hardif_free_ref(primary_if);
522 return ret; 511 return 0;
523} 512}
524 513
525static bool batadv_is_type_dhcprequest(struct sk_buff *skb, int header_len) 514static bool batadv_is_type_dhcprequest(struct sk_buff *skb, int header_len)
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index d112fd6750b0..6b7a5d3eeb77 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -18,6 +18,7 @@
18 */ 18 */
19 19
20#include "main.h" 20#include "main.h"
21#include "distributed-arp-table.h"
21#include "hard-interface.h" 22#include "hard-interface.h"
22#include "soft-interface.h" 23#include "soft-interface.h"
23#include "send.h" 24#include "send.h"
@@ -109,6 +110,8 @@ static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv,
109 if (!primary_if) 110 if (!primary_if)
110 goto out; 111 goto out;
111 112
113 batadv_dat_init_own_addr(bat_priv, primary_if);
114
112 skb = bat_priv->vis.my_info->skb_packet; 115 skb = bat_priv->vis.my_info->skb_packet;
113 vis_packet = (struct batadv_vis_packet *)skb->data; 116 vis_packet = (struct batadv_vis_packet *)skb->data;
114 memcpy(vis_packet->vis_orig, primary_if->net_dev->dev_addr, ETH_ALEN); 117 memcpy(vis_packet->vis_orig, primary_if->net_dev->dev_addr, ETH_ALEN);
@@ -450,8 +453,8 @@ batadv_hardif_add_interface(struct net_device *net_dev)
450 /* This can't be called via a bat_priv callback because 453 /* This can't be called via a bat_priv callback because
451 * we have no bat_priv yet. 454 * we have no bat_priv yet.
452 */ 455 */
453 atomic_set(&hard_iface->seqno, 1); 456 atomic_set(&hard_iface->bat_iv.ogm_seqno, 1);
454 hard_iface->packet_buff = NULL; 457 hard_iface->bat_iv.ogm_buff = NULL;
455 458
456 return hard_iface; 459 return hard_iface;
457 460
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index bde3cf747507..87ca8095b011 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -42,12 +42,16 @@ static int batadv_socket_open(struct inode *inode, struct file *file)
42 unsigned int i; 42 unsigned int i;
43 struct batadv_socket_client *socket_client; 43 struct batadv_socket_client *socket_client;
44 44
45 if (!try_module_get(THIS_MODULE))
46 return -EBUSY;
47
45 nonseekable_open(inode, file); 48 nonseekable_open(inode, file);
46 49
47 socket_client = kmalloc(sizeof(*socket_client), GFP_KERNEL); 50 socket_client = kmalloc(sizeof(*socket_client), GFP_KERNEL);
48 51 if (!socket_client) {
49 if (!socket_client) 52 module_put(THIS_MODULE);
50 return -ENOMEM; 53 return -ENOMEM;
54 }
51 55
52 for (i = 0; i < ARRAY_SIZE(batadv_socket_client_hash); i++) { 56 for (i = 0; i < ARRAY_SIZE(batadv_socket_client_hash); i++) {
53 if (!batadv_socket_client_hash[i]) { 57 if (!batadv_socket_client_hash[i]) {
@@ -59,6 +63,7 @@ static int batadv_socket_open(struct inode *inode, struct file *file)
59 if (i == ARRAY_SIZE(batadv_socket_client_hash)) { 63 if (i == ARRAY_SIZE(batadv_socket_client_hash)) {
60 pr_err("Error - can't add another packet client: maximum number of clients reached\n"); 64 pr_err("Error - can't add another packet client: maximum number of clients reached\n");
61 kfree(socket_client); 65 kfree(socket_client);
66 module_put(THIS_MODULE);
62 return -EXFULL; 67 return -EXFULL;
63 } 68 }
64 69
@@ -71,7 +76,6 @@ static int batadv_socket_open(struct inode *inode, struct file *file)
71 76
72 file->private_data = socket_client; 77 file->private_data = socket_client;
73 78
74 batadv_inc_module_count();
75 return 0; 79 return 0;
76} 80}
77 81
@@ -96,7 +100,7 @@ static int batadv_socket_release(struct inode *inode, struct file *file)
96 spin_unlock_bh(&socket_client->lock); 100 spin_unlock_bh(&socket_client->lock);
97 101
98 kfree(socket_client); 102 kfree(socket_client);
99 batadv_dec_module_count(); 103 module_put(THIS_MODULE);
100 104
101 return 0; 105 return 0;
102} 106}
@@ -173,13 +177,13 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
173 if (len >= sizeof(struct batadv_icmp_packet_rr)) 177 if (len >= sizeof(struct batadv_icmp_packet_rr))
174 packet_len = sizeof(struct batadv_icmp_packet_rr); 178 packet_len = sizeof(struct batadv_icmp_packet_rr);
175 179
176 skb = dev_alloc_skb(packet_len + ETH_HLEN); 180 skb = dev_alloc_skb(packet_len + ETH_HLEN + NET_IP_ALIGN);
177 if (!skb) { 181 if (!skb) {
178 len = -ENOMEM; 182 len = -ENOMEM;
179 goto out; 183 goto out;
180 } 184 }
181 185
182 skb_reserve(skb, ETH_HLEN); 186 skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
183 icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len); 187 icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len);
184 188
185 if (copy_from_user(icmp_packet, buff, packet_len)) { 189 if (copy_from_user(icmp_packet, buff, packet_len)) {
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index b4aa470bc4a6..dc33a0c484a4 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -29,6 +29,7 @@
29#include "hard-interface.h" 29#include "hard-interface.h"
30#include "gateway_client.h" 30#include "gateway_client.h"
31#include "bridge_loop_avoidance.h" 31#include "bridge_loop_avoidance.h"
32#include "distributed-arp-table.h"
32#include "vis.h" 33#include "vis.h"
33#include "hash.h" 34#include "hash.h"
34#include "bat_algo.h" 35#include "bat_algo.h"
@@ -128,6 +129,10 @@ int batadv_mesh_init(struct net_device *soft_iface)
128 if (ret < 0) 129 if (ret < 0)
129 goto err; 130 goto err;
130 131
132 ret = batadv_dat_init(bat_priv);
133 if (ret < 0)
134 goto err;
135
131 atomic_set(&bat_priv->gw.reselect, 0); 136 atomic_set(&bat_priv->gw.reselect, 0);
132 atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE); 137 atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE);
133 138
@@ -155,21 +160,13 @@ void batadv_mesh_free(struct net_device *soft_iface)
155 160
156 batadv_bla_free(bat_priv); 161 batadv_bla_free(bat_priv);
157 162
163 batadv_dat_free(bat_priv);
164
158 free_percpu(bat_priv->bat_counters); 165 free_percpu(bat_priv->bat_counters);
159 166
160 atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); 167 atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE);
161} 168}
162 169
163void batadv_inc_module_count(void)
164{
165 try_module_get(THIS_MODULE);
166}
167
168void batadv_dec_module_count(void)
169{
170 module_put(THIS_MODULE);
171}
172
173int batadv_is_my_mac(const uint8_t *addr) 170int batadv_is_my_mac(const uint8_t *addr)
174{ 171{
175 const struct batadv_hard_iface *hard_iface; 172 const struct batadv_hard_iface *hard_iface;
@@ -188,6 +185,42 @@ int batadv_is_my_mac(const uint8_t *addr)
188 return 0; 185 return 0;
189} 186}
190 187
188/**
189 * batadv_seq_print_text_primary_if_get - called from debugfs table printing
190 * function that requires the primary interface
191 * @seq: debugfs table seq_file struct
192 *
193 * Returns primary interface if found or NULL otherwise.
194 */
195struct batadv_hard_iface *
196batadv_seq_print_text_primary_if_get(struct seq_file *seq)
197{
198 struct net_device *net_dev = (struct net_device *)seq->private;
199 struct batadv_priv *bat_priv = netdev_priv(net_dev);
200 struct batadv_hard_iface *primary_if;
201
202 primary_if = batadv_primary_if_get_selected(bat_priv);
203
204 if (!primary_if) {
205 seq_printf(seq,
206 "BATMAN mesh %s disabled - please specify interfaces to enable it\n",
207 net_dev->name);
208 goto out;
209 }
210
211 if (primary_if->if_status == BATADV_IF_ACTIVE)
212 goto out;
213
214 seq_printf(seq,
215 "BATMAN mesh %s disabled - primary interface not active\n",
216 net_dev->name);
217 batadv_hardif_free_ref(primary_if);
218 primary_if = NULL;
219
220out:
221 return primary_if;
222}
223
191static int batadv_recv_unhandled_packet(struct sk_buff *skb, 224static int batadv_recv_unhandled_packet(struct sk_buff *skb,
192 struct batadv_hard_iface *recv_if) 225 struct batadv_hard_iface *recv_if)
193{ 226{
@@ -274,6 +307,8 @@ static void batadv_recv_handler_init(void)
274 307
275 /* batman icmp packet */ 308 /* batman icmp packet */
276 batadv_rx_handler[BATADV_ICMP] = batadv_recv_icmp_packet; 309 batadv_rx_handler[BATADV_ICMP] = batadv_recv_icmp_packet;
310 /* unicast with 4 addresses packet */
311 batadv_rx_handler[BATADV_UNICAST_4ADDR] = batadv_recv_unicast_packet;
277 /* unicast packet */ 312 /* unicast packet */
278 batadv_rx_handler[BATADV_UNICAST] = batadv_recv_unicast_packet; 313 batadv_rx_handler[BATADV_UNICAST] = batadv_recv_unicast_packet;
279 /* fragmented unicast packet */ 314 /* fragmented unicast packet */
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index d57b746219de..240c74ffeb93 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -44,6 +44,7 @@
44#define BATADV_TT_LOCAL_TIMEOUT 3600000 /* in milliseconds */ 44#define BATADV_TT_LOCAL_TIMEOUT 3600000 /* in milliseconds */
45#define BATADV_TT_CLIENT_ROAM_TIMEOUT 600000 /* in milliseconds */ 45#define BATADV_TT_CLIENT_ROAM_TIMEOUT 600000 /* in milliseconds */
46#define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */ 46#define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */
47#define BATADV_DAT_ENTRY_TIMEOUT (5*60000) /* 5 mins in milliseconds */
47/* sliding packet range of received originator messages in sequence numbers 48/* sliding packet range of received originator messages in sequence numbers
48 * (should be a multiple of our word size) 49 * (should be a multiple of our word size)
49 */ 50 */
@@ -73,6 +74,11 @@
73 74
74#define BATADV_LOG_BUF_LEN 8192 /* has to be a power of 2 */ 75#define BATADV_LOG_BUF_LEN 8192 /* has to be a power of 2 */
75 76
77/* msecs after which an ARP_REQUEST is sent in broadcast as fallback */
78#define ARP_REQ_DELAY 250
79/* numbers of originator to contact for any PUT/GET DHT operation */
80#define BATADV_DAT_CANDIDATES_NUM 3
81
76#define BATADV_VIS_INTERVAL 5000 /* 5 seconds */ 82#define BATADV_VIS_INTERVAL 5000 /* 5 seconds */
77 83
78/* how much worse secondary interfaces may be to be considered as bonding 84/* how much worse secondary interfaces may be to be considered as bonding
@@ -117,6 +123,9 @@ enum batadv_uev_type {
117 123
118#define BATADV_GW_THRESHOLD 50 124#define BATADV_GW_THRESHOLD 50
119 125
126#define BATADV_DAT_CANDIDATE_NOT_FOUND 0
127#define BATADV_DAT_CANDIDATE_ORIG 1
128
120/* Debug Messages */ 129/* Debug Messages */
121#ifdef pr_fmt 130#ifdef pr_fmt
122#undef pr_fmt 131#undef pr_fmt
@@ -150,9 +159,9 @@ extern struct workqueue_struct *batadv_event_workqueue;
150 159
151int batadv_mesh_init(struct net_device *soft_iface); 160int batadv_mesh_init(struct net_device *soft_iface);
152void batadv_mesh_free(struct net_device *soft_iface); 161void batadv_mesh_free(struct net_device *soft_iface);
153void batadv_inc_module_count(void);
154void batadv_dec_module_count(void);
155int batadv_is_my_mac(const uint8_t *addr); 162int batadv_is_my_mac(const uint8_t *addr);
163struct batadv_hard_iface *
164batadv_seq_print_text_primary_if_get(struct seq_file *seq);
156int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, 165int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
157 struct packet_type *ptype, 166 struct packet_type *ptype,
158 struct net_device *orig_dev); 167 struct net_device *orig_dev);
@@ -165,13 +174,22 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops);
165int batadv_algo_select(struct batadv_priv *bat_priv, char *name); 174int batadv_algo_select(struct batadv_priv *bat_priv, char *name);
166int batadv_algo_seq_print_text(struct seq_file *seq, void *offset); 175int batadv_algo_seq_print_text(struct seq_file *seq, void *offset);
167 176
168/* all messages related to routing / flooding / broadcasting / etc */ 177/**
178 * enum batadv_dbg_level - available log levels
179 * @BATADV_DBG_BATMAN: OGM and TQ computations related messages
180 * @BATADV_DBG_ROUTES: route added / changed / deleted
181 * @BATADV_DBG_TT: translation table messages
182 * @BATADV_DBG_BLA: bridge loop avoidance messages
183 * @BATADV_DBG_DAT: ARP snooping and DAT related messages
184 * @BATADV_DBG_ALL: the union of all the above log levels
185 */
169enum batadv_dbg_level { 186enum batadv_dbg_level {
170 BATADV_DBG_BATMAN = BIT(0), 187 BATADV_DBG_BATMAN = BIT(0),
171 BATADV_DBG_ROUTES = BIT(1), /* route added / changed / deleted */ 188 BATADV_DBG_ROUTES = BIT(1),
172 BATADV_DBG_TT = BIT(2), /* translation table operations */ 189 BATADV_DBG_TT = BIT(2),
173 BATADV_DBG_BLA = BIT(3), /* bridge loop avoidance */ 190 BATADV_DBG_BLA = BIT(3),
174 BATADV_DBG_ALL = 15, 191 BATADV_DBG_DAT = BIT(4),
192 BATADV_DBG_ALL = 31,
175}; 193};
176 194
177#ifdef CONFIG_BATMAN_ADV_DEBUG 195#ifdef CONFIG_BATMAN_ADV_DEBUG
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index ac9bdf8f80a6..84930a4f5369 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -18,6 +18,7 @@
18 */ 18 */
19 19
20#include "main.h" 20#include "main.h"
21#include "distributed-arp-table.h"
21#include "originator.h" 22#include "originator.h"
22#include "hash.h" 23#include "hash.h"
23#include "translation-table.h" 24#include "translation-table.h"
@@ -223,6 +224,7 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
223 orig_node->tt_poss_change = false; 224 orig_node->tt_poss_change = false;
224 orig_node->bat_priv = bat_priv; 225 orig_node->bat_priv = bat_priv;
225 memcpy(orig_node->orig, addr, ETH_ALEN); 226 memcpy(orig_node->orig, addr, ETH_ALEN);
227 batadv_dat_init_orig_node_addr(orig_node);
226 orig_node->router = NULL; 228 orig_node->router = NULL;
227 orig_node->tt_crc = 0; 229 orig_node->tt_crc = 0;
228 atomic_set(&orig_node->last_ttvn, 0); 230 atomic_set(&orig_node->last_ttvn, 0);
@@ -415,23 +417,10 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
415 int last_seen_msecs; 417 int last_seen_msecs;
416 unsigned long last_seen_jiffies; 418 unsigned long last_seen_jiffies;
417 uint32_t i; 419 uint32_t i;
418 int ret = 0;
419 420
420 primary_if = batadv_primary_if_get_selected(bat_priv); 421 primary_if = batadv_seq_print_text_primary_if_get(seq);
421 422 if (!primary_if)
422 if (!primary_if) {
423 ret = seq_printf(seq,
424 "BATMAN mesh %s disabled - please specify interfaces to enable it\n",
425 net_dev->name);
426 goto out;
427 }
428
429 if (primary_if->if_status != BATADV_IF_ACTIVE) {
430 ret = seq_printf(seq,
431 "BATMAN mesh %s disabled - primary interface not active\n",
432 net_dev->name);
433 goto out; 423 goto out;
434 }
435 424
436 seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", 425 seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n",
437 BATADV_SOURCE_VERSION, primary_if->net_dev->name, 426 BATADV_SOURCE_VERSION, primary_if->net_dev->name,
@@ -485,7 +474,7 @@ next:
485out: 474out:
486 if (primary_if) 475 if (primary_if)
487 batadv_hardif_free_ref(primary_if); 476 batadv_hardif_free_ref(primary_if);
488 return ret; 477 return 0;
489} 478}
490 479
491static int batadv_orig_node_add_if(struct batadv_orig_node *orig_node, 480static int batadv_orig_node_add_if(struct batadv_orig_node *orig_node,
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 2d23a14c220e..df548ed196d3 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -23,14 +23,29 @@
23#define BATADV_ETH_P_BATMAN 0x4305 /* unofficial/not registered Ethertype */ 23#define BATADV_ETH_P_BATMAN 0x4305 /* unofficial/not registered Ethertype */
24 24
25enum batadv_packettype { 25enum batadv_packettype {
26 BATADV_IV_OGM = 0x01, 26 BATADV_IV_OGM = 0x01,
27 BATADV_ICMP = 0x02, 27 BATADV_ICMP = 0x02,
28 BATADV_UNICAST = 0x03, 28 BATADV_UNICAST = 0x03,
29 BATADV_BCAST = 0x04, 29 BATADV_BCAST = 0x04,
30 BATADV_VIS = 0x05, 30 BATADV_VIS = 0x05,
31 BATADV_UNICAST_FRAG = 0x06, 31 BATADV_UNICAST_FRAG = 0x06,
32 BATADV_TT_QUERY = 0x07, 32 BATADV_TT_QUERY = 0x07,
33 BATADV_ROAM_ADV = 0x08, 33 BATADV_ROAM_ADV = 0x08,
34 BATADV_UNICAST_4ADDR = 0x09,
35};
36
37/**
38 * enum batadv_subtype - packet subtype for unicast4addr
39 * @BATADV_P_DATA: user payload
40 * @BATADV_P_DAT_DHT_GET: DHT request message
41 * @BATADV_P_DAT_DHT_PUT: DHT store message
42 * @BATADV_P_DAT_CACHE_REPLY: ARP reply generated by DAT
43 */
44enum batadv_subtype {
45 BATADV_P_DATA = 0x01,
46 BATADV_P_DAT_DHT_GET = 0x02,
47 BATADV_P_DAT_DHT_PUT = 0x03,
48 BATADV_P_DAT_CACHE_REPLY = 0x04,
34}; 49};
35 50
36/* this file is included by batctl which needs these defines */ 51/* this file is included by batctl which needs these defines */
@@ -106,13 +121,16 @@ struct batadv_bla_claim_dst {
106 uint8_t magic[3]; /* FF:43:05 */ 121 uint8_t magic[3]; /* FF:43:05 */
107 uint8_t type; /* bla_claimframe */ 122 uint8_t type; /* bla_claimframe */
108 __be16 group; /* group id */ 123 __be16 group; /* group id */
109} __packed; 124};
110 125
111struct batadv_header { 126struct batadv_header {
112 uint8_t packet_type; 127 uint8_t packet_type;
113 uint8_t version; /* batman version field */ 128 uint8_t version; /* batman version field */
114 uint8_t ttl; 129 uint8_t ttl;
115} __packed; 130 /* the parent struct has to add a byte after the header to make
131 * everything 4 bytes aligned again
132 */
133};
116 134
117struct batadv_ogm_packet { 135struct batadv_ogm_packet {
118 struct batadv_header header; 136 struct batadv_header header;
@@ -137,7 +155,7 @@ struct batadv_icmp_packet {
137 __be16 seqno; 155 __be16 seqno;
138 uint8_t uid; 156 uint8_t uid;
139 uint8_t reserved; 157 uint8_t reserved;
140} __packed; 158};
141 159
142#define BATADV_RR_LEN 16 160#define BATADV_RR_LEN 16
143 161
@@ -153,13 +171,32 @@ struct batadv_icmp_packet_rr {
153 uint8_t uid; 171 uint8_t uid;
154 uint8_t rr_cur; 172 uint8_t rr_cur;
155 uint8_t rr[BATADV_RR_LEN][ETH_ALEN]; 173 uint8_t rr[BATADV_RR_LEN][ETH_ALEN];
156} __packed; 174};
157 175
158struct batadv_unicast_packet { 176struct batadv_unicast_packet {
159 struct batadv_header header; 177 struct batadv_header header;
160 uint8_t ttvn; /* destination translation table version number */ 178 uint8_t ttvn; /* destination translation table version number */
161 uint8_t dest[ETH_ALEN]; 179 uint8_t dest[ETH_ALEN];
162} __packed; 180 /* "4 bytes boundary + 2 bytes" long to make the payload after the
181 * following ethernet header again 4 bytes boundary aligned
182 */
183};
184
185/**
186 * struct batadv_unicast_4addr_packet - extended unicast packet
187 * @u: common unicast packet header
188 * @src: address of the source
189 * @subtype: packet subtype
190 */
191struct batadv_unicast_4addr_packet {
192 struct batadv_unicast_packet u;
193 uint8_t src[ETH_ALEN];
194 uint8_t subtype;
195 uint8_t reserved;
196 /* "4 bytes boundary + 2 bytes" long to make the payload after the
197 * following ethernet header again 4 bytes boundary aligned
198 */
199};
163 200
164struct batadv_unicast_frag_packet { 201struct batadv_unicast_frag_packet {
165 struct batadv_header header; 202 struct batadv_header header;
@@ -176,6 +213,9 @@ struct batadv_bcast_packet {
176 uint8_t reserved; 213 uint8_t reserved;
177 __be32 seqno; 214 __be32 seqno;
178 uint8_t orig[ETH_ALEN]; 215 uint8_t orig[ETH_ALEN];
216 /* "4 bytes boundary + 2 bytes" long to make the payload after the
217 * following ethernet header again 4 bytes boundary aligned
218 */
179} __packed; 219} __packed;
180 220
181struct batadv_vis_packet { 221struct batadv_vis_packet {
@@ -187,7 +227,7 @@ struct batadv_vis_packet {
187 uint8_t vis_orig[ETH_ALEN]; /* originator reporting its neighbors */ 227 uint8_t vis_orig[ETH_ALEN]; /* originator reporting its neighbors */
188 uint8_t target_orig[ETH_ALEN]; /* who should receive this packet */ 228 uint8_t target_orig[ETH_ALEN]; /* who should receive this packet */
189 uint8_t sender_orig[ETH_ALEN]; /* who sent or forwarded this packet */ 229 uint8_t sender_orig[ETH_ALEN]; /* who sent or forwarded this packet */
190} __packed; 230};
191 231
192struct batadv_tt_query_packet { 232struct batadv_tt_query_packet {
193 struct batadv_header header; 233 struct batadv_header header;
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 376b4cc6ca82..32aa4d460e1f 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -28,6 +28,7 @@
28#include "vis.h" 28#include "vis.h"
29#include "unicast.h" 29#include "unicast.h"
30#include "bridge_loop_avoidance.h" 30#include "bridge_loop_avoidance.h"
31#include "distributed-arp-table.h"
31 32
32static int batadv_route_unicast_packet(struct sk_buff *skb, 33static int batadv_route_unicast_packet(struct sk_buff *skb,
33 struct batadv_hard_iface *recv_if); 34 struct batadv_hard_iface *recv_if);
@@ -549,25 +550,18 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig,
549 if (tmp_neigh_node->if_incoming == recv_if) 550 if (tmp_neigh_node->if_incoming == recv_if)
550 continue; 551 continue;
551 552
552 if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) 553 if (router && tmp_neigh_node->tq_avg <= router->tq_avg)
553 continue; 554 continue;
554 555
555 /* if we don't have a router yet 556 if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
556 * or this one is better, choose it. 557 continue;
557 */
558 if ((!router) ||
559 (tmp_neigh_node->tq_avg > router->tq_avg)) {
560 /* decrement refcount of
561 * previously selected router
562 */
563 if (router)
564 batadv_neigh_node_free_ref(router);
565 558
566 router = tmp_neigh_node; 559 /* decrement refcount of previously selected router */
567 atomic_inc_not_zero(&router->refcount); 560 if (router)
568 } 561 batadv_neigh_node_free_ref(router);
569 562
570 batadv_neigh_node_free_ref(tmp_neigh_node); 563 /* we found a better router (or at least one valid router) */
564 router = tmp_neigh_node;
571 } 565 }
572 566
573 /* use the first candidate if nothing was found. */ 567 /* use the first candidate if nothing was found. */
@@ -687,21 +681,8 @@ int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if)
687 struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); 681 struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
688 struct batadv_roam_adv_packet *roam_adv_packet; 682 struct batadv_roam_adv_packet *roam_adv_packet;
689 struct batadv_orig_node *orig_node; 683 struct batadv_orig_node *orig_node;
690 struct ethhdr *ethhdr;
691 684
692 /* drop packet if it has not necessary minimum size */ 685 if (batadv_check_unicast_packet(skb, sizeof(*roam_adv_packet)) < 0)
693 if (unlikely(!pskb_may_pull(skb,
694 sizeof(struct batadv_roam_adv_packet))))
695 goto out;
696
697 ethhdr = (struct ethhdr *)skb_mac_header(skb);
698
699 /* packet with unicast indication but broadcast recipient */
700 if (is_broadcast_ether_addr(ethhdr->h_dest))
701 goto out;
702
703 /* packet with broadcast sender address */
704 if (is_broadcast_ether_addr(ethhdr->h_source))
705 goto out; 686 goto out;
706 687
707 batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX); 688 batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX);
@@ -928,8 +909,12 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
928 bool tt_poss_change; 909 bool tt_poss_change;
929 int is_old_ttvn; 910 int is_old_ttvn;
930 911
931 /* I could need to modify it */ 912 /* check if there is enough data before accessing it */
932 if (skb_cow(skb, sizeof(struct batadv_unicast_packet)) < 0) 913 if (pskb_may_pull(skb, sizeof(*unicast_packet) + ETH_HLEN) < 0)
914 return 0;
915
916 /* create a copy of the skb (in case of for re-routing) to modify it. */
917 if (skb_cow(skb, sizeof(*unicast_packet)) < 0)
933 return 0; 918 return 0;
934 919
935 unicast_packet = (struct batadv_unicast_packet *)skb->data; 920 unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -985,10 +970,10 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
985 batadv_orig_node_free_ref(orig_node); 970 batadv_orig_node_free_ref(orig_node);
986 } 971 }
987 972
988 batadv_dbg(BATADV_DBG_ROUTES, bat_priv, 973 net_ratelimited_function(batadv_dbg, BATADV_DBG_TT, bat_priv,
989 "TTVN mismatch (old_ttvn %u new_ttvn %u)! Rerouting unicast packet (for %pM) to %pM\n", 974 "TTVN mismatch (old_ttvn %u new_ttvn %u)! Rerouting unicast packet (for %pM) to %pM\n",
990 unicast_packet->ttvn, curr_ttvn, ethhdr->h_dest, 975 unicast_packet->ttvn, curr_ttvn,
991 unicast_packet->dest); 976 ethhdr->h_dest, unicast_packet->dest);
992 977
993 unicast_packet->ttvn = curr_ttvn; 978 unicast_packet->ttvn = curr_ttvn;
994 } 979 }
@@ -1000,7 +985,19 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
1000{ 985{
1001 struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); 986 struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
1002 struct batadv_unicast_packet *unicast_packet; 987 struct batadv_unicast_packet *unicast_packet;
988 struct batadv_unicast_4addr_packet *unicast_4addr_packet;
989 uint8_t *orig_addr;
990 struct batadv_orig_node *orig_node = NULL;
1003 int hdr_size = sizeof(*unicast_packet); 991 int hdr_size = sizeof(*unicast_packet);
992 bool is4addr;
993
994 unicast_packet = (struct batadv_unicast_packet *)skb->data;
995 unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
996
997 is4addr = unicast_packet->header.packet_type == BATADV_UNICAST_4ADDR;
998 /* the caller function should have already pulled 2 bytes */
999 if (is4addr)
1000 hdr_size = sizeof(*unicast_4addr_packet);
1004 1001
1005 if (batadv_check_unicast_packet(skb, hdr_size) < 0) 1002 if (batadv_check_unicast_packet(skb, hdr_size) < 0)
1006 return NET_RX_DROP; 1003 return NET_RX_DROP;
@@ -1008,12 +1005,28 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
1008 if (!batadv_check_unicast_ttvn(bat_priv, skb)) 1005 if (!batadv_check_unicast_ttvn(bat_priv, skb))
1009 return NET_RX_DROP; 1006 return NET_RX_DROP;
1010 1007
1011 unicast_packet = (struct batadv_unicast_packet *)skb->data;
1012
1013 /* packet for me */ 1008 /* packet for me */
1014 if (batadv_is_my_mac(unicast_packet->dest)) { 1009 if (batadv_is_my_mac(unicast_packet->dest)) {
1010 if (is4addr) {
1011 batadv_dat_inc_counter(bat_priv,
1012 unicast_4addr_packet->subtype);
1013 orig_addr = unicast_4addr_packet->src;
1014 orig_node = batadv_orig_hash_find(bat_priv, orig_addr);
1015 }
1016
1017 if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb,
1018 hdr_size))
1019 goto rx_success;
1020 if (batadv_dat_snoop_incoming_arp_reply(bat_priv, skb,
1021 hdr_size))
1022 goto rx_success;
1023
1015 batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size, 1024 batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size,
1016 NULL); 1025 orig_node);
1026
1027rx_success:
1028 if (orig_node)
1029 batadv_orig_node_free_ref(orig_node);
1017 1030
1018 return NET_RX_SUCCESS; 1031 return NET_RX_SUCCESS;
1019 } 1032 }
@@ -1050,8 +1063,17 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb,
1050 if (!new_skb) 1063 if (!new_skb)
1051 return NET_RX_SUCCESS; 1064 return NET_RX_SUCCESS;
1052 1065
1066 if (batadv_dat_snoop_incoming_arp_request(bat_priv, new_skb,
1067 hdr_size))
1068 goto rx_success;
1069 if (batadv_dat_snoop_incoming_arp_reply(bat_priv, new_skb,
1070 hdr_size))
1071 goto rx_success;
1072
1053 batadv_interface_rx(recv_if->soft_iface, new_skb, recv_if, 1073 batadv_interface_rx(recv_if->soft_iface, new_skb, recv_if,
1054 sizeof(struct batadv_unicast_packet), NULL); 1074 sizeof(struct batadv_unicast_packet), NULL);
1075
1076rx_success:
1055 return NET_RX_SUCCESS; 1077 return NET_RX_SUCCESS;
1056 } 1078 }
1057 1079
@@ -1143,9 +1165,16 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
1143 if (batadv_bla_is_backbone_gw(skb, orig_node, hdr_size)) 1165 if (batadv_bla_is_backbone_gw(skb, orig_node, hdr_size))
1144 goto out; 1166 goto out;
1145 1167
1168 if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb, hdr_size))
1169 goto rx_success;
1170 if (batadv_dat_snoop_incoming_arp_reply(bat_priv, skb, hdr_size))
1171 goto rx_success;
1172
1146 /* broadcast for me */ 1173 /* broadcast for me */
1147 batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size, 1174 batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size,
1148 orig_node); 1175 orig_node);
1176
1177rx_success:
1149 ret = NET_RX_SUCCESS; 1178 ret = NET_RX_SUCCESS;
1150 goto out; 1179 goto out;
1151 1180
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 570a8bce0364..660d9bf7d219 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -18,6 +18,7 @@
18 */ 18 */
19 19
20#include "main.h" 20#include "main.h"
21#include "distributed-arp-table.h"
21#include "send.h" 22#include "send.h"
22#include "routing.h" 23#include "routing.h"
23#include "translation-table.h" 24#include "translation-table.h"
@@ -209,6 +210,9 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
209 if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) 210 if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
210 goto out; 211 goto out;
211 212
213 if (batadv_dat_drop_broadcast_packet(bat_priv, forw_packet))
214 goto out;
215
212 /* rebroadcast packet */ 216 /* rebroadcast packet */
213 rcu_read_lock(); 217 rcu_read_lock();
214 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { 218 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index b9a28d2dd3e8..c283d87c4cce 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -20,6 +20,7 @@
20#include "main.h" 20#include "main.h"
21#include "soft-interface.h" 21#include "soft-interface.h"
22#include "hard-interface.h" 22#include "hard-interface.h"
23#include "distributed-arp-table.h"
23#include "routing.h" 24#include "routing.h"
24#include "send.h" 25#include "send.h"
25#include "debugfs.h" 26#include "debugfs.h"
@@ -146,13 +147,16 @@ static int batadv_interface_tx(struct sk_buff *skb,
146 struct batadv_bcast_packet *bcast_packet; 147 struct batadv_bcast_packet *bcast_packet;
147 struct vlan_ethhdr *vhdr; 148 struct vlan_ethhdr *vhdr;
148 __be16 ethertype = __constant_htons(BATADV_ETH_P_BATMAN); 149 __be16 ethertype = __constant_htons(BATADV_ETH_P_BATMAN);
149 static const uint8_t stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, 0x00, 150 static const uint8_t stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00,
150 0x00}; 151 0x00, 0x00};
152 static const uint8_t ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00,
153 0x00, 0x00};
151 unsigned int header_len = 0; 154 unsigned int header_len = 0;
152 int data_len = skb->len, ret; 155 int data_len = skb->len, ret;
153 short vid __maybe_unused = -1; 156 short vid __maybe_unused = -1;
154 bool do_bcast = false; 157 bool do_bcast = false;
155 uint32_t seqno; 158 uint32_t seqno;
159 unsigned long brd_delay = 1;
156 160
157 if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) 161 if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
158 goto dropped; 162 goto dropped;
@@ -180,10 +184,16 @@ static int batadv_interface_tx(struct sk_buff *skb,
180 184
181 /* don't accept stp packets. STP does not help in meshes. 185 /* don't accept stp packets. STP does not help in meshes.
182 * better use the bridge loop avoidance ... 186 * better use the bridge loop avoidance ...
187 *
188 * The same goes for ECTP sent at least by some Cisco Switches,
189 * it might confuse the mesh when used with bridge loop avoidance.
183 */ 190 */
184 if (batadv_compare_eth(ethhdr->h_dest, stp_addr)) 191 if (batadv_compare_eth(ethhdr->h_dest, stp_addr))
185 goto dropped; 192 goto dropped;
186 193
194 if (batadv_compare_eth(ethhdr->h_dest, ectp_addr))
195 goto dropped;
196
187 if (is_multicast_ether_addr(ethhdr->h_dest)) { 197 if (is_multicast_ether_addr(ethhdr->h_dest)) {
188 do_bcast = true; 198 do_bcast = true;
189 199
@@ -216,6 +226,13 @@ static int batadv_interface_tx(struct sk_buff *skb,
216 if (!primary_if) 226 if (!primary_if)
217 goto dropped; 227 goto dropped;
218 228
229 /* in case of ARP request, we do not immediately broadcasti the
230 * packet, instead we first wait for DAT to try to retrieve the
231 * correct ARP entry
232 */
233 if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb))
234 brd_delay = msecs_to_jiffies(ARP_REQ_DELAY);
235
219 if (batadv_skb_head_push(skb, sizeof(*bcast_packet)) < 0) 236 if (batadv_skb_head_push(skb, sizeof(*bcast_packet)) < 0)
220 goto dropped; 237 goto dropped;
221 238
@@ -237,7 +254,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
237 seqno = atomic_inc_return(&bat_priv->bcast_seqno); 254 seqno = atomic_inc_return(&bat_priv->bcast_seqno);
238 bcast_packet->seqno = htonl(seqno); 255 bcast_packet->seqno = htonl(seqno);
239 256
240 batadv_add_bcast_packet_to_list(bat_priv, skb, 1); 257 batadv_add_bcast_packet_to_list(bat_priv, skb, brd_delay);
241 258
242 /* a copy is stored in the bcast list, therefore removing 259 /* a copy is stored in the bcast list, therefore removing
243 * the original skb. 260 * the original skb.
@@ -252,7 +269,12 @@ static int batadv_interface_tx(struct sk_buff *skb,
252 goto dropped; 269 goto dropped;
253 } 270 }
254 271
255 ret = batadv_unicast_send_skb(skb, bat_priv); 272 if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb))
273 goto dropped;
274
275 batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb);
276
277 ret = batadv_unicast_send_skb(bat_priv, skb);
256 if (ret != 0) 278 if (ret != 0)
257 goto dropped_freed; 279 goto dropped_freed;
258 } 280 }
@@ -347,7 +369,51 @@ out:
347 return; 369 return;
348} 370}
349 371
372/* batman-adv network devices have devices nesting below it and are a special
373 * "super class" of normal network devices; split their locks off into a
374 * separate class since they always nest.
375 */
376static struct lock_class_key batadv_netdev_xmit_lock_key;
377static struct lock_class_key batadv_netdev_addr_lock_key;
378
379/**
380 * batadv_set_lockdep_class_one - Set lockdep class for a single tx queue
381 * @dev: device which owns the tx queue
382 * @txq: tx queue to modify
383 * @_unused: always NULL
384 */
385static void batadv_set_lockdep_class_one(struct net_device *dev,
386 struct netdev_queue *txq,
387 void *_unused)
388{
389 lockdep_set_class(&txq->_xmit_lock, &batadv_netdev_xmit_lock_key);
390}
391
392/**
393 * batadv_set_lockdep_class - Set txq and addr_list lockdep class
394 * @dev: network device to modify
395 */
396static void batadv_set_lockdep_class(struct net_device *dev)
397{
398 lockdep_set_class(&dev->addr_list_lock, &batadv_netdev_addr_lock_key);
399 netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL);
400}
401
402/**
403 * batadv_softif_init - Late stage initialization of soft interface
404 * @dev: registered network device to modify
405 *
406 * Returns error code on failures
407 */
408static int batadv_softif_init(struct net_device *dev)
409{
410 batadv_set_lockdep_class(dev);
411
412 return 0;
413}
414
350static const struct net_device_ops batadv_netdev_ops = { 415static const struct net_device_ops batadv_netdev_ops = {
416 .ndo_init = batadv_softif_init,
351 .ndo_open = batadv_interface_open, 417 .ndo_open = batadv_interface_open,
352 .ndo_stop = batadv_interface_release, 418 .ndo_stop = batadv_interface_release,
353 .ndo_get_stats = batadv_interface_stats, 419 .ndo_get_stats = batadv_interface_stats,
@@ -414,6 +480,9 @@ struct net_device *batadv_softif_create(const char *name)
414 atomic_set(&bat_priv->aggregated_ogms, 1); 480 atomic_set(&bat_priv->aggregated_ogms, 1);
415 atomic_set(&bat_priv->bonding, 0); 481 atomic_set(&bat_priv->bonding, 0);
416 atomic_set(&bat_priv->bridge_loop_avoidance, 0); 482 atomic_set(&bat_priv->bridge_loop_avoidance, 0);
483#ifdef CONFIG_BATMAN_ADV_DAT
484 atomic_set(&bat_priv->distributed_arp_table, 1);
485#endif
417 atomic_set(&bat_priv->ap_isolation, 0); 486 atomic_set(&bat_priv->ap_isolation, 0);
418 atomic_set(&bat_priv->vis_mode, BATADV_VIS_TYPE_CLIENT_UPDATE); 487 atomic_set(&bat_priv->vis_mode, BATADV_VIS_TYPE_CLIENT_UPDATE);
419 atomic_set(&bat_priv->gw_mode, BATADV_GW_MODE_OFF); 488 atomic_set(&bat_priv->gw_mode, BATADV_GW_MODE_OFF);
@@ -556,6 +625,13 @@ static const struct {
556 { "tt_response_rx" }, 625 { "tt_response_rx" },
557 { "tt_roam_adv_tx" }, 626 { "tt_roam_adv_tx" },
558 { "tt_roam_adv_rx" }, 627 { "tt_roam_adv_rx" },
628#ifdef CONFIG_BATMAN_ADV_DAT
629 { "dat_get_tx" },
630 { "dat_get_rx" },
631 { "dat_put_tx" },
632 { "dat_put_rx" },
633 { "dat_cached_reply_tx" },
634#endif
559}; 635};
560 636
561static void batadv_get_strings(struct net_device *dev, uint32_t stringset, 637static void batadv_get_strings(struct net_device *dev, uint32_t stringset,
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 66518c75c217..fa3cc1af0918 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -20,6 +20,7 @@
20#include "main.h" 20#include "main.h"
21#include "sysfs.h" 21#include "sysfs.h"
22#include "translation-table.h" 22#include "translation-table.h"
23#include "distributed-arp-table.h"
23#include "originator.h" 24#include "originator.h"
24#include "hard-interface.h" 25#include "hard-interface.h"
25#include "gateway_common.h" 26#include "gateway_common.h"
@@ -122,55 +123,6 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \
122 batadv_store_##_name) 123 batadv_store_##_name)
123 124
124 125
125#define BATADV_ATTR_HIF_STORE_UINT(_name, _min, _max, _post_func) \
126ssize_t batadv_store_##_name(struct kobject *kobj, \
127 struct attribute *attr, char *buff, \
128 size_t count) \
129{ \
130 struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \
131 struct batadv_hard_iface *hard_iface; \
132 ssize_t length; \
133 \
134 hard_iface = batadv_hardif_get_by_netdev(net_dev); \
135 if (!hard_iface) \
136 return 0; \
137 \
138 length = __batadv_store_uint_attr(buff, count, _min, _max, \
139 _post_func, attr, \
140 &hard_iface->_name, net_dev); \
141 \
142 batadv_hardif_free_ref(hard_iface); \
143 return length; \
144}
145
146#define BATADV_ATTR_HIF_SHOW_UINT(_name) \
147ssize_t batadv_show_##_name(struct kobject *kobj, \
148 struct attribute *attr, char *buff) \
149{ \
150 struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \
151 struct batadv_hard_iface *hard_iface; \
152 ssize_t length; \
153 \
154 hard_iface = batadv_hardif_get_by_netdev(net_dev); \
155 if (!hard_iface) \
156 return 0; \
157 \
158 length = sprintf(buff, "%i\n", atomic_read(&hard_iface->_name));\
159 \
160 batadv_hardif_free_ref(hard_iface); \
161 return length; \
162}
163
164/* Use this, if you are going to set [name] in hard_iface to an
165 * unsigned integer value
166 */
167#define BATADV_ATTR_HIF_UINT(_name, _mode, _min, _max, _post_func) \
168 static BATADV_ATTR_HIF_STORE_UINT(_name, _min, _max, _post_func)\
169 static BATADV_ATTR_HIF_SHOW_UINT(_name) \
170 static BATADV_ATTR(_name, _mode, batadv_show_##_name, \
171 batadv_store_##_name)
172
173
174static int batadv_store_bool_attr(char *buff, size_t count, 126static int batadv_store_bool_attr(char *buff, size_t count,
175 struct net_device *net_dev, 127 struct net_device *net_dev,
176 const char *attr_name, atomic_t *attr) 128 const char *attr_name, atomic_t *attr)
@@ -469,6 +421,9 @@ BATADV_ATTR_SIF_BOOL(bonding, S_IRUGO | S_IWUSR, NULL);
469#ifdef CONFIG_BATMAN_ADV_BLA 421#ifdef CONFIG_BATMAN_ADV_BLA
470BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR, NULL); 422BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR, NULL);
471#endif 423#endif
424#ifdef CONFIG_BATMAN_ADV_DAT
425BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR, NULL);
426#endif
472BATADV_ATTR_SIF_BOOL(fragmentation, S_IRUGO | S_IWUSR, batadv_update_min_mtu); 427BATADV_ATTR_SIF_BOOL(fragmentation, S_IRUGO | S_IWUSR, batadv_update_min_mtu);
473BATADV_ATTR_SIF_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL); 428BATADV_ATTR_SIF_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL);
474static BATADV_ATTR(vis_mode, S_IRUGO | S_IWUSR, batadv_show_vis_mode, 429static BATADV_ATTR(vis_mode, S_IRUGO | S_IWUSR, batadv_show_vis_mode,
@@ -494,6 +449,9 @@ static struct batadv_attribute *batadv_mesh_attrs[] = {
494#ifdef CONFIG_BATMAN_ADV_BLA 449#ifdef CONFIG_BATMAN_ADV_BLA
495 &batadv_attr_bridge_loop_avoidance, 450 &batadv_attr_bridge_loop_avoidance,
496#endif 451#endif
452#ifdef CONFIG_BATMAN_ADV_DAT
453 &batadv_attr_distributed_arp_table,
454#endif
497 &batadv_attr_fragmentation, 455 &batadv_attr_fragmentation,
498 &batadv_attr_ap_isolation, 456 &batadv_attr_ap_isolation,
499 &batadv_attr_vis_mode, 457 &batadv_attr_vis_mode,
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 112edd371b2f..f8b9c32c29a5 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -434,22 +434,10 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
434 struct hlist_node *node; 434 struct hlist_node *node;
435 struct hlist_head *head; 435 struct hlist_head *head;
436 uint32_t i; 436 uint32_t i;
437 int ret = 0;
438
439 primary_if = batadv_primary_if_get_selected(bat_priv);
440 if (!primary_if) {
441 ret = seq_printf(seq,
442 "BATMAN mesh %s disabled - please specify interfaces to enable it\n",
443 net_dev->name);
444 goto out;
445 }
446 437
447 if (primary_if->if_status != BATADV_IF_ACTIVE) { 438 primary_if = batadv_seq_print_text_primary_if_get(seq);
448 ret = seq_printf(seq, 439 if (!primary_if)
449 "BATMAN mesh %s disabled - primary interface not active\n",
450 net_dev->name);
451 goto out; 440 goto out;
452 }
453 441
454 seq_printf(seq, 442 seq_printf(seq,
455 "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n", 443 "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n",
@@ -479,7 +467,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
479out: 467out:
480 if (primary_if) 468 if (primary_if)
481 batadv_hardif_free_ref(primary_if); 469 batadv_hardif_free_ref(primary_if);
482 return ret; 470 return 0;
483} 471}
484 472
485static void 473static void
@@ -501,24 +489,39 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
501 tt_local_entry->common.addr, message); 489 tt_local_entry->common.addr, message);
502} 490}
503 491
504void batadv_tt_local_remove(struct batadv_priv *bat_priv, const uint8_t *addr, 492/**
505 const char *message, bool roaming) 493 * batadv_tt_local_remove - logically remove an entry from the local table
494 * @bat_priv: the bat priv with all the soft interface information
495 * @addr: the MAC address of the client to remove
496 * @message: message to append to the log on deletion
497 * @roaming: true if the deletion is due to a roaming event
498 *
499 * Returns the flags assigned to the local entry before being deleted
500 */
501uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
502 const uint8_t *addr, const char *message,
503 bool roaming)
506{ 504{
507 struct batadv_tt_local_entry *tt_local_entry = NULL; 505 struct batadv_tt_local_entry *tt_local_entry = NULL;
508 uint16_t flags; 506 uint16_t flags, curr_flags = BATADV_NO_FLAGS;
509 507
510 tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr); 508 tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr);
511 if (!tt_local_entry) 509 if (!tt_local_entry)
512 goto out; 510 goto out;
513 511
512 curr_flags = tt_local_entry->common.flags;
513
514 flags = BATADV_TT_CLIENT_DEL; 514 flags = BATADV_TT_CLIENT_DEL;
515 if (roaming) 515 if (roaming)
516 flags |= BATADV_TT_CLIENT_ROAM; 516 flags |= BATADV_TT_CLIENT_ROAM;
517 517
518 batadv_tt_local_set_pending(bat_priv, tt_local_entry, flags, message); 518 batadv_tt_local_set_pending(bat_priv, tt_local_entry, flags, message);
519
519out: 520out:
520 if (tt_local_entry) 521 if (tt_local_entry)
521 batadv_tt_local_entry_free_ref(tt_local_entry); 522 batadv_tt_local_entry_free_ref(tt_local_entry);
523
524 return curr_flags;
522} 525}
523 526
524static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv, 527static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv,
@@ -725,6 +728,7 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv,
725 int ret = 0; 728 int ret = 0;
726 int hash_added; 729 int hash_added;
727 struct batadv_tt_common_entry *common; 730 struct batadv_tt_common_entry *common;
731 uint16_t local_flags;
728 732
729 tt_global_entry = batadv_tt_global_hash_find(bat_priv, tt_addr); 733 tt_global_entry = batadv_tt_global_hash_find(bat_priv, tt_addr);
730 734
@@ -738,6 +742,12 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv,
738 742
739 common->flags = flags; 743 common->flags = flags;
740 tt_global_entry->roam_at = 0; 744 tt_global_entry->roam_at = 0;
745 /* node must store current time in case of roaming. This is
746 * needed to purge this entry out on timeout (if nobody claims
747 * it)
748 */
749 if (flags & BATADV_TT_CLIENT_ROAM)
750 tt_global_entry->roam_at = jiffies;
741 atomic_set(&common->refcount, 2); 751 atomic_set(&common->refcount, 2);
742 common->added_at = jiffies; 752 common->added_at = jiffies;
743 753
@@ -788,13 +798,16 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv,
788 batadv_dbg(BATADV_DBG_TT, bat_priv, 798 batadv_dbg(BATADV_DBG_TT, bat_priv,
789 "Creating new global tt entry: %pM (via %pM)\n", 799 "Creating new global tt entry: %pM (via %pM)\n",
790 tt_global_entry->common.addr, orig_node->orig); 800 tt_global_entry->common.addr, orig_node->orig);
801 ret = 1;
791 802
792out_remove: 803out_remove:
804
793 /* remove address from local hash if present */ 805 /* remove address from local hash if present */
794 batadv_tt_local_remove(bat_priv, tt_global_entry->common.addr, 806 local_flags = batadv_tt_local_remove(bat_priv, tt_addr,
795 "global tt received", 807 "global tt received",
796 flags & BATADV_TT_CLIENT_ROAM); 808 flags & BATADV_TT_CLIENT_ROAM);
797 ret = 1; 809 tt_global_entry->common.flags |= local_flags & BATADV_TT_CLIENT_WIFI;
810
798out: 811out:
799 if (tt_global_entry) 812 if (tt_global_entry)
800 batadv_tt_global_entry_free_ref(tt_global_entry); 813 batadv_tt_global_entry_free_ref(tt_global_entry);
@@ -842,22 +855,10 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
842 struct hlist_node *node; 855 struct hlist_node *node;
843 struct hlist_head *head; 856 struct hlist_head *head;
844 uint32_t i; 857 uint32_t i;
845 int ret = 0;
846
847 primary_if = batadv_primary_if_get_selected(bat_priv);
848 if (!primary_if) {
849 ret = seq_printf(seq,
850 "BATMAN mesh %s disabled - please specify interfaces to enable it\n",
851 net_dev->name);
852 goto out;
853 }
854 858
855 if (primary_if->if_status != BATADV_IF_ACTIVE) { 859 primary_if = batadv_seq_print_text_primary_if_get(seq);
856 ret = seq_printf(seq, 860 if (!primary_if)
857 "BATMAN mesh %s disabled - primary interface not active\n",
858 net_dev->name);
859 goto out; 861 goto out;
860 }
861 862
862 seq_printf(seq, 863 seq_printf(seq,
863 "Globally announced TT entries received via the mesh %s\n", 864 "Globally announced TT entries received via the mesh %s\n",
@@ -881,7 +882,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
881out: 882out:
882 if (primary_if) 883 if (primary_if)
883 batadv_hardif_free_ref(primary_if); 884 batadv_hardif_free_ref(primary_if);
884 return ret; 885 return 0;
885} 886}
886 887
887/* deletes the orig list of a tt_global_entry */ 888/* deletes the orig list of a tt_global_entry */
@@ -1471,11 +1472,11 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
1471 tt_tot = tt_len / sizeof(struct batadv_tt_change); 1472 tt_tot = tt_len / sizeof(struct batadv_tt_change);
1472 1473
1473 len = tt_query_size + tt_len; 1474 len = tt_query_size + tt_len;
1474 skb = dev_alloc_skb(len + ETH_HLEN); 1475 skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN);
1475 if (!skb) 1476 if (!skb)
1476 goto out; 1477 goto out;
1477 1478
1478 skb_reserve(skb, ETH_HLEN); 1479 skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
1479 tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len); 1480 tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len);
1480 tt_response->ttvn = ttvn; 1481 tt_response->ttvn = ttvn;
1481 1482
@@ -1537,11 +1538,11 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
1537 if (!tt_req_node) 1538 if (!tt_req_node)
1538 goto out; 1539 goto out;
1539 1540
1540 skb = dev_alloc_skb(sizeof(*tt_request) + ETH_HLEN); 1541 skb = dev_alloc_skb(sizeof(*tt_request) + ETH_HLEN + NET_IP_ALIGN);
1541 if (!skb) 1542 if (!skb)
1542 goto out; 1543 goto out;
1543 1544
1544 skb_reserve(skb, ETH_HLEN); 1545 skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
1545 1546
1546 tt_req_len = sizeof(*tt_request); 1547 tt_req_len = sizeof(*tt_request);
1547 tt_request = (struct batadv_tt_query_packet *)skb_put(skb, tt_req_len); 1548 tt_request = (struct batadv_tt_query_packet *)skb_put(skb, tt_req_len);
@@ -1652,11 +1653,11 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
1652 tt_tot = tt_len / sizeof(struct batadv_tt_change); 1653 tt_tot = tt_len / sizeof(struct batadv_tt_change);
1653 1654
1654 len = sizeof(*tt_response) + tt_len; 1655 len = sizeof(*tt_response) + tt_len;
1655 skb = dev_alloc_skb(len + ETH_HLEN); 1656 skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN);
1656 if (!skb) 1657 if (!skb)
1657 goto unlock; 1658 goto unlock;
1658 1659
1659 skb_reserve(skb, ETH_HLEN); 1660 skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
1660 packet_pos = skb_put(skb, len); 1661 packet_pos = skb_put(skb, len);
1661 tt_response = (struct batadv_tt_query_packet *)packet_pos; 1662 tt_response = (struct batadv_tt_query_packet *)packet_pos;
1662 tt_response->ttvn = req_ttvn; 1663 tt_response->ttvn = req_ttvn;
@@ -1779,11 +1780,11 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
1779 tt_tot = tt_len / sizeof(struct batadv_tt_change); 1780 tt_tot = tt_len / sizeof(struct batadv_tt_change);
1780 1781
1781 len = sizeof(*tt_response) + tt_len; 1782 len = sizeof(*tt_response) + tt_len;
1782 skb = dev_alloc_skb(len + ETH_HLEN); 1783 skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN);
1783 if (!skb) 1784 if (!skb)
1784 goto unlock; 1785 goto unlock;
1785 1786
1786 skb_reserve(skb, ETH_HLEN); 1787 skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
1787 packet_pos = skb_put(skb, len); 1788 packet_pos = skb_put(skb, len);
1788 tt_response = (struct batadv_tt_query_packet *)packet_pos; 1789 tt_response = (struct batadv_tt_query_packet *)packet_pos;
1789 tt_response->ttvn = req_ttvn; 1790 tt_response->ttvn = req_ttvn;
@@ -2117,11 +2118,11 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
2117 if (!batadv_tt_check_roam_count(bat_priv, client)) 2118 if (!batadv_tt_check_roam_count(bat_priv, client))
2118 goto out; 2119 goto out;
2119 2120
2120 skb = dev_alloc_skb(sizeof(*roam_adv_packet) + ETH_HLEN); 2121 skb = dev_alloc_skb(sizeof(*roam_adv_packet) + ETH_HLEN + NET_IP_ALIGN);
2121 if (!skb) 2122 if (!skb)
2122 goto out; 2123 goto out;
2123 2124
2124 skb_reserve(skb, ETH_HLEN); 2125 skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN);
2125 2126
2126 roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len); 2127 roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len);
2127 2128
@@ -2438,7 +2439,7 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
2438 if (!tt_global_entry) 2439 if (!tt_global_entry)
2439 goto out; 2440 goto out;
2440 2441
2441 ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM; 2442 ret = !!(tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM);
2442 batadv_tt_global_entry_free_ref(tt_global_entry); 2443 batadv_tt_global_entry_free_ref(tt_global_entry);
2443out: 2444out:
2444 return ret; 2445 return ret;
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 811fffd4760c..9fa4fe41c868 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -24,9 +24,9 @@ int batadv_tt_len(int changes_num);
24int batadv_tt_init(struct batadv_priv *bat_priv); 24int batadv_tt_init(struct batadv_priv *bat_priv);
25void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, 25void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
26 int ifindex); 26 int ifindex);
27void batadv_tt_local_remove(struct batadv_priv *bat_priv, 27uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
28 const uint8_t *addr, const char *message, 28 const uint8_t *addr, const char *message,
29 bool roaming); 29 bool roaming);
30int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset); 30int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset);
31void batadv_tt_global_add_orig(struct batadv_priv *bat_priv, 31void batadv_tt_global_add_orig(struct batadv_priv *bat_priv,
32 struct batadv_orig_node *orig_node, 32 struct batadv_orig_node *orig_node,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index ac1e07a80454..8ce16c1cbafb 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -28,20 +28,41 @@
28 (ETH_HLEN + max(sizeof(struct batadv_unicast_packet), \ 28 (ETH_HLEN + max(sizeof(struct batadv_unicast_packet), \
29 sizeof(struct batadv_bcast_packet))) 29 sizeof(struct batadv_bcast_packet)))
30 30
31#ifdef CONFIG_BATMAN_ADV_DAT
32
33/* batadv_dat_addr_t is the type used for all DHT addresses. If it is changed,
34 * BATADV_DAT_ADDR_MAX is changed as well.
35 *
36 * *Please be careful: batadv_dat_addr_t must be UNSIGNED*
37 */
38#define batadv_dat_addr_t uint16_t
39
40#endif /* CONFIG_BATMAN_ADV_DAT */
41
42/**
43 * struct batadv_hard_iface_bat_iv - per hard interface B.A.T.M.A.N. IV data
44 * @ogm_buff: buffer holding the OGM packet
45 * @ogm_buff_len: length of the OGM packet buffer
46 * @ogm_seqno: OGM sequence number - used to identify each OGM
47 */
48struct batadv_hard_iface_bat_iv {
49 unsigned char *ogm_buff;
50 int ogm_buff_len;
51 atomic_t ogm_seqno;
52};
53
31struct batadv_hard_iface { 54struct batadv_hard_iface {
32 struct list_head list; 55 struct list_head list;
33 int16_t if_num; 56 int16_t if_num;
34 char if_status; 57 char if_status;
35 struct net_device *net_dev; 58 struct net_device *net_dev;
36 atomic_t seqno;
37 atomic_t frag_seqno; 59 atomic_t frag_seqno;
38 unsigned char *packet_buff;
39 int packet_len;
40 struct kobject *hardif_obj; 60 struct kobject *hardif_obj;
41 atomic_t refcount; 61 atomic_t refcount;
42 struct packet_type batman_adv_ptype; 62 struct packet_type batman_adv_ptype;
43 struct net_device *soft_iface; 63 struct net_device *soft_iface;
44 struct rcu_head rcu; 64 struct rcu_head rcu;
65 struct batadv_hard_iface_bat_iv bat_iv;
45}; 66};
46 67
47/** 68/**
@@ -63,6 +84,9 @@ struct batadv_orig_node {
63 uint8_t orig[ETH_ALEN]; 84 uint8_t orig[ETH_ALEN];
64 uint8_t primary_addr[ETH_ALEN]; 85 uint8_t primary_addr[ETH_ALEN];
65 struct batadv_neigh_node __rcu *router; /* rcu protected pointer */ 86 struct batadv_neigh_node __rcu *router; /* rcu protected pointer */
87#ifdef CONFIG_BATMAN_ADV_DAT
88 batadv_dat_addr_t dat_addr;
89#endif
66 unsigned long *bcast_own; 90 unsigned long *bcast_own;
67 uint8_t *bcast_own_sum; 91 uint8_t *bcast_own_sum;
68 unsigned long last_seen; 92 unsigned long last_seen;
@@ -162,6 +186,13 @@ enum batadv_counters {
162 BATADV_CNT_TT_RESPONSE_RX, 186 BATADV_CNT_TT_RESPONSE_RX,
163 BATADV_CNT_TT_ROAM_ADV_TX, 187 BATADV_CNT_TT_ROAM_ADV_TX,
164 BATADV_CNT_TT_ROAM_ADV_RX, 188 BATADV_CNT_TT_ROAM_ADV_RX,
189#ifdef CONFIG_BATMAN_ADV_DAT
190 BATADV_CNT_DAT_GET_TX,
191 BATADV_CNT_DAT_GET_RX,
192 BATADV_CNT_DAT_PUT_TX,
193 BATADV_CNT_DAT_PUT_RX,
194 BATADV_CNT_DAT_CACHED_REPLY_TX,
195#endif
165 BATADV_CNT_NUM, 196 BATADV_CNT_NUM,
166}; 197};
167 198
@@ -228,6 +259,20 @@ struct batadv_priv_vis {
228 struct batadv_vis_info *my_info; 259 struct batadv_vis_info *my_info;
229}; 260};
230 261
262/**
263 * struct batadv_priv_dat - per mesh interface DAT private data
264 * @addr: node DAT address
265 * @hash: hashtable representing the local ARP cache
266 * @work: work queue callback item for cache purging
267 */
268#ifdef CONFIG_BATMAN_ADV_DAT
269struct batadv_priv_dat {
270 batadv_dat_addr_t addr;
271 struct batadv_hashtable *hash;
272 struct delayed_work work;
273};
274#endif
275
231struct batadv_priv { 276struct batadv_priv {
232 atomic_t mesh_state; 277 atomic_t mesh_state;
233 struct net_device_stats stats; 278 struct net_device_stats stats;
@@ -237,6 +282,9 @@ struct batadv_priv {
237 atomic_t fragmentation; /* boolean */ 282 atomic_t fragmentation; /* boolean */
238 atomic_t ap_isolation; /* boolean */ 283 atomic_t ap_isolation; /* boolean */
239 atomic_t bridge_loop_avoidance; /* boolean */ 284 atomic_t bridge_loop_avoidance; /* boolean */
285#ifdef CONFIG_BATMAN_ADV_DAT
286 atomic_t distributed_arp_table; /* boolean */
287#endif
240 atomic_t vis_mode; /* VIS_TYPE_* */ 288 atomic_t vis_mode; /* VIS_TYPE_* */
241 atomic_t gw_mode; /* GW_MODE_* */ 289 atomic_t gw_mode; /* GW_MODE_* */
242 atomic_t gw_sel_class; /* uint */ 290 atomic_t gw_sel_class; /* uint */
@@ -265,6 +313,9 @@ struct batadv_priv {
265 struct batadv_priv_gw gw; 313 struct batadv_priv_gw gw;
266 struct batadv_priv_tt tt; 314 struct batadv_priv_tt tt;
267 struct batadv_priv_vis vis; 315 struct batadv_priv_vis vis;
316#ifdef CONFIG_BATMAN_ADV_DAT
317 struct batadv_priv_dat dat;
318#endif
268}; 319};
269 320
270struct batadv_socket_client { 321struct batadv_socket_client {
@@ -437,4 +488,36 @@ struct batadv_algo_ops {
437 void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet); 488 void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet);
438}; 489};
439 490
491/**
492 * struct batadv_dat_entry - it is a single entry of batman-adv ARP backend. It
493 * is used to stored ARP entries needed for the global DAT cache
494 * @ip: the IPv4 corresponding to this DAT/ARP entry
495 * @mac_addr: the MAC address associated to the stored IPv4
496 * @last_update: time in jiffies when this entry was refreshed last time
497 * @hash_entry: hlist node for batadv_priv_dat::hash
498 * @refcount: number of contexts the object is used
499 * @rcu: struct used for freeing in an RCU-safe manner
500 */
501struct batadv_dat_entry {
502 __be32 ip;
503 uint8_t mac_addr[ETH_ALEN];
504 unsigned long last_update;
505 struct hlist_node hash_entry;
506 atomic_t refcount;
507 struct rcu_head rcu;
508};
509
510/**
511 * struct batadv_dat_candidate - candidate destination for DAT operations
512 * @type: the type of the selected candidate. It can one of the following:
513 * - BATADV_DAT_CANDIDATE_NOT_FOUND
514 * - BATADV_DAT_CANDIDATE_ORIG
515 * @orig_node: if type is BATADV_DAT_CANDIDATE_ORIG this field points to the
516 * corresponding originator node structure
517 */
518struct batadv_dat_candidate {
519 int type;
520 struct batadv_orig_node *orig_node;
521};
522
440#endif /* _NET_BATMAN_ADV_TYPES_H_ */ 523#endif /* _NET_BATMAN_ADV_TYPES_H_ */
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index f39723281ca1..c9a1f6523c36 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -291,7 +291,111 @@ out:
291 return ret; 291 return ret;
292} 292}
293 293
294int batadv_unicast_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv) 294/**
295 * batadv_unicast_push_and_fill_skb - extends the buffer and initializes the
296 * common fields for unicast packets
297 * @skb: packet
298 * @hdr_size: amount of bytes to push at the beginning of the skb
299 * @orig_node: the destination node
300 *
301 * Returns false if the buffer extension was not possible or true otherwise
302 */
303static bool batadv_unicast_push_and_fill_skb(struct sk_buff *skb, int hdr_size,
304 struct batadv_orig_node *orig_node)
305{
306 struct batadv_unicast_packet *unicast_packet;
307 uint8_t ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
308
309 if (batadv_skb_head_push(skb, hdr_size) < 0)
310 return false;
311
312 unicast_packet = (struct batadv_unicast_packet *)skb->data;
313 unicast_packet->header.version = BATADV_COMPAT_VERSION;
314 /* batman packet type: unicast */
315 unicast_packet->header.packet_type = BATADV_UNICAST;
316 /* set unicast ttl */
317 unicast_packet->header.ttl = BATADV_TTL;
318 /* copy the destination for faster routing */
319 memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN);
320 /* set the destination tt version number */
321 unicast_packet->ttvn = ttvn;
322
323 return true;
324}
325
326/**
327 * batadv_unicast_prepare_skb - encapsulate an skb with a unicast header
328 * @skb: the skb containing the payload to encapsulate
329 * @orig_node: the destination node
330 *
331 * Returns false if the payload could not be encapsulated or true otherwise
332 */
333static bool batadv_unicast_prepare_skb(struct sk_buff *skb,
334 struct batadv_orig_node *orig_node)
335{
336 size_t uni_size = sizeof(struct batadv_unicast_packet);
337 return batadv_unicast_push_and_fill_skb(skb, uni_size, orig_node);
338}
339
340/**
341 * batadv_unicast_4addr_prepare_skb - encapsulate an skb with a unicast4addr
342 * header
343 * @bat_priv: the bat priv with all the soft interface information
344 * @skb: the skb containing the payload to encapsulate
345 * @orig_node: the destination node
346 * @packet_subtype: the batman 4addr packet subtype to use
347 *
348 * Returns false if the payload could not be encapsulated or true otherwise
349 */
350bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv,
351 struct sk_buff *skb,
352 struct batadv_orig_node *orig,
353 int packet_subtype)
354{
355 struct batadv_hard_iface *primary_if;
356 struct batadv_unicast_4addr_packet *unicast_4addr_packet;
357 bool ret = false;
358
359 primary_if = batadv_primary_if_get_selected(bat_priv);
360 if (!primary_if)
361 goto out;
362
363 /* pull the header space and fill the unicast_packet substructure.
364 * We can do that because the first member of the unicast_4addr_packet
365 * is of type struct unicast_packet
366 */
367 if (!batadv_unicast_push_and_fill_skb(skb,
368 sizeof(*unicast_4addr_packet),
369 orig))
370 goto out;
371
372 unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
373 unicast_4addr_packet->u.header.packet_type = BATADV_UNICAST_4ADDR;
374 memcpy(unicast_4addr_packet->src, primary_if->net_dev->dev_addr,
375 ETH_ALEN);
376 unicast_4addr_packet->subtype = packet_subtype;
377 unicast_4addr_packet->reserved = 0;
378
379 ret = true;
380out:
381 if (primary_if)
382 batadv_hardif_free_ref(primary_if);
383 return ret;
384}
385
386/**
387 * batadv_unicast_generic_send_skb - send an skb as unicast
388 * @bat_priv: the bat priv with all the soft interface information
389 * @skb: payload to send
390 * @packet_type: the batman unicast packet type to use
391 * @packet_subtype: the batman packet subtype. It is ignored if packet_type is
392 * not BATADV_UNICAT_4ADDR
393 *
394 * Returns 1 in case of error or 0 otherwise
395 */
396int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv,
397 struct sk_buff *skb, int packet_type,
398 int packet_subtype)
295{ 399{
296 struct ethhdr *ethhdr = (struct ethhdr *)skb->data; 400 struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
297 struct batadv_unicast_packet *unicast_packet; 401 struct batadv_unicast_packet *unicast_packet;
@@ -324,21 +428,23 @@ find_router:
324 if (!neigh_node) 428 if (!neigh_node)
325 goto out; 429 goto out;
326 430
327 if (batadv_skb_head_push(skb, sizeof(*unicast_packet)) < 0) 431 switch (packet_type) {
432 case BATADV_UNICAST:
433 batadv_unicast_prepare_skb(skb, orig_node);
434 break;
435 case BATADV_UNICAST_4ADDR:
436 batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node,
437 packet_subtype);
438 break;
439 default:
440 /* this function supports UNICAST and UNICAST_4ADDR only. It
441 * should never be invoked with any other packet type
442 */
328 goto out; 443 goto out;
444 }
329 445
330 unicast_packet = (struct batadv_unicast_packet *)skb->data; 446 unicast_packet = (struct batadv_unicast_packet *)skb->data;
331 447
332 unicast_packet->header.version = BATADV_COMPAT_VERSION;
333 /* batman packet type: unicast */
334 unicast_packet->header.packet_type = BATADV_UNICAST;
335 /* set unicast ttl */
336 unicast_packet->header.ttl = BATADV_TTL;
337 /* copy the destination for faster routing */
338 memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN);
339 /* set the destination tt version number */
340 unicast_packet->ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
341
342 /* inform the destination node that we are still missing a correct route 448 /* inform the destination node that we are still missing a correct route
343 * for this client. The destination will receive this packet and will 449 * for this client. The destination will receive this packet and will
344 * try to reroute it because the ttvn contained in the header is less 450 * try to reroute it because the ttvn contained in the header is less
@@ -348,7 +454,9 @@ find_router:
348 unicast_packet->ttvn = unicast_packet->ttvn - 1; 454 unicast_packet->ttvn = unicast_packet->ttvn - 1;
349 455
350 dev_mtu = neigh_node->if_incoming->net_dev->mtu; 456 dev_mtu = neigh_node->if_incoming->net_dev->mtu;
351 if (atomic_read(&bat_priv->fragmentation) && 457 /* fragmentation mechanism only works for UNICAST (now) */
458 if (packet_type == BATADV_UNICAST &&
459 atomic_read(&bat_priv->fragmentation) &&
352 data_len + sizeof(*unicast_packet) > dev_mtu) { 460 data_len + sizeof(*unicast_packet) > dev_mtu) {
353 /* send frag skb decreases ttl */ 461 /* send frag skb decreases ttl */
354 unicast_packet->header.ttl++; 462 unicast_packet->header.ttl++;
@@ -360,7 +468,6 @@ find_router:
360 468
361 batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); 469 batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
362 ret = 0; 470 ret = 0;
363 goto out;
364 471
365out: 472out:
366 if (neigh_node) 473 if (neigh_node)
diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h
index 1c46e2eb1ef9..61abba58bd8f 100644
--- a/net/batman-adv/unicast.h
+++ b/net/batman-adv/unicast.h
@@ -29,10 +29,44 @@ int batadv_frag_reassemble_skb(struct sk_buff *skb,
29 struct batadv_priv *bat_priv, 29 struct batadv_priv *bat_priv,
30 struct sk_buff **new_skb); 30 struct sk_buff **new_skb);
31void batadv_frag_list_free(struct list_head *head); 31void batadv_frag_list_free(struct list_head *head);
32int batadv_unicast_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv);
33int batadv_frag_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv, 32int batadv_frag_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv,
34 struct batadv_hard_iface *hard_iface, 33 struct batadv_hard_iface *hard_iface,
35 const uint8_t dstaddr[]); 34 const uint8_t dstaddr[]);
35bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv,
36 struct sk_buff *skb,
37 struct batadv_orig_node *orig_node,
38 int packet_subtype);
39int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv,
40 struct sk_buff *skb, int packet_type,
41 int packet_subtype);
42
43
44/**
45 * batadv_unicast_send_skb - send the skb encapsulated in a unicast packet
46 * @bat_priv: the bat priv with all the soft interface information
47 * @skb: the payload to send
48 */
49static inline int batadv_unicast_send_skb(struct batadv_priv *bat_priv,
50 struct sk_buff *skb)
51{
52 return batadv_unicast_generic_send_skb(bat_priv, skb, BATADV_UNICAST,
53 0);
54}
55
56/**
57 * batadv_unicast_send_skb - send the skb encapsulated in a unicast4addr packet
58 * @bat_priv: the bat priv with all the soft interface information
59 * @skb: the payload to send
60 * @packet_subtype: the batman 4addr packet subtype to use
61 */
62static inline int batadv_unicast_4addr_send_skb(struct batadv_priv *bat_priv,
63 struct sk_buff *skb,
64 int packet_subtype)
65{
66 return batadv_unicast_generic_send_skb(bat_priv, skb,
67 BATADV_UNICAST_4ADDR,
68 packet_subtype);
69}
36 70
37static inline int batadv_frag_can_reassemble(const struct sk_buff *skb, int mtu) 71static inline int batadv_frag_can_reassemble(const struct sk_buff *skb, int mtu)
38{ 72{
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index 5abd1454fb07..ad14a6c91d6a 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -396,12 +396,12 @@ batadv_add_packet(struct batadv_priv *bat_priv,
396 return NULL; 396 return NULL;
397 397
398 len = sizeof(*packet) + vis_info_len; 398 len = sizeof(*packet) + vis_info_len;
399 info->skb_packet = dev_alloc_skb(len + ETH_HLEN); 399 info->skb_packet = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN);
400 if (!info->skb_packet) { 400 if (!info->skb_packet) {
401 kfree(info); 401 kfree(info);
402 return NULL; 402 return NULL;
403 } 403 }
404 skb_reserve(info->skb_packet, ETH_HLEN); 404 skb_reserve(info->skb_packet, ETH_HLEN + NET_IP_ALIGN);
405 packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len); 405 packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len);
406 406
407 kref_init(&info->refcount); 407 kref_init(&info->refcount);
@@ -873,12 +873,13 @@ int batadv_vis_init(struct batadv_priv *bat_priv)
873 if (!bat_priv->vis.my_info) 873 if (!bat_priv->vis.my_info)
874 goto err; 874 goto err;
875 875
876 len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE + ETH_HLEN; 876 len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE;
877 len += ETH_HLEN + NET_IP_ALIGN;
877 bat_priv->vis.my_info->skb_packet = dev_alloc_skb(len); 878 bat_priv->vis.my_info->skb_packet = dev_alloc_skb(len);
878 if (!bat_priv->vis.my_info->skb_packet) 879 if (!bat_priv->vis.my_info->skb_packet)
879 goto free_info; 880 goto free_info;
880 881
881 skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN); 882 skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN + NET_IP_ALIGN);
882 tmp_skb = bat_priv->vis.my_info->skb_packet; 883 tmp_skb = bat_priv->vis.my_info->skb_packet;
883 packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet)); 884 packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet));
884 885
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 070e8a68cfc6..7c78e2640190 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -313,6 +313,8 @@ static const struct net_device_ops br_netdev_ops = {
313 .ndo_fdb_add = br_fdb_add, 313 .ndo_fdb_add = br_fdb_add,
314 .ndo_fdb_del = br_fdb_delete, 314 .ndo_fdb_del = br_fdb_delete,
315 .ndo_fdb_dump = br_fdb_dump, 315 .ndo_fdb_dump = br_fdb_dump,
316 .ndo_bridge_getlink = br_getlink,
317 .ndo_bridge_setlink = br_setlink,
316}; 318};
317 319
318static void br_dev_free(struct net_device *dev) 320static void br_dev_free(struct net_device *dev)
@@ -356,7 +358,7 @@ void br_dev_setup(struct net_device *dev)
356 br->bridge_id.prio[0] = 0x80; 358 br->bridge_id.prio[0] = 0x80;
357 br->bridge_id.prio[1] = 0x00; 359 br->bridge_id.prio[1] = 0x00;
358 360
359 memcpy(br->group_addr, br_group_address, ETH_ALEN); 361 memcpy(br->group_addr, eth_reserved_addr_base, ETH_ALEN);
360 362
361 br->stp_enabled = BR_NO_STP; 363 br->stp_enabled = BR_NO_STP;
362 br->group_fwd_mask = BR_GROUPFWD_DEFAULT; 364 br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 76f15fda0212..4b34207419b1 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -19,9 +19,6 @@
19#include <linux/export.h> 19#include <linux/export.h>
20#include "br_private.h" 20#include "br_private.h"
21 21
22/* Bridge group multicast address 802.1d (pg 51). */
23const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
24
25/* Hook for brouter */ 22/* Hook for brouter */
26br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; 23br_should_route_hook_t __rcu *br_should_route_hook __read_mostly;
27EXPORT_SYMBOL(br_should_route_hook); 24EXPORT_SYMBOL(br_should_route_hook);
@@ -127,18 +124,6 @@ static int br_handle_local_finish(struct sk_buff *skb)
127 return 0; /* process further */ 124 return 0; /* process further */
128} 125}
129 126
130/* Does address match the link local multicast address.
131 * 01:80:c2:00:00:0X
132 */
133static inline int is_link_local(const unsigned char *dest)
134{
135 __be16 *a = (__be16 *)dest;
136 static const __be16 *b = (const __be16 *)br_group_address;
137 static const __be16 m = cpu_to_be16(0xfff0);
138
139 return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
140}
141
142/* 127/*
143 * Return NULL if skb is handled 128 * Return NULL if skb is handled
144 * note: already called with rcu_read_lock 129 * note: already called with rcu_read_lock
@@ -162,7 +147,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
162 147
163 p = br_port_get_rcu(skb->dev); 148 p = br_port_get_rcu(skb->dev);
164 149
165 if (unlikely(is_link_local(dest))) { 150 if (unlikely(is_link_local_ether_addr(dest))) {
166 /* 151 /*
167 * See IEEE 802.1D Table 7-10 Reserved addresses 152 * See IEEE 802.1D Table 7-10 Reserved addresses
168 * 153 *
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 093f527276a3..14b065cbd214 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -111,54 +111,33 @@ errout:
111/* 111/*
112 * Dump information about all ports, in response to GETLINK 112 * Dump information about all ports, in response to GETLINK
113 */ 113 */
114static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) 114int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
115 struct net_device *dev)
115{ 116{
116 struct net *net = sock_net(skb->sk); 117 int err = 0;
117 struct net_device *dev; 118 struct net_bridge_port *port = br_port_get_rcu(dev);
118 int idx; 119
119 120 /* not a bridge port */
120 idx = 0; 121 if (!port)
121 rcu_read_lock(); 122 goto out;
122 for_each_netdev_rcu(net, dev) {
123 struct net_bridge_port *port = br_port_get_rcu(dev);
124
125 /* not a bridge port */
126 if (!port || idx < cb->args[0])
127 goto skip;
128
129 if (br_fill_ifinfo(skb, port,
130 NETLINK_CB(cb->skb).portid,
131 cb->nlh->nlmsg_seq, RTM_NEWLINK,
132 NLM_F_MULTI) < 0)
133 break;
134skip:
135 ++idx;
136 }
137 rcu_read_unlock();
138 cb->args[0] = idx;
139 123
140 return skb->len; 124 err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI);
125out:
126 return err;
141} 127}
142 128
143/* 129/*
144 * Change state of port (ie from forwarding to blocking etc) 130 * Change state of port (ie from forwarding to blocking etc)
145 * Used by spanning tree in user space. 131 * Used by spanning tree in user space.
146 */ 132 */
147static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 133int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
148{ 134{
149 struct net *net = sock_net(skb->sk);
150 struct ifinfomsg *ifm; 135 struct ifinfomsg *ifm;
151 struct nlattr *protinfo; 136 struct nlattr *protinfo;
152 struct net_device *dev;
153 struct net_bridge_port *p; 137 struct net_bridge_port *p;
154 u8 new_state; 138 u8 new_state;
155 139
156 if (nlmsg_len(nlh) < sizeof(*ifm))
157 return -EINVAL;
158
159 ifm = nlmsg_data(nlh); 140 ifm = nlmsg_data(nlh);
160 if (ifm->ifi_family != AF_BRIDGE)
161 return -EPFNOSUPPORT;
162 141
163 protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); 142 protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO);
164 if (!protinfo || nla_len(protinfo) < sizeof(u8)) 143 if (!protinfo || nla_len(protinfo) < sizeof(u8))
@@ -168,10 +147,6 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
168 if (new_state > BR_STATE_BLOCKING) 147 if (new_state > BR_STATE_BLOCKING)
169 return -EINVAL; 148 return -EINVAL;
170 149
171 dev = __dev_get_by_index(net, ifm->ifi_index);
172 if (!dev)
173 return -ENODEV;
174
175 p = br_port_get_rtnl(dev); 150 p = br_port_get_rtnl(dev);
176 if (!p) 151 if (!p)
177 return -EINVAL; 152 return -EINVAL;
@@ -191,8 +166,6 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
191 br_port_state_selection(p->br); 166 br_port_state_selection(p->br);
192 spin_unlock_bh(&p->br->lock); 167 spin_unlock_bh(&p->br->lock);
193 168
194 br_ifinfo_notify(RTM_NEWLINK, p);
195
196 return 0; 169 return 0;
197} 170}
198 171
@@ -218,29 +191,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = {
218 191
219int __init br_netlink_init(void) 192int __init br_netlink_init(void)
220{ 193{
221 int err; 194 return rtnl_link_register(&br_link_ops);
222
223 err = rtnl_link_register(&br_link_ops);
224 if (err < 0)
225 goto err1;
226
227 err = __rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL,
228 br_dump_ifinfo, NULL);
229 if (err)
230 goto err2;
231 err = __rtnl_register(PF_BRIDGE, RTM_SETLINK,
232 br_rtm_setlink, NULL, NULL);
233 if (err)
234 goto err3;
235
236 return 0;
237
238err3:
239 rtnl_unregister_all(PF_BRIDGE);
240err2:
241 rtnl_link_unregister(&br_link_ops);
242err1:
243 return err;
244} 195}
245 196
246void __exit br_netlink_fini(void) 197void __exit br_netlink_fini(void)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 9b278c4ebee1..22111ffd68df 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -158,7 +158,9 @@ struct net_bridge_port
158 158
159static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev) 159static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
160{ 160{
161 struct net_bridge_port *port = rcu_dereference(dev->rx_handler_data); 161 struct net_bridge_port *port =
162 rcu_dereference_rtnl(dev->rx_handler_data);
163
162 return br_port_exists(dev) ? port : NULL; 164 return br_port_exists(dev) ? port : NULL;
163} 165}
164 166
@@ -288,7 +290,6 @@ struct br_input_skb_cb {
288 pr_debug("%s: " format, (br)->dev->name, ##args) 290 pr_debug("%s: " format, (br)->dev->name, ##args)
289 291
290extern struct notifier_block br_device_notifier; 292extern struct notifier_block br_device_notifier;
291extern const u8 br_group_address[ETH_ALEN];
292 293
293/* called under bridge lock */ 294/* called under bridge lock */
294static inline int br_is_root_bridge(const struct net_bridge *br) 295static inline int br_is_root_bridge(const struct net_bridge *br)
@@ -553,6 +554,9 @@ extern struct rtnl_link_ops br_link_ops;
553extern int br_netlink_init(void); 554extern int br_netlink_init(void);
554extern void br_netlink_fini(void); 555extern void br_netlink_fini(void);
555extern void br_ifinfo_notify(int event, struct net_bridge_port *port); 556extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
557extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg);
558extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
559 struct net_device *dev);
556 560
557#ifdef CONFIG_SYSFS 561#ifdef CONFIG_SYSFS
558/* br_sysfs_if.c */ 562/* br_sysfs_if.c */
@@ -566,10 +570,10 @@ extern void br_sysfs_delbr(struct net_device *dev);
566 570
567#else 571#else
568 572
569#define br_sysfs_addif(p) (0) 573static inline int br_sysfs_addif(struct net_bridge_port *p) { return 0; }
570#define br_sysfs_renameif(p) (0) 574static inline int br_sysfs_renameif(struct net_bridge_port *p) { return 0; }
571#define br_sysfs_addbr(dev) (0) 575static inline int br_sysfs_addbr(struct net_device *dev) { return 0; }
572#define br_sysfs_delbr(dev) do { } while(0) 576static inline void br_sysfs_delbr(struct net_device *dev) { return; }
573#endif /* CONFIG_SYSFS */ 577#endif /* CONFIG_SYSFS */
574 578
575#endif 579#endif
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index c5c059333eab..cffb76e2161c 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -14,6 +14,7 @@
14#include <linux/capability.h> 14#include <linux/capability.h>
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/netdevice.h> 16#include <linux/netdevice.h>
17#include <linux/etherdevice.h>
17#include <linux/if_bridge.h> 18#include <linux/if_bridge.h>
18#include <linux/rtnetlink.h> 19#include <linux/rtnetlink.h>
19#include <linux/spinlock.h> 20#include <linux/spinlock.h>
@@ -297,23 +298,18 @@ static ssize_t store_group_addr(struct device *d,
297 const char *buf, size_t len) 298 const char *buf, size_t len)
298{ 299{
299 struct net_bridge *br = to_bridge(d); 300 struct net_bridge *br = to_bridge(d);
300 unsigned int new_addr[6]; 301 u8 new_addr[6];
301 int i; 302 int i;
302 303
303 if (!capable(CAP_NET_ADMIN)) 304 if (!capable(CAP_NET_ADMIN))
304 return -EPERM; 305 return -EPERM;
305 306
306 if (sscanf(buf, "%x:%x:%x:%x:%x:%x", 307 if (sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
307 &new_addr[0], &new_addr[1], &new_addr[2], 308 &new_addr[0], &new_addr[1], &new_addr[2],
308 &new_addr[3], &new_addr[4], &new_addr[5]) != 6) 309 &new_addr[3], &new_addr[4], &new_addr[5]) != 6)
309 return -EINVAL; 310 return -EINVAL;
310 311
311 /* Must be 01:80:c2:00:00:0X */ 312 if (!is_link_local_ether_addr(new_addr))
312 for (i = 0; i < 5; i++)
313 if (new_addr[i] != br_group_address[i])
314 return -EINVAL;
315
316 if (new_addr[5] & ~0xf)
317 return -EINVAL; 313 return -EINVAL;
318 314
319 if (new_addr[5] == 1 || /* 802.3x Pause address */ 315 if (new_addr[5] == 1 || /* 802.3x Pause address */
diff --git a/net/core/dev.c b/net/core/dev.c
index bda6d004f9f0..83232a1be1e7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6264,7 +6264,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
6264 goto out; 6264 goto out;
6265 6265
6266 /* Ensure the device has been registrered */ 6266 /* Ensure the device has been registrered */
6267 err = -EINVAL;
6268 if (dev->reg_state != NETREG_REGISTERED) 6267 if (dev->reg_state != NETREG_REGISTERED)
6269 goto out; 6268 goto out;
6270 6269
diff --git a/net/core/filter.c b/net/core/filter.c
index 3d92ebb7fbcf..c23543cba132 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -39,6 +39,7 @@
39#include <linux/reciprocal_div.h> 39#include <linux/reciprocal_div.h>
40#include <linux/ratelimit.h> 40#include <linux/ratelimit.h>
41#include <linux/seccomp.h> 41#include <linux/seccomp.h>
42#include <linux/if_vlan.h>
42 43
43/* No hurry in this branch 44/* No hurry in this branch
44 * 45 *
@@ -341,6 +342,12 @@ load_b:
341 case BPF_S_ANC_CPU: 342 case BPF_S_ANC_CPU:
342 A = raw_smp_processor_id(); 343 A = raw_smp_processor_id();
343 continue; 344 continue;
345 case BPF_S_ANC_VLAN_TAG:
346 A = vlan_tx_tag_get(skb);
347 continue;
348 case BPF_S_ANC_VLAN_TAG_PRESENT:
349 A = !!vlan_tx_tag_present(skb);
350 continue;
344 case BPF_S_ANC_NLATTR: { 351 case BPF_S_ANC_NLATTR: {
345 struct nlattr *nla; 352 struct nlattr *nla;
346 353
@@ -600,6 +607,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
600 ANCILLARY(RXHASH); 607 ANCILLARY(RXHASH);
601 ANCILLARY(CPU); 608 ANCILLARY(CPU);
602 ANCILLARY(ALU_XOR_X); 609 ANCILLARY(ALU_XOR_X);
610 ANCILLARY(VLAN_TAG);
611 ANCILLARY(VLAN_TAG_PRESENT);
603 } 612 }
604 } 613 }
605 ftest->code = code; 614 ftest->code = code;
@@ -751,3 +760,133 @@ int sk_detach_filter(struct sock *sk)
751 return ret; 760 return ret;
752} 761}
753EXPORT_SYMBOL_GPL(sk_detach_filter); 762EXPORT_SYMBOL_GPL(sk_detach_filter);
763
764static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
765{
766 static const u16 decodes[] = {
767 [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K,
768 [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X,
769 [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K,
770 [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X,
771 [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K,
772 [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X,
773 [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X,
774 [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K,
775 [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X,
776 [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K,
777 [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X,
778 [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K,
779 [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X,
780 [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K,
781 [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X,
782 [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K,
783 [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X,
784 [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K,
785 [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X,
786 [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG,
787 [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS,
788 [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS,
789 [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS,
790 [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS,
791 [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS,
792 [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS,
793 [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS,
794 [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS,
795 [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS,
796 [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS,
797 [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS,
798 [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS,
799 [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS,
800 [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS,
801 [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
802 [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
803 [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
804 [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN,
805 [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND,
806 [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND,
807 [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND,
808 [BPF_S_LD_IMM] = BPF_LD|BPF_IMM,
809 [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN,
810 [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH,
811 [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM,
812 [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX,
813 [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA,
814 [BPF_S_RET_K] = BPF_RET|BPF_K,
815 [BPF_S_RET_A] = BPF_RET|BPF_A,
816 [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K,
817 [BPF_S_LD_MEM] = BPF_LD|BPF_MEM,
818 [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM,
819 [BPF_S_ST] = BPF_ST,
820 [BPF_S_STX] = BPF_STX,
821 [BPF_S_JMP_JA] = BPF_JMP|BPF_JA,
822 [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K,
823 [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X,
824 [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K,
825 [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X,
826 [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K,
827 [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X,
828 [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K,
829 [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X,
830 };
831 u16 code;
832
833 code = filt->code;
834
835 to->code = decodes[code];
836 to->jt = filt->jt;
837 to->jf = filt->jf;
838
839 if (code == BPF_S_ALU_DIV_K) {
840 /*
841 * When loaded this rule user gave us X, which was
842 * translated into R = r(X). Now we calculate the
843 * RR = r(R) and report it back. If next time this
844 * value is loaded and RRR = r(RR) is calculated
845 * then the R == RRR will be true.
846 *
847 * One exception. X == 1 translates into R == 0 and
848 * we can't calculate RR out of it with r().
849 */
850
851 if (filt->k == 0)
852 to->k = 1;
853 else
854 to->k = reciprocal_value(filt->k);
855
856 BUG_ON(reciprocal_value(to->k) != filt->k);
857 } else
858 to->k = filt->k;
859}
860
861int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len)
862{
863 struct sk_filter *filter;
864 int i, ret;
865
866 lock_sock(sk);
867 filter = rcu_dereference_protected(sk->sk_filter,
868 sock_owned_by_user(sk));
869 ret = 0;
870 if (!filter)
871 goto out;
872 ret = filter->len;
873 if (!len)
874 goto out;
875 ret = -EINVAL;
876 if (len < filter->len)
877 goto out;
878
879 ret = -EFAULT;
880 for (i = 0; i < filter->len; i++) {
881 struct sock_filter fb;
882
883 sk_decode_filter(&filter->insns[i], &fb);
884 if (copy_to_user(&ubuf[i], &fb, sizeof(fb)))
885 goto out;
886 }
887
888 ret = filter->len;
889out:
890 release_sock(sk);
891 return ret;
892}
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 79285a36035f..847c02b197b0 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -248,7 +248,7 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
248 return 0; 248 return 0;
249} 249}
250 250
251void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) 251static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
252{ 252{
253 struct task_struct *p; 253 struct task_struct *p;
254 void *v; 254 void *v;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index d1dc14c2aac4..b29dacf900f9 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -419,20 +419,6 @@ struct pktgen_thread {
419#define REMOVE 1 419#define REMOVE 1
420#define FIND 0 420#define FIND 0
421 421
422static inline ktime_t ktime_now(void)
423{
424 struct timespec ts;
425 ktime_get_ts(&ts);
426
427 return timespec_to_ktime(ts);
428}
429
430/* This works even if 32 bit because of careful byte order choice */
431static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2)
432{
433 return cmp1.tv64 < cmp2.tv64;
434}
435
436static const char version[] = 422static const char version[] =
437 "Packet Generator for packet performance testing. " 423 "Packet Generator for packet performance testing. "
438 "Version: " VERSION "\n"; 424 "Version: " VERSION "\n";
@@ -675,7 +661,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
675 seq_puts(seq, "\n"); 661 seq_puts(seq, "\n");
676 662
677 /* not really stopped, more like last-running-at */ 663 /* not really stopped, more like last-running-at */
678 stopped = pkt_dev->running ? ktime_now() : pkt_dev->stopped_at; 664 stopped = pkt_dev->running ? ktime_get() : pkt_dev->stopped_at;
679 idle = pkt_dev->idle_acc; 665 idle = pkt_dev->idle_acc;
680 do_div(idle, NSEC_PER_USEC); 666 do_div(idle, NSEC_PER_USEC);
681 667
@@ -2141,12 +2127,12 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
2141 return; 2127 return;
2142 } 2128 }
2143 2129
2144 start_time = ktime_now(); 2130 start_time = ktime_get();
2145 if (remaining < 100000) { 2131 if (remaining < 100000) {
2146 /* for small delays (<100us), just loop until limit is reached */ 2132 /* for small delays (<100us), just loop until limit is reached */
2147 do { 2133 do {
2148 end_time = ktime_now(); 2134 end_time = ktime_get();
2149 } while (ktime_lt(end_time, spin_until)); 2135 } while (ktime_compare(end_time, spin_until) < 0);
2150 } else { 2136 } else {
2151 /* see do_nanosleep */ 2137 /* see do_nanosleep */
2152 hrtimer_init_sleeper(&t, current); 2138 hrtimer_init_sleeper(&t, current);
@@ -2162,7 +2148,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
2162 hrtimer_cancel(&t.timer); 2148 hrtimer_cancel(&t.timer);
2163 } while (t.task && pkt_dev->running && !signal_pending(current)); 2149 } while (t.task && pkt_dev->running && !signal_pending(current));
2164 __set_current_state(TASK_RUNNING); 2150 __set_current_state(TASK_RUNNING);
2165 end_time = ktime_now(); 2151 end_time = ktime_get();
2166 } 2152 }
2167 2153
2168 pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); 2154 pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
@@ -2427,11 +2413,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2427 } 2413 }
2428 } else { /* IPV6 * */ 2414 } else { /* IPV6 * */
2429 2415
2430 if (pkt_dev->min_in6_daddr.s6_addr32[0] == 0 && 2416 if (!ipv6_addr_any(&pkt_dev->min_in6_daddr)) {
2431 pkt_dev->min_in6_daddr.s6_addr32[1] == 0 &&
2432 pkt_dev->min_in6_daddr.s6_addr32[2] == 0 &&
2433 pkt_dev->min_in6_daddr.s6_addr32[3] == 0) ;
2434 else {
2435 int i; 2417 int i;
2436 2418
2437 /* Only random destinations yet */ 2419 /* Only random destinations yet */
@@ -2916,8 +2898,7 @@ static void pktgen_run(struct pktgen_thread *t)
2916 pktgen_clear_counters(pkt_dev); 2898 pktgen_clear_counters(pkt_dev);
2917 pkt_dev->running = 1; /* Cranke yeself! */ 2899 pkt_dev->running = 1; /* Cranke yeself! */
2918 pkt_dev->skb = NULL; 2900 pkt_dev->skb = NULL;
2919 pkt_dev->started_at = 2901 pkt_dev->started_at = pkt_dev->next_tx = ktime_get();
2920 pkt_dev->next_tx = ktime_now();
2921 2902
2922 set_pkt_overhead(pkt_dev); 2903 set_pkt_overhead(pkt_dev);
2923 2904
@@ -3076,7 +3057,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
3076 3057
3077 kfree_skb(pkt_dev->skb); 3058 kfree_skb(pkt_dev->skb);
3078 pkt_dev->skb = NULL; 3059 pkt_dev->skb = NULL;
3079 pkt_dev->stopped_at = ktime_now(); 3060 pkt_dev->stopped_at = ktime_get();
3080 pkt_dev->running = 0; 3061 pkt_dev->running = 0;
3081 3062
3082 show_results(pkt_dev, nr_frags); 3063 show_results(pkt_dev, nr_frags);
@@ -3095,7 +3076,7 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
3095 continue; 3076 continue;
3096 if (best == NULL) 3077 if (best == NULL)
3097 best = pkt_dev; 3078 best = pkt_dev;
3098 else if (ktime_lt(pkt_dev->next_tx, best->next_tx)) 3079 else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0)
3099 best = pkt_dev; 3080 best = pkt_dev;
3100 } 3081 }
3101 if_unlock(t); 3082 if_unlock(t);
@@ -3180,14 +3161,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
3180 3161
3181static void pktgen_resched(struct pktgen_dev *pkt_dev) 3162static void pktgen_resched(struct pktgen_dev *pkt_dev)
3182{ 3163{
3183 ktime_t idle_start = ktime_now(); 3164 ktime_t idle_start = ktime_get();
3184 schedule(); 3165 schedule();
3185 pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); 3166 pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start));
3186} 3167}
3187 3168
3188static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) 3169static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
3189{ 3170{
3190 ktime_t idle_start = ktime_now(); 3171 ktime_t idle_start = ktime_get();
3191 3172
3192 while (atomic_read(&(pkt_dev->skb->users)) != 1) { 3173 while (atomic_read(&(pkt_dev->skb->users)) != 1) {
3193 if (signal_pending(current)) 3174 if (signal_pending(current))
@@ -3198,7 +3179,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
3198 else 3179 else
3199 cpu_relax(); 3180 cpu_relax();
3200 } 3181 }
3201 pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); 3182 pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start));
3202} 3183}
3203 3184
3204static void pktgen_xmit(struct pktgen_dev *pkt_dev) 3185static void pktgen_xmit(struct pktgen_dev *pkt_dev)
@@ -3220,7 +3201,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3220 * "never transmit" 3201 * "never transmit"
3221 */ 3202 */
3222 if (unlikely(pkt_dev->delay == ULLONG_MAX)) { 3203 if (unlikely(pkt_dev->delay == ULLONG_MAX)) {
3223 pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX); 3204 pkt_dev->next_tx = ktime_add_ns(ktime_get(), ULONG_MAX);
3224 return; 3205 return;
3225 } 3206 }
3226 3207
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fad649ae4dec..a810f6a61372 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -128,7 +128,7 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
128 if (tab == NULL || tab[msgindex].doit == NULL) 128 if (tab == NULL || tab[msgindex].doit == NULL)
129 tab = rtnl_msg_handlers[PF_UNSPEC]; 129 tab = rtnl_msg_handlers[PF_UNSPEC];
130 130
131 return tab ? tab[msgindex].doit : NULL; 131 return tab[msgindex].doit;
132} 132}
133 133
134static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) 134static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
@@ -143,7 +143,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
143 if (tab == NULL || tab[msgindex].dumpit == NULL) 143 if (tab == NULL || tab[msgindex].dumpit == NULL)
144 tab = rtnl_msg_handlers[PF_UNSPEC]; 144 tab = rtnl_msg_handlers[PF_UNSPEC];
145 145
146 return tab ? tab[msgindex].dumpit : NULL; 146 return tab[msgindex].dumpit;
147} 147}
148 148
149static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) 149static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
@@ -158,7 +158,7 @@ static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
158 if (tab == NULL || tab[msgindex].calcit == NULL) 158 if (tab == NULL || tab[msgindex].calcit == NULL)
159 tab = rtnl_msg_handlers[PF_UNSPEC]; 159 tab = rtnl_msg_handlers[PF_UNSPEC];
160 160
161 return tab ? tab[msgindex].calcit : NULL; 161 return tab[msgindex].calcit;
162} 162}
163 163
164/** 164/**
@@ -2253,6 +2253,211 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
2253 return skb->len; 2253 return skb->len;
2254} 2254}
2255 2255
2256int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
2257 struct net_device *dev, u16 mode)
2258{
2259 struct nlmsghdr *nlh;
2260 struct ifinfomsg *ifm;
2261 struct nlattr *br_afspec;
2262 u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
2263
2264 nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI);
2265 if (nlh == NULL)
2266 return -EMSGSIZE;
2267
2268 ifm = nlmsg_data(nlh);
2269 ifm->ifi_family = AF_BRIDGE;
2270 ifm->__ifi_pad = 0;
2271 ifm->ifi_type = dev->type;
2272 ifm->ifi_index = dev->ifindex;
2273 ifm->ifi_flags = dev_get_flags(dev);
2274 ifm->ifi_change = 0;
2275
2276
2277 if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
2278 nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
2279 nla_put_u8(skb, IFLA_OPERSTATE, operstate) ||
2280 (dev->master &&
2281 nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) ||
2282 (dev->addr_len &&
2283 nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
2284 (dev->ifindex != dev->iflink &&
2285 nla_put_u32(skb, IFLA_LINK, dev->iflink)))
2286 goto nla_put_failure;
2287
2288 br_afspec = nla_nest_start(skb, IFLA_AF_SPEC);
2289 if (!br_afspec)
2290 goto nla_put_failure;
2291
2292 if (nla_put_u16(skb, IFLA_BRIDGE_FLAGS, BRIDGE_FLAGS_SELF) ||
2293 nla_put_u16(skb, IFLA_BRIDGE_MODE, mode)) {
2294 nla_nest_cancel(skb, br_afspec);
2295 goto nla_put_failure;
2296 }
2297 nla_nest_end(skb, br_afspec);
2298
2299 return nlmsg_end(skb, nlh);
2300nla_put_failure:
2301 nlmsg_cancel(skb, nlh);
2302 return -EMSGSIZE;
2303}
2304EXPORT_SYMBOL(ndo_dflt_bridge_getlink);
2305
2306static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
2307{
2308 struct net *net = sock_net(skb->sk);
2309 struct net_device *dev;
2310 int idx = 0;
2311 u32 portid = NETLINK_CB(cb->skb).portid;
2312 u32 seq = cb->nlh->nlmsg_seq;
2313
2314 rcu_read_lock();
2315 for_each_netdev_rcu(net, dev) {
2316 const struct net_device_ops *ops = dev->netdev_ops;
2317 struct net_device *master = dev->master;
2318
2319 if (master && master->netdev_ops->ndo_bridge_getlink) {
2320 if (idx >= cb->args[0] &&
2321 master->netdev_ops->ndo_bridge_getlink(
2322 skb, portid, seq, dev) < 0)
2323 break;
2324 idx++;
2325 }
2326
2327 if (ops->ndo_bridge_getlink) {
2328 if (idx >= cb->args[0] &&
2329 ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0)
2330 break;
2331 idx++;
2332 }
2333 }
2334 rcu_read_unlock();
2335 cb->args[0] = idx;
2336
2337 return skb->len;
2338}
2339
2340static inline size_t bridge_nlmsg_size(void)
2341{
2342 return NLMSG_ALIGN(sizeof(struct ifinfomsg))
2343 + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
2344 + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
2345 + nla_total_size(sizeof(u32)) /* IFLA_MASTER */
2346 + nla_total_size(sizeof(u32)) /* IFLA_MTU */
2347 + nla_total_size(sizeof(u32)) /* IFLA_LINK */
2348 + nla_total_size(sizeof(u32)) /* IFLA_OPERSTATE */
2349 + nla_total_size(sizeof(u8)) /* IFLA_PROTINFO */
2350 + nla_total_size(sizeof(struct nlattr)) /* IFLA_AF_SPEC */
2351 + nla_total_size(sizeof(u16)) /* IFLA_BRIDGE_FLAGS */
2352 + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */
2353}
2354
2355static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
2356{
2357 struct net *net = dev_net(dev);
2358 struct net_device *master = dev->master;
2359 struct sk_buff *skb;
2360 int err = -EOPNOTSUPP;
2361
2362 skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC);
2363 if (!skb) {
2364 err = -ENOMEM;
2365 goto errout;
2366 }
2367
2368 if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) &&
2369 master && master->netdev_ops->ndo_bridge_getlink) {
2370 err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev);
2371 if (err < 0)
2372 goto errout;
2373 }
2374
2375 if ((flags & BRIDGE_FLAGS_SELF) &&
2376 dev->netdev_ops->ndo_bridge_getlink) {
2377 err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev);
2378 if (err < 0)
2379 goto errout;
2380 }
2381
2382 rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
2383 return 0;
2384errout:
2385 WARN_ON(err == -EMSGSIZE);
2386 kfree_skb(skb);
2387 rtnl_set_sk_err(net, RTNLGRP_LINK, err);
2388 return err;
2389}
2390
2391static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
2392 void *arg)
2393{
2394 struct net *net = sock_net(skb->sk);
2395 struct ifinfomsg *ifm;
2396 struct net_device *dev;
2397 struct nlattr *br_spec, *attr = NULL;
2398 int rem, err = -EOPNOTSUPP;
2399 u16 oflags, flags = 0;
2400 bool have_flags = false;
2401
2402 if (nlmsg_len(nlh) < sizeof(*ifm))
2403 return -EINVAL;
2404
2405 ifm = nlmsg_data(nlh);
2406 if (ifm->ifi_family != AF_BRIDGE)
2407 return -EPFNOSUPPORT;
2408
2409 dev = __dev_get_by_index(net, ifm->ifi_index);
2410 if (!dev) {
2411 pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
2412 return -ENODEV;
2413 }
2414
2415 br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
2416 if (br_spec) {
2417 nla_for_each_nested(attr, br_spec, rem) {
2418 if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
2419 have_flags = true;
2420 flags = nla_get_u16(attr);
2421 break;
2422 }
2423 }
2424 }
2425
2426 oflags = flags;
2427
2428 if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
2429 if (!dev->master ||
2430 !dev->master->netdev_ops->ndo_bridge_setlink) {
2431 err = -EOPNOTSUPP;
2432 goto out;
2433 }
2434
2435 err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh);
2436 if (err)
2437 goto out;
2438
2439 flags &= ~BRIDGE_FLAGS_MASTER;
2440 }
2441
2442 if ((flags & BRIDGE_FLAGS_SELF)) {
2443 if (!dev->netdev_ops->ndo_bridge_setlink)
2444 err = -EOPNOTSUPP;
2445 else
2446 err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
2447
2448 if (!err)
2449 flags &= ~BRIDGE_FLAGS_SELF;
2450 }
2451
2452 if (have_flags)
2453 memcpy(nla_data(attr), &flags, sizeof(flags));
2454 /* Generate event to notify upper layer of bridge change */
2455 if (!err)
2456 err = rtnl_bridge_notify(dev, oflags);
2457out:
2458 return err;
2459}
2460
2256/* Protected by RTNL sempahore. */ 2461/* Protected by RTNL sempahore. */
2257static struct rtattr **rta_buf; 2462static struct rtattr **rta_buf;
2258static int rtattr_max; 2463static int rtattr_max;
@@ -2434,5 +2639,8 @@ void __init rtnetlink_init(void)
2434 rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL); 2639 rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
2435 rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL); 2640 rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
2436 rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); 2641 rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
2642
2643 rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
2644 rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
2437} 2645}
2438 2646
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4007c1437fda..880722e22cc5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -519,7 +519,7 @@ static void skb_release_data(struct sk_buff *skb)
519 519
520 uarg = skb_shinfo(skb)->destructor_arg; 520 uarg = skb_shinfo(skb)->destructor_arg;
521 if (uarg->callback) 521 if (uarg->callback)
522 uarg->callback(uarg); 522 uarg->callback(uarg, true);
523 } 523 }
524 524
525 if (skb_has_frag_list(skb)) 525 if (skb_has_frag_list(skb))
@@ -635,6 +635,26 @@ void kfree_skb(struct sk_buff *skb)
635EXPORT_SYMBOL(kfree_skb); 635EXPORT_SYMBOL(kfree_skb);
636 636
637/** 637/**
638 * skb_tx_error - report an sk_buff xmit error
639 * @skb: buffer that triggered an error
640 *
641 * Report xmit error if a device callback is tracking this skb.
642 * skb must be freed afterwards.
643 */
644void skb_tx_error(struct sk_buff *skb)
645{
646 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
647 struct ubuf_info *uarg;
648
649 uarg = skb_shinfo(skb)->destructor_arg;
650 if (uarg->callback)
651 uarg->callback(uarg, false);
652 skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
653 }
654}
655EXPORT_SYMBOL(skb_tx_error);
656
657/**
638 * consume_skb - free an skbuff 658 * consume_skb - free an skbuff
639 * @skb: buffer to free 659 * @skb: buffer to free
640 * 660 *
@@ -797,7 +817,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
797 for (i = 0; i < num_frags; i++) 817 for (i = 0; i < num_frags; i++)
798 skb_frag_unref(skb, i); 818 skb_frag_unref(skb, i);
799 819
800 uarg->callback(uarg); 820 uarg->callback(uarg, false);
801 821
802 /* skb frags point to kernel buffers */ 822 /* skb frags point to kernel buffers */
803 for (i = num_frags - 1; i >= 0; i--) { 823 for (i = num_frags - 1; i >= 0; i--) {
diff --git a/net/core/sock.c b/net/core/sock.c
index 8a146cfcc366..06286006a2cc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1074,6 +1074,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
1074 case SO_NOFCS: 1074 case SO_NOFCS:
1075 v.val = sock_flag(sk, SOCK_NOFCS); 1075 v.val = sock_flag(sk, SOCK_NOFCS);
1076 break; 1076 break;
1077 case SO_BINDTODEVICE:
1078 v.val = sk->sk_bound_dev_if;
1079 break;
1080 case SO_GET_FILTER:
1081 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1082 if (len < 0)
1083 return len;
1084
1085 goto lenout;
1077 default: 1086 default:
1078 return -ENOPROTOOPT; 1087 return -ENOPROTOOPT;
1079 } 1088 }
@@ -1214,13 +1223,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
1214 1223
1215#ifdef CONFIG_CGROUPS 1224#ifdef CONFIG_CGROUPS
1216#if IS_ENABLED(CONFIG_NET_CLS_CGROUP) 1225#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
1217void sock_update_classid(struct sock *sk) 1226void sock_update_classid(struct sock *sk, struct task_struct *task)
1218{ 1227{
1219 u32 classid; 1228 u32 classid;
1220 1229
1221 rcu_read_lock(); /* doing current task, which cannot vanish. */ 1230 classid = task_cls_classid(task);
1222 classid = task_cls_classid(current);
1223 rcu_read_unlock();
1224 if (classid != sk->sk_classid) 1231 if (classid != sk->sk_classid)
1225 sk->sk_classid = classid; 1232 sk->sk_classid = classid;
1226} 1233}
@@ -1263,7 +1270,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1263 sock_net_set(sk, get_net(net)); 1270 sock_net_set(sk, get_net(net));
1264 atomic_set(&sk->sk_wmem_alloc, 1); 1271 atomic_set(&sk->sk_wmem_alloc, 1);
1265 1272
1266 sock_update_classid(sk); 1273 sock_update_classid(sk, current);
1267 sock_update_netprioidx(sk, current); 1274 sock_update_netprioidx(sk, current);
1268 } 1275 }
1269 1276
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index ea850ce35d4a..662071b249cc 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -174,8 +174,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
174 * To protect against Request floods, increment retrans 174 * To protect against Request floods, increment retrans
175 * counter (backoff, monitored by dccp_response_timer). 175 * counter (backoff, monitored by dccp_response_timer).
176 */ 176 */
177 req->retrans++; 177 inet_rtx_syn_ack(sk, req);
178 req->rsk_ops->rtx_syn_ack(sk, req, NULL);
179 } 178 }
180 /* Network Duplicate, discard packet */ 179 /* Network Duplicate, discard packet */
181 return NULL; 180 return NULL;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 2a6abc163ed2..f6db227c1fd9 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -55,6 +55,7 @@
55#include <linux/sysctl.h> 55#include <linux/sysctl.h>
56#endif 56#endif
57#include <linux/kmod.h> 57#include <linux/kmod.h>
58#include <linux/netconf.h>
58 59
59#include <net/arp.h> 60#include <net/arp.h>
60#include <net/ip.h> 61#include <net/ip.h>
@@ -1442,6 +1443,149 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1442 return 0; 1443 return 0;
1443} 1444}
1444 1445
1446static int inet_netconf_msgsize_devconf(int type)
1447{
1448 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1449 + nla_total_size(4); /* NETCONFA_IFINDEX */
1450
1451 /* type -1 is used for ALL */
1452 if (type == -1 || type == NETCONFA_FORWARDING)
1453 size += nla_total_size(4);
1454 if (type == -1 || type == NETCONFA_RP_FILTER)
1455 size += nla_total_size(4);
1456
1457 return size;
1458}
1459
1460static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1461 struct ipv4_devconf *devconf, u32 portid,
1462 u32 seq, int event, unsigned int flags,
1463 int type)
1464{
1465 struct nlmsghdr *nlh;
1466 struct netconfmsg *ncm;
1467
1468 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1469 flags);
1470 if (nlh == NULL)
1471 return -EMSGSIZE;
1472
1473 ncm = nlmsg_data(nlh);
1474 ncm->ncm_family = AF_INET;
1475
1476 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1477 goto nla_put_failure;
1478
1479 /* type -1 is used for ALL */
1480 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1481 nla_put_s32(skb, NETCONFA_FORWARDING,
1482 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1483 goto nla_put_failure;
1484 if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1485 nla_put_s32(skb, NETCONFA_RP_FILTER,
1486 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1487 goto nla_put_failure;
1488
1489 return nlmsg_end(skb, nlh);
1490
1491nla_put_failure:
1492 nlmsg_cancel(skb, nlh);
1493 return -EMSGSIZE;
1494}
1495
1496static void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1497 struct ipv4_devconf *devconf)
1498{
1499 struct sk_buff *skb;
1500 int err = -ENOBUFS;
1501
1502 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1503 if (skb == NULL)
1504 goto errout;
1505
1506 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1507 RTM_NEWNETCONF, 0, type);
1508 if (err < 0) {
1509 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1510 WARN_ON(err == -EMSGSIZE);
1511 kfree_skb(skb);
1512 goto errout;
1513 }
1514 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1515 return;
1516errout:
1517 if (err < 0)
1518 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1519}
1520
1521static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1522 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1523 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1524 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
1525};
1526
1527static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1528 struct nlmsghdr *nlh,
1529 void *arg)
1530{
1531 struct net *net = sock_net(in_skb->sk);
1532 struct nlattr *tb[NETCONFA_MAX+1];
1533 struct netconfmsg *ncm;
1534 struct sk_buff *skb;
1535 struct ipv4_devconf *devconf;
1536 struct in_device *in_dev;
1537 struct net_device *dev;
1538 int ifindex;
1539 int err;
1540
1541 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1542 devconf_ipv4_policy);
1543 if (err < 0)
1544 goto errout;
1545
1546 err = EINVAL;
1547 if (!tb[NETCONFA_IFINDEX])
1548 goto errout;
1549
1550 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1551 switch (ifindex) {
1552 case NETCONFA_IFINDEX_ALL:
1553 devconf = net->ipv4.devconf_all;
1554 break;
1555 case NETCONFA_IFINDEX_DEFAULT:
1556 devconf = net->ipv4.devconf_dflt;
1557 break;
1558 default:
1559 dev = __dev_get_by_index(net, ifindex);
1560 if (dev == NULL)
1561 goto errout;
1562 in_dev = __in_dev_get_rtnl(dev);
1563 if (in_dev == NULL)
1564 goto errout;
1565 devconf = &in_dev->cnf;
1566 break;
1567 }
1568
1569 err = -ENOBUFS;
1570 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1571 if (skb == NULL)
1572 goto errout;
1573
1574 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1575 NETLINK_CB(in_skb).portid,
1576 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1577 -1);
1578 if (err < 0) {
1579 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1580 WARN_ON(err == -EMSGSIZE);
1581 kfree_skb(skb);
1582 goto errout;
1583 }
1584 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1585errout:
1586 return err;
1587}
1588
1445#ifdef CONFIG_SYSCTL 1589#ifdef CONFIG_SYSCTL
1446 1590
1447static void devinet_copy_dflt_conf(struct net *net, int i) 1591static void devinet_copy_dflt_conf(struct net *net, int i)
@@ -1467,6 +1611,12 @@ static void inet_forward_change(struct net *net)
1467 1611
1468 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; 1612 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1469 IPV4_DEVCONF_DFLT(net, FORWARDING) = on; 1613 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1614 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1615 NETCONFA_IFINDEX_ALL,
1616 net->ipv4.devconf_all);
1617 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1618 NETCONFA_IFINDEX_DEFAULT,
1619 net->ipv4.devconf_dflt);
1470 1620
1471 for_each_netdev(net, dev) { 1621 for_each_netdev(net, dev) {
1472 struct in_device *in_dev; 1622 struct in_device *in_dev;
@@ -1474,8 +1624,11 @@ static void inet_forward_change(struct net *net)
1474 dev_disable_lro(dev); 1624 dev_disable_lro(dev);
1475 rcu_read_lock(); 1625 rcu_read_lock();
1476 in_dev = __in_dev_get_rcu(dev); 1626 in_dev = __in_dev_get_rcu(dev);
1477 if (in_dev) 1627 if (in_dev) {
1478 IN_DEV_CONF_SET(in_dev, FORWARDING, on); 1628 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1629 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1630 dev->ifindex, &in_dev->cnf);
1631 }
1479 rcu_read_unlock(); 1632 rcu_read_unlock();
1480 } 1633 }
1481} 1634}
@@ -1501,6 +1654,23 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
1501 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) 1654 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1502 if ((new_value == 0) && (old_value != 0)) 1655 if ((new_value == 0) && (old_value != 0))
1503 rt_cache_flush(net); 1656 rt_cache_flush(net);
1657 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1658 new_value != old_value) {
1659 int ifindex;
1660
1661 if (cnf == net->ipv4.devconf_dflt)
1662 ifindex = NETCONFA_IFINDEX_DEFAULT;
1663 else if (cnf == net->ipv4.devconf_all)
1664 ifindex = NETCONFA_IFINDEX_ALL;
1665 else {
1666 struct in_device *idev =
1667 container_of(cnf, struct in_device,
1668 cnf);
1669 ifindex = idev->dev->ifindex;
1670 }
1671 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1672 ifindex, cnf);
1673 }
1504 } 1674 }
1505 1675
1506 return ret; 1676 return ret;
@@ -1527,15 +1697,23 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
1527 } 1697 }
1528 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { 1698 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1529 inet_forward_change(net); 1699 inet_forward_change(net);
1530 } else if (*valp) { 1700 } else {
1531 struct ipv4_devconf *cnf = ctl->extra1; 1701 struct ipv4_devconf *cnf = ctl->extra1;
1532 struct in_device *idev = 1702 struct in_device *idev =
1533 container_of(cnf, struct in_device, cnf); 1703 container_of(cnf, struct in_device, cnf);
1534 dev_disable_lro(idev->dev); 1704 if (*valp)
1705 dev_disable_lro(idev->dev);
1706 inet_netconf_notify_devconf(net,
1707 NETCONFA_FORWARDING,
1708 idev->dev->ifindex,
1709 cnf);
1535 } 1710 }
1536 rtnl_unlock(); 1711 rtnl_unlock();
1537 rt_cache_flush(net); 1712 rt_cache_flush(net);
1538 } 1713 } else
1714 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1715 NETCONFA_IFINDEX_DEFAULT,
1716 net->ipv4.devconf_dflt);
1539 } 1717 }
1540 1718
1541 return ret; 1719 return ret;
@@ -1809,5 +1987,7 @@ void __init devinet_init(void)
1809 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); 1987 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1810 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); 1988 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1811 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); 1989 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1990 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
1991 NULL, NULL);
1812} 1992}
1813 1993
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 71b125cd5db1..4797a800faf8 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -803,7 +803,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
803 unsigned int bytes; 803 unsigned int bytes;
804 804
805 if (!new_size) 805 if (!new_size)
806 new_size = 1; 806 new_size = 16;
807 bytes = new_size * sizeof(struct hlist_head *); 807 bytes = new_size * sizeof(struct hlist_head *);
808 new_info_hash = fib_info_hash_alloc(bytes); 808 new_info_hash = fib_info_hash_alloc(bytes);
809 new_laddrhash = fib_info_hash_alloc(bytes); 809 new_laddrhash = fib_info_hash_alloc(bytes);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d34ce2972c8f..2026542d6836 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -521,21 +521,31 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
521 int *expire, int *resend) 521 int *expire, int *resend)
522{ 522{
523 if (!rskq_defer_accept) { 523 if (!rskq_defer_accept) {
524 *expire = req->retrans >= thresh; 524 *expire = req->num_timeout >= thresh;
525 *resend = 1; 525 *resend = 1;
526 return; 526 return;
527 } 527 }
528 *expire = req->retrans >= thresh && 528 *expire = req->num_timeout >= thresh &&
529 (!inet_rsk(req)->acked || req->retrans >= max_retries); 529 (!inet_rsk(req)->acked || req->num_timeout >= max_retries);
530 /* 530 /*
531 * Do not resend while waiting for data after ACK, 531 * Do not resend while waiting for data after ACK,
532 * start to resend on end of deferring period to give 532 * start to resend on end of deferring period to give
533 * last chance for data or ACK to create established socket. 533 * last chance for data or ACK to create established socket.
534 */ 534 */
535 *resend = !inet_rsk(req)->acked || 535 *resend = !inet_rsk(req)->acked ||
536 req->retrans >= rskq_defer_accept - 1; 536 req->num_timeout >= rskq_defer_accept - 1;
537} 537}
538 538
539int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
540{
541 int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL);
542
543 if (!err)
544 req->num_retrans++;
545 return err;
546}
547EXPORT_SYMBOL(inet_rtx_syn_ack);
548
539void inet_csk_reqsk_queue_prune(struct sock *parent, 549void inet_csk_reqsk_queue_prune(struct sock *parent,
540 const unsigned long interval, 550 const unsigned long interval,
541 const unsigned long timeout, 551 const unsigned long timeout,
@@ -599,13 +609,14 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
599 req->rsk_ops->syn_ack_timeout(parent, req); 609 req->rsk_ops->syn_ack_timeout(parent, req);
600 if (!expire && 610 if (!expire &&
601 (!resend || 611 (!resend ||
602 !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || 612 !inet_rtx_syn_ack(parent, req) ||
603 inet_rsk(req)->acked)) { 613 inet_rsk(req)->acked)) {
604 unsigned long timeo; 614 unsigned long timeo;
605 615
606 if (req->retrans++ == 0) 616 if (req->num_timeout++ == 0)
607 lopt->qlen_young--; 617 lopt->qlen_young--;
608 timeo = min((timeout << req->retrans), max_rto); 618 timeo = min(timeout << req->num_timeout,
619 max_rto);
609 req->expires = now + timeo; 620 req->expires = now + timeo;
610 reqp = &req->dl_next; 621 reqp = &req->dl_next;
611 continue; 622 continue;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 0c34bfabc11f..cb98cbed1973 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -105,6 +105,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
105 r->id.idiag_src[0] = inet->inet_rcv_saddr; 105 r->id.idiag_src[0] = inet->inet_rcv_saddr;
106 r->id.idiag_dst[0] = inet->inet_daddr; 106 r->id.idiag_dst[0] = inet->inet_daddr;
107 107
108 if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
109 goto errout;
110
108 /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, 111 /* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
109 * hence this needs to be included regardless of socket family. 112 * hence this needs to be included regardless of socket family.
110 */ 113 */
@@ -617,7 +620,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
617 r->idiag_family = sk->sk_family; 620 r->idiag_family = sk->sk_family;
618 r->idiag_state = TCP_SYN_RECV; 621 r->idiag_state = TCP_SYN_RECV;
619 r->idiag_timer = 1; 622 r->idiag_timer = 1;
620 r->idiag_retrans = req->retrans; 623 r->idiag_retrans = req->num_retrans;
621 624
622 r->id.idiag_if = sk->sk_bound_dev_if; 625 r->id.idiag_if = sk->sk_bound_dev_if;
623 sock_diag_save_cookie(req, r->id.idiag_cookie); 626 sock_diag_save_cookie(req, r->id.idiag_cookie);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 798358b10717..d763701cff1b 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1500,8 +1500,10 @@ static int __init ip_auto_config(void)
1500 * Clue in the operator. 1500 * Clue in the operator.
1501 */ 1501 */
1502 pr_info("IP-Config: Complete:\n"); 1502 pr_info("IP-Config: Complete:\n");
1503 pr_info(" device=%s, addr=%pI4, mask=%pI4, gw=%pI4\n", 1503
1504 ic_dev->name, &ic_myaddr, &ic_netmask, &ic_gateway); 1504 pr_info(" device=%s, hwaddr=%*phC, ipaddr=%pI4, mask=%pI4, gw=%pI4\n",
1505 ic_dev->name, ic_dev->addr_len, ic_dev->dev_addr,
1506 &ic_myaddr, &ic_netmask, &ic_gateway);
1505 pr_info(" host=%s, domain=%s, nis-domain=%s\n", 1507 pr_info(" host=%s, domain=%s, nis-domain=%s\n",
1506 utsname()->nodename, ic_domain, utsname()->domainname); 1508 utsname()->nodename, ic_domain, utsname()->domainname);
1507 pr_info(" bootserver=%pI4, rootserver=%pI4, rootpath=%s", 1509 pr_info(" bootserver=%pI4, rootserver=%pI4, rootpath=%s",
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index e15b45297c09..720855e41100 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -138,6 +138,7 @@ struct ipip_net {
138static int ipip_tunnel_init(struct net_device *dev); 138static int ipip_tunnel_init(struct net_device *dev);
139static void ipip_tunnel_setup(struct net_device *dev); 139static void ipip_tunnel_setup(struct net_device *dev);
140static void ipip_dev_free(struct net_device *dev); 140static void ipip_dev_free(struct net_device *dev);
141static struct rtnl_link_ops ipip_link_ops __read_mostly;
141 142
142/* 143/*
143 * Locking : hash tables are protected by RCU and RTNL 144 * Locking : hash tables are protected by RCU and RTNL
@@ -305,6 +306,7 @@ static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
305 goto failed_free; 306 goto failed_free;
306 307
307 strcpy(nt->parms.name, dev->name); 308 strcpy(nt->parms.name, dev->name);
309 dev->rtnl_link_ops = &ipip_link_ops;
308 310
309 dev_hold(dev); 311 dev_hold(dev);
310 ipip_tunnel_link(ipn, nt); 312 ipip_tunnel_link(ipn, nt);
@@ -479,6 +481,10 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
479 if (skb->protocol != htons(ETH_P_IP)) 481 if (skb->protocol != htons(ETH_P_IP))
480 goto tx_error; 482 goto tx_error;
481 483
484 if (skb->ip_summed == CHECKSUM_PARTIAL &&
485 skb_checksum_help(skb))
486 goto tx_error;
487
482 if (tos & 1) 488 if (tos & 1)
483 tos = old_iph->tos; 489 tos = old_iph->tos;
484 490
@@ -773,6 +779,11 @@ static void ipip_dev_free(struct net_device *dev)
773 free_netdev(dev); 779 free_netdev(dev);
774} 780}
775 781
782#define IPIP_FEATURES (NETIF_F_SG | \
783 NETIF_F_FRAGLIST | \
784 NETIF_F_HIGHDMA | \
785 NETIF_F_HW_CSUM)
786
776static void ipip_tunnel_setup(struct net_device *dev) 787static void ipip_tunnel_setup(struct net_device *dev)
777{ 788{
778 dev->netdev_ops = &ipip_netdev_ops; 789 dev->netdev_ops = &ipip_netdev_ops;
@@ -787,6 +798,9 @@ static void ipip_tunnel_setup(struct net_device *dev)
787 dev->features |= NETIF_F_NETNS_LOCAL; 798 dev->features |= NETIF_F_NETNS_LOCAL;
788 dev->features |= NETIF_F_LLTX; 799 dev->features |= NETIF_F_LLTX;
789 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 800 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
801
802 dev->features |= IPIP_FEATURES;
803 dev->hw_features |= IPIP_FEATURES;
790} 804}
791 805
792static int ipip_tunnel_init(struct net_device *dev) 806static int ipip_tunnel_init(struct net_device *dev)
@@ -829,6 +843,47 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
829 return 0; 843 return 0;
830} 844}
831 845
846static size_t ipip_get_size(const struct net_device *dev)
847{
848 return
849 /* IFLA_IPTUN_LINK */
850 nla_total_size(4) +
851 /* IFLA_IPTUN_LOCAL */
852 nla_total_size(4) +
853 /* IFLA_IPTUN_REMOTE */
854 nla_total_size(4) +
855 /* IFLA_IPTUN_TTL */
856 nla_total_size(1) +
857 /* IFLA_IPTUN_TOS */
858 nla_total_size(1) +
859 0;
860}
861
862static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
863{
864 struct ip_tunnel *tunnel = netdev_priv(dev);
865 struct ip_tunnel_parm *parm = &tunnel->parms;
866
867 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
868 nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
869 nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
870 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
871 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos))
872 goto nla_put_failure;
873 return 0;
874
875nla_put_failure:
876 return -EMSGSIZE;
877}
878
879static struct rtnl_link_ops ipip_link_ops __read_mostly = {
880 .kind = "ipip",
881 .maxtype = IFLA_IPTUN_MAX,
882 .priv_size = sizeof(struct ip_tunnel),
883 .get_size = ipip_get_size,
884 .fill_info = ipip_fill_info,
885};
886
832static struct xfrm_tunnel ipip_handler __read_mostly = { 887static struct xfrm_tunnel ipip_handler __read_mostly = {
833 .handler = ipip_rcv, 888 .handler = ipip_rcv,
834 .err_handler = ipip_err, 889 .err_handler = ipip_err,
@@ -925,14 +980,26 @@ static int __init ipip_init(void)
925 return err; 980 return err;
926 err = xfrm4_tunnel_register(&ipip_handler, AF_INET); 981 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
927 if (err < 0) { 982 if (err < 0) {
928 unregister_pernet_device(&ipip_net_ops);
929 pr_info("%s: can't register tunnel\n", __func__); 983 pr_info("%s: can't register tunnel\n", __func__);
984 goto xfrm_tunnel_failed;
930 } 985 }
986 err = rtnl_link_register(&ipip_link_ops);
987 if (err < 0)
988 goto rtnl_link_failed;
989
990out:
931 return err; 991 return err;
992
993rtnl_link_failed:
994 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
995xfrm_tunnel_failed:
996 unregister_pernet_device(&ipip_net_ops);
997 goto out;
932} 998}
933 999
934static void __exit ipip_fini(void) 1000static void __exit ipip_fini(void)
935{ 1001{
1002 rtnl_link_unregister(&ipip_link_ops);
936 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) 1003 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
937 pr_info("%s: can't deregister tunnel\n", __func__); 1004 pr_info("%s: can't deregister tunnel\n", __func__);
938 1005
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index ba48e799b031..b236ef04914f 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -340,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
340 } 340 }
341 341
342 req->expires = 0UL; 342 req->expires = 0UL;
343 req->retrans = 0; 343 req->num_retrans = 0;
344 344
345 /* 345 /*
346 * We need to lookup the route here to get at the correct 346 * We need to lookup the route here to get at the correct
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 197c0008503c..733f48593ec3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -536,13 +536,14 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
536{ 536{
537 struct tcp_sock *tp = tcp_sk(sk); 537 struct tcp_sock *tp = tcp_sk(sk);
538 int answ; 538 int answ;
539 bool slow;
539 540
540 switch (cmd) { 541 switch (cmd) {
541 case SIOCINQ: 542 case SIOCINQ:
542 if (sk->sk_state == TCP_LISTEN) 543 if (sk->sk_state == TCP_LISTEN)
543 return -EINVAL; 544 return -EINVAL;
544 545
545 lock_sock(sk); 546 slow = lock_sock_fast(sk);
546 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) 547 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
547 answ = 0; 548 answ = 0;
548 else if (sock_flag(sk, SOCK_URGINLINE) || 549 else if (sock_flag(sk, SOCK_URGINLINE) ||
@@ -557,7 +558,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
557 answ--; 558 answ--;
558 } else 559 } else
559 answ = tp->urg_seq - tp->copied_seq; 560 answ = tp->urg_seq - tp->copied_seq;
560 release_sock(sk); 561 unlock_sock_fast(sk, slow);
561 break; 562 break;
562 case SIOCATMARK: 563 case SIOCATMARK:
563 answ = tp->urg_data && tp->urg_seq == tp->copied_seq; 564 answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2c2b13a999ea..7839d51fb65b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3552,6 +3552,24 @@ static bool tcp_process_frto(struct sock *sk, int flag)
3552 return false; 3552 return false;
3553} 3553}
3554 3554
3555/* RFC 5961 7 [ACK Throttling] */
3556static void tcp_send_challenge_ack(struct sock *sk)
3557{
3558 /* unprotected vars, we dont care of overwrites */
3559 static u32 challenge_timestamp;
3560 static unsigned int challenge_count;
3561 u32 now = jiffies / HZ;
3562
3563 if (now != challenge_timestamp) {
3564 challenge_timestamp = now;
3565 challenge_count = 0;
3566 }
3567 if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
3568 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
3569 tcp_send_ack(sk);
3570 }
3571}
3572
3555/* This routine deals with incoming acks, but not outgoing ones. */ 3573/* This routine deals with incoming acks, but not outgoing ones. */
3556static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) 3574static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3557{ 3575{
@@ -3571,8 +3589,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3571 /* If the ack is older than previous acks 3589 /* If the ack is older than previous acks
3572 * then we can probably ignore it. 3590 * then we can probably ignore it.
3573 */ 3591 */
3574 if (before(ack, prior_snd_una)) 3592 if (before(ack, prior_snd_una)) {
3593 /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
3594 if (before(ack, prior_snd_una - tp->max_window)) {
3595 tcp_send_challenge_ack(sk);
3596 return -1;
3597 }
3575 goto old_ack; 3598 goto old_ack;
3599 }
3576 3600
3577 /* If the ack includes data we haven't sent yet, discard 3601 /* If the ack includes data we haven't sent yet, discard
3578 * this segment (RFC793 Section 3.9). 3602 * this segment (RFC793 Section 3.9).
@@ -5244,23 +5268,6 @@ out:
5244} 5268}
5245#endif /* CONFIG_NET_DMA */ 5269#endif /* CONFIG_NET_DMA */
5246 5270
5247static void tcp_send_challenge_ack(struct sock *sk)
5248{
5249 /* unprotected vars, we dont care of overwrites */
5250 static u32 challenge_timestamp;
5251 static unsigned int challenge_count;
5252 u32 now = jiffies / HZ;
5253
5254 if (now != challenge_timestamp) {
5255 challenge_timestamp = now;
5256 challenge_count = 0;
5257 }
5258 if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
5259 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
5260 tcp_send_ack(sk);
5261 }
5262}
5263
5264/* Does PAWS and seqno based validation of an incoming segment, flags will 5271/* Does PAWS and seqno based validation of an incoming segment, flags will
5265 * play significant role here. 5272 * play significant role here.
5266 */ 5273 */
@@ -5988,7 +5995,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5988 */ 5995 */
5989 if (req) { 5996 if (req) {
5990 tcp_synack_rtt_meas(sk, req); 5997 tcp_synack_rtt_meas(sk, req);
5991 tp->total_retrans = req->retrans; 5998 tp->total_retrans = req->num_retrans;
5992 5999
5993 reqsk_fastopen_remove(sk, req, false); 6000 reqsk_fastopen_remove(sk, req, false);
5994 } else { 6001 } else {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0c4a64355603..9dd5b34eb112 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -877,10 +877,13 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
877} 877}
878 878
879static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, 879static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
880 struct request_values *rvp) 880 struct request_values *rvp)
881{ 881{
882 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 882 int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
883 return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); 883
884 if (!res)
885 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
886 return res;
884} 887}
885 888
886/* 889/*
@@ -1070,7 +1073,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1070} 1073}
1071EXPORT_SYMBOL(tcp_md5_do_del); 1074EXPORT_SYMBOL(tcp_md5_do_del);
1072 1075
1073void tcp_clear_md5_list(struct sock *sk) 1076static void tcp_clear_md5_list(struct sock *sk)
1074{ 1077{
1075 struct tcp_sock *tp = tcp_sk(sk); 1078 struct tcp_sock *tp = tcp_sk(sk);
1076 struct tcp_md5sig_key *key; 1079 struct tcp_md5sig_key *key;
@@ -1386,7 +1389,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
1386 struct sock *child; 1389 struct sock *child;
1387 int err; 1390 int err;
1388 1391
1389 req->retrans = 0; 1392 req->num_retrans = 0;
1393 req->num_timeout = 0;
1390 req->sk = NULL; 1394 req->sk = NULL;
1391 1395
1392 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); 1396 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
@@ -1741,7 +1745,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1741 1745
1742 tcp_initialize_rcv_mss(newsk); 1746 tcp_initialize_rcv_mss(newsk);
1743 tcp_synack_rtt_meas(newsk, req); 1747 tcp_synack_rtt_meas(newsk, req);
1744 newtp->total_retrans = req->retrans; 1748 newtp->total_retrans = req->num_retrans;
1745 1749
1746#ifdef CONFIG_TCP_MD5SIG 1750#ifdef CONFIG_TCP_MD5SIG
1747 /* Copy over the MD5 key from the original socket */ 1751 /* Copy over the MD5 key from the original socket */
@@ -1919,7 +1923,6 @@ EXPORT_SYMBOL(tcp_v4_do_rcv);
1919 1923
1920void tcp_v4_early_demux(struct sk_buff *skb) 1924void tcp_v4_early_demux(struct sk_buff *skb)
1921{ 1925{
1922 struct net *net = dev_net(skb->dev);
1923 const struct iphdr *iph; 1926 const struct iphdr *iph;
1924 const struct tcphdr *th; 1927 const struct tcphdr *th;
1925 struct sock *sk; 1928 struct sock *sk;
@@ -1927,16 +1930,16 @@ void tcp_v4_early_demux(struct sk_buff *skb)
1927 if (skb->pkt_type != PACKET_HOST) 1930 if (skb->pkt_type != PACKET_HOST)
1928 return; 1931 return;
1929 1932
1930 if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr))) 1933 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1931 return; 1934 return;
1932 1935
1933 iph = ip_hdr(skb); 1936 iph = ip_hdr(skb);
1934 th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb)); 1937 th = tcp_hdr(skb);
1935 1938
1936 if (th->doff < sizeof(struct tcphdr) / 4) 1939 if (th->doff < sizeof(struct tcphdr) / 4)
1937 return; 1940 return;
1938 1941
1939 sk = __inet_lookup_established(net, &tcp_hashinfo, 1942 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1940 iph->saddr, th->source, 1943 iph->saddr, th->source,
1941 iph->daddr, ntohs(th->dest), 1944 iph->daddr, ntohs(th->dest),
1942 skb->skb_iif); 1945 skb->skb_iif);
@@ -2640,7 +2643,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2640 0, 0, /* could print option size, but that is af dependent. */ 2643 0, 0, /* could print option size, but that is af dependent. */
2641 1, /* timers active (only the expire timer) */ 2644 1, /* timers active (only the expire timer) */
2642 jiffies_delta_to_clock_t(delta), 2645 jiffies_delta_to_clock_t(delta),
2643 req->retrans, 2646 req->num_timeout,
2644 from_kuid_munged(seq_user_ns(f), uid), 2647 from_kuid_munged(seq_user_ns(f), uid),
2645 0, /* non standard timer */ 2648 0, /* non standard timer */
2646 0, /* open_requests have no inode */ 2649 0, /* open_requests have no inode */
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a7302d974f32..f35f2dfb6401 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -553,7 +553,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
553 * it can be estimated (approximately) 553 * it can be estimated (approximately)
554 * from another data. 554 * from another data.
555 */ 555 */
556 tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans); 556 tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
557 paws_reject = tcp_paws_reject(&tmp_opt, th->rst); 557 paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
558 } 558 }
559 } 559 }
@@ -582,7 +582,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
582 * Note that even if there is new data in the SYN packet 582 * Note that even if there is new data in the SYN packet
583 * they will be thrown away too. 583 * they will be thrown away too.
584 */ 584 */
585 req->rsk_ops->rtx_syn_ack(sk, req, NULL); 585 inet_rtx_syn_ack(sk, req);
586 return NULL; 586 return NULL;
587 } 587 }
588 588
@@ -696,7 +696,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
696 /* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */ 696 /* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */
697 if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) 697 if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
698 tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; 698 tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
699 else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ 699 else if (req->num_retrans) /* don't take RTT sample if retrans && ~TS */
700 tcp_rsk(req)->snt_synack = 0; 700 tcp_rsk(req)->snt_synack = 0;
701 701
702 /* For Fast Open no more processing is needed (sk is the 702 /* For Fast Open no more processing is needed (sk is the
@@ -706,7 +706,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
706 return sk; 706 return sk;
707 707
708 /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ 708 /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
709 if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && 709 if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
710 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { 710 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
711 inet_rsk(req)->acked = 1; 711 inet_rsk(req)->acked = 1;
712 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); 712 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index d47c1b4421a3..b78aac30c498 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -318,7 +318,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
318 req = tcp_sk(sk)->fastopen_rsk; 318 req = tcp_sk(sk)->fastopen_rsk;
319 req->rsk_ops->syn_ack_timeout(sk, req); 319 req->rsk_ops->syn_ack_timeout(sk, req);
320 320
321 if (req->retrans >= max_retries) { 321 if (req->num_timeout >= max_retries) {
322 tcp_write_err(sk); 322 tcp_write_err(sk);
323 return; 323 return;
324 } 324 }
@@ -327,10 +327,10 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
327 * regular retransmit because if the child socket has been accepted 327 * regular retransmit because if the child socket has been accepted
328 * it's not good to give up too easily. 328 * it's not good to give up too easily.
329 */ 329 */
330 req->rsk_ops->rtx_syn_ack(sk, req, NULL); 330 inet_rtx_syn_ack(sk, req);
331 req->retrans++; 331 req->num_timeout++;
332 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 332 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
333 TCP_TIMEOUT_INIT << req->retrans, TCP_RTO_MAX); 333 TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
334} 334}
335 335
336/* 336/*
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0424e4e27414..fab23db8ee73 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -81,6 +81,7 @@
81#include <net/pkt_sched.h> 81#include <net/pkt_sched.h>
82#include <linux/if_tunnel.h> 82#include <linux/if_tunnel.h>
83#include <linux/rtnetlink.h> 83#include <linux/rtnetlink.h>
84#include <linux/netconf.h>
84 85
85#ifdef CONFIG_IPV6_PRIVACY 86#ifdef CONFIG_IPV6_PRIVACY
86#include <linux/random.h> 87#include <linux/random.h>
@@ -401,7 +402,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
401 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) 402 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
402 ndev->cnf.accept_dad = -1; 403 ndev->cnf.accept_dad = -1;
403 404
404#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) 405#if IS_ENABLED(CONFIG_IPV6_SIT)
405 if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) { 406 if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) {
406 pr_info("%s: Disabled Multicast RS\n", dev->name); 407 pr_info("%s: Disabled Multicast RS\n", dev->name);
407 ndev->cnf.rtr_solicits = 0; 408 ndev->cnf.rtr_solicits = 0;
@@ -460,6 +461,141 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev)
460 return idev; 461 return idev;
461} 462}
462 463
464static int inet6_netconf_msgsize_devconf(int type)
465{
466 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
467 + nla_total_size(4); /* NETCONFA_IFINDEX */
468
469 /* type -1 is used for ALL */
470 if (type == -1 || type == NETCONFA_FORWARDING)
471 size += nla_total_size(4);
472
473 return size;
474}
475
476static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
477 struct ipv6_devconf *devconf, u32 portid,
478 u32 seq, int event, unsigned int flags,
479 int type)
480{
481 struct nlmsghdr *nlh;
482 struct netconfmsg *ncm;
483
484 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
485 flags);
486 if (nlh == NULL)
487 return -EMSGSIZE;
488
489 ncm = nlmsg_data(nlh);
490 ncm->ncm_family = AF_INET6;
491
492 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
493 goto nla_put_failure;
494
495 /* type -1 is used for ALL */
496 if ((type == -1 || type == NETCONFA_FORWARDING) &&
497 nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0)
498 goto nla_put_failure;
499
500 return nlmsg_end(skb, nlh);
501
502nla_put_failure:
503 nlmsg_cancel(skb, nlh);
504 return -EMSGSIZE;
505}
506
507static void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
508 struct ipv6_devconf *devconf)
509{
510 struct sk_buff *skb;
511 int err = -ENOBUFS;
512
513 skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC);
514 if (skb == NULL)
515 goto errout;
516
517 err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
518 RTM_NEWNETCONF, 0, type);
519 if (err < 0) {
520 /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
521 WARN_ON(err == -EMSGSIZE);
522 kfree_skb(skb);
523 goto errout;
524 }
525 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC);
526 return;
527errout:
528 if (err < 0)
529 rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err);
530}
531
532static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
533 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
534 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
535};
536
537static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
538 struct nlmsghdr *nlh,
539 void *arg)
540{
541 struct net *net = sock_net(in_skb->sk);
542 struct nlattr *tb[NETCONFA_MAX+1];
543 struct netconfmsg *ncm;
544 struct sk_buff *skb;
545 struct ipv6_devconf *devconf;
546 struct inet6_dev *in6_dev;
547 struct net_device *dev;
548 int ifindex;
549 int err;
550
551 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
552 devconf_ipv6_policy);
553 if (err < 0)
554 goto errout;
555
556 err = EINVAL;
557 if (!tb[NETCONFA_IFINDEX])
558 goto errout;
559
560 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
561 switch (ifindex) {
562 case NETCONFA_IFINDEX_ALL:
563 devconf = net->ipv6.devconf_all;
564 break;
565 case NETCONFA_IFINDEX_DEFAULT:
566 devconf = net->ipv6.devconf_dflt;
567 break;
568 default:
569 dev = __dev_get_by_index(net, ifindex);
570 if (dev == NULL)
571 goto errout;
572 in6_dev = __in6_dev_get(dev);
573 if (in6_dev == NULL)
574 goto errout;
575 devconf = &in6_dev->cnf;
576 break;
577 }
578
579 err = -ENOBUFS;
580 skb = nlmsg_new(inet6_netconf_msgsize_devconf(-1), GFP_ATOMIC);
581 if (skb == NULL)
582 goto errout;
583
584 err = inet6_netconf_fill_devconf(skb, ifindex, devconf,
585 NETLINK_CB(in_skb).portid,
586 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
587 -1);
588 if (err < 0) {
589 /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
590 WARN_ON(err == -EMSGSIZE);
591 kfree_skb(skb);
592 goto errout;
593 }
594 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
595errout:
596 return err;
597}
598
463#ifdef CONFIG_SYSCTL 599#ifdef CONFIG_SYSCTL
464static void dev_forward_change(struct inet6_dev *idev) 600static void dev_forward_change(struct inet6_dev *idev)
465{ 601{
@@ -471,7 +607,7 @@ static void dev_forward_change(struct inet6_dev *idev)
471 dev = idev->dev; 607 dev = idev->dev;
472 if (idev->cnf.forwarding) 608 if (idev->cnf.forwarding)
473 dev_disable_lro(dev); 609 dev_disable_lro(dev);
474 if (dev && (dev->flags & IFF_MULTICAST)) { 610 if (dev->flags & IFF_MULTICAST) {
475 if (idev->cnf.forwarding) 611 if (idev->cnf.forwarding)
476 ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); 612 ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
477 else 613 else
@@ -486,6 +622,8 @@ static void dev_forward_change(struct inet6_dev *idev)
486 else 622 else
487 addrconf_leave_anycast(ifa); 623 addrconf_leave_anycast(ifa);
488 } 624 }
625 inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING,
626 dev->ifindex, &idev->cnf);
489} 627}
490 628
491 629
@@ -518,6 +656,10 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
518 *p = newf; 656 *p = newf;
519 657
520 if (p == &net->ipv6.devconf_dflt->forwarding) { 658 if (p == &net->ipv6.devconf_dflt->forwarding) {
659 if ((!newf) ^ (!old))
660 inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
661 NETCONFA_IFINDEX_DEFAULT,
662 net->ipv6.devconf_dflt);
521 rtnl_unlock(); 663 rtnl_unlock();
522 return 0; 664 return 0;
523 } 665 }
@@ -525,6 +667,10 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
525 if (p == &net->ipv6.devconf_all->forwarding) { 667 if (p == &net->ipv6.devconf_all->forwarding) {
526 net->ipv6.devconf_dflt->forwarding = newf; 668 net->ipv6.devconf_dflt->forwarding = newf;
527 addrconf_forward_change(net, newf); 669 addrconf_forward_change(net, newf);
670 if ((!newf) ^ (!old))
671 inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
672 NETCONFA_IFINDEX_ALL,
673 net->ipv6.devconf_all);
528 } else if ((!newf) ^ (!old)) 674 } else if ((!newf) ^ (!old))
529 dev_forward_change((struct inet6_dev *)table->extra1); 675 dev_forward_change((struct inet6_dev *)table->extra1);
530 rtnl_unlock(); 676 rtnl_unlock();
@@ -553,7 +699,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
553 pr_warn("Freeing alive inet6 address %p\n", ifp); 699 pr_warn("Freeing alive inet6 address %p\n", ifp);
554 return; 700 return;
555 } 701 }
556 dst_release(&ifp->rt->dst); 702 ip6_rt_put(ifp->rt);
557 703
558 kfree_rcu(ifp, rcu); 704 kfree_rcu(ifp, rcu);
559} 705}
@@ -805,7 +951,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
805 rt6_set_expires(rt, expires); 951 rt6_set_expires(rt, expires);
806 } 952 }
807 } 953 }
808 dst_release(&rt->dst); 954 ip6_rt_put(rt);
809 } 955 }
810 956
811 /* clean up prefsrc entries */ 957 /* clean up prefsrc entries */
@@ -1692,7 +1838,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
1692 This thing is done here expecting that the whole 1838 This thing is done here expecting that the whole
1693 class of non-broadcast devices need not cloning. 1839 class of non-broadcast devices need not cloning.
1694 */ 1840 */
1695#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) 1841#if IS_ENABLED(CONFIG_IPV6_SIT)
1696 if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT)) 1842 if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT))
1697 cfg.fc_flags |= RTF_NONEXTHOP; 1843 cfg.fc_flags |= RTF_NONEXTHOP;
1698#endif 1844#endif
@@ -1752,7 +1898,7 @@ static void addrconf_add_mroute(struct net_device *dev)
1752 ip6_route_add(&cfg); 1898 ip6_route_add(&cfg);
1753} 1899}
1754 1900
1755#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) 1901#if IS_ENABLED(CONFIG_IPV6_SIT)
1756static void sit_route_add(struct net_device *dev) 1902static void sit_route_add(struct net_device *dev)
1757{ 1903{
1758 struct fib6_config cfg = { 1904 struct fib6_config cfg = {
@@ -1881,8 +2027,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
1881 addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, 2027 addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
1882 dev, expires, flags); 2028 dev, expires, flags);
1883 } 2029 }
1884 if (rt) 2030 ip6_rt_put(rt);
1885 dst_release(&rt->dst);
1886 } 2031 }
1887 2032
1888 /* Try to figure out our local address for this prefix */ 2033 /* Try to figure out our local address for this prefix */
@@ -2104,7 +2249,7 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg)
2104 if (dev == NULL) 2249 if (dev == NULL)
2105 goto err_exit; 2250 goto err_exit;
2106 2251
2107#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) 2252#if IS_ENABLED(CONFIG_IPV6_SIT)
2108 if (dev->type == ARPHRD_SIT) { 2253 if (dev->type == ARPHRD_SIT) {
2109 const struct net_device_ops *ops = dev->netdev_ops; 2254 const struct net_device_ops *ops = dev->netdev_ops;
2110 struct ifreq ifr; 2255 struct ifreq ifr;
@@ -2315,7 +2460,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
2315 } 2460 }
2316} 2461}
2317 2462
2318#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) 2463#if IS_ENABLED(CONFIG_IPV6_SIT)
2319static void sit_add_v4_addrs(struct inet6_dev *idev) 2464static void sit_add_v4_addrs(struct inet6_dev *idev)
2320{ 2465{
2321 struct in6_addr addr; 2466 struct in6_addr addr;
@@ -2434,7 +2579,7 @@ static void addrconf_dev_config(struct net_device *dev)
2434 addrconf_add_linklocal(idev, &addr); 2579 addrconf_add_linklocal(idev, &addr);
2435} 2580}
2436 2581
2437#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) 2582#if IS_ENABLED(CONFIG_IPV6_SIT)
2438static void addrconf_sit_config(struct net_device *dev) 2583static void addrconf_sit_config(struct net_device *dev)
2439{ 2584{
2440 struct inet6_dev *idev; 2585 struct inet6_dev *idev;
@@ -2471,7 +2616,7 @@ static void addrconf_sit_config(struct net_device *dev)
2471} 2616}
2472#endif 2617#endif
2473 2618
2474#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE) 2619#if IS_ENABLED(CONFIG_NET_IPGRE)
2475static void addrconf_gre_config(struct net_device *dev) 2620static void addrconf_gre_config(struct net_device *dev)
2476{ 2621{
2477 struct inet6_dev *idev; 2622 struct inet6_dev *idev;
@@ -2601,12 +2746,12 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2601 } 2746 }
2602 2747
2603 switch (dev->type) { 2748 switch (dev->type) {
2604#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) 2749#if IS_ENABLED(CONFIG_IPV6_SIT)
2605 case ARPHRD_SIT: 2750 case ARPHRD_SIT:
2606 addrconf_sit_config(dev); 2751 addrconf_sit_config(dev);
2607 break; 2752 break;
2608#endif 2753#endif
2609#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE) 2754#if IS_ENABLED(CONFIG_NET_IPGRE)
2610 case ARPHRD_IPGRE: 2755 case ARPHRD_IPGRE:
2611 addrconf_gre_config(dev); 2756 addrconf_gre_config(dev);
2612 break; 2757 break;
@@ -3194,7 +3339,7 @@ void if6_proc_exit(void)
3194} 3339}
3195#endif /* CONFIG_PROC_FS */ 3340#endif /* CONFIG_PROC_FS */
3196 3341
3197#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 3342#if IS_ENABLED(CONFIG_IPV6_MIP6)
3198/* Check if address is a home address configured on any interface. */ 3343/* Check if address is a home address configured on any interface. */
3199int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) 3344int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
3200{ 3345{
@@ -4784,6 +4929,8 @@ int __init addrconf_init(void)
4784 inet6_dump_ifmcaddr, NULL); 4929 inet6_dump_ifmcaddr, NULL);
4785 __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, 4930 __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
4786 inet6_dump_ifacaddr, NULL); 4931 inet6_dump_ifacaddr, NULL);
4932 __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
4933 NULL, NULL);
4787 4934
4788 ipv6_addr_label_rtnl_register(); 4935 ipv6_addr_label_rtnl_register();
4789 4936
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 7e6139508ee7..ecc35b93314b 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -44,7 +44,7 @@
44#define IPV6HDR_BASELEN 8 44#define IPV6HDR_BASELEN 8
45 45
46struct tmp_ext { 46struct tmp_ext {
47#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 47#if IS_ENABLED(CONFIG_IPV6_MIP6)
48 struct in6_addr saddr; 48 struct in6_addr saddr;
49#endif 49#endif
50 struct in6_addr daddr; 50 struct in6_addr daddr;
@@ -152,7 +152,7 @@ bad:
152 return false; 152 return false;
153} 153}
154 154
155#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 155#if IS_ENABLED(CONFIG_IPV6_MIP6)
156/** 156/**
157 * ipv6_rearrange_destopt - rearrange IPv6 destination options header 157 * ipv6_rearrange_destopt - rearrange IPv6 destination options header
158 * @iph: IPv6 header 158 * @iph: IPv6 header
@@ -320,7 +320,7 @@ static void ah6_output_done(struct crypto_async_request *base, int err)
320 memcpy(top_iph, iph_base, IPV6HDR_BASELEN); 320 memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
321 321
322 if (extlen) { 322 if (extlen) {
323#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 323#if IS_ENABLED(CONFIG_IPV6_MIP6)
324 memcpy(&top_iph->saddr, iph_ext, extlen); 324 memcpy(&top_iph->saddr, iph_ext, extlen);
325#else 325#else
326 memcpy(&top_iph->daddr, iph_ext, extlen); 326 memcpy(&top_iph->daddr, iph_ext, extlen);
@@ -385,7 +385,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
385 memcpy(iph_base, top_iph, IPV6HDR_BASELEN); 385 memcpy(iph_base, top_iph, IPV6HDR_BASELEN);
386 386
387 if (extlen) { 387 if (extlen) {
388#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 388#if IS_ENABLED(CONFIG_IPV6_MIP6)
389 memcpy(iph_ext, &top_iph->saddr, extlen); 389 memcpy(iph_ext, &top_iph->saddr, extlen);
390#else 390#else
391 memcpy(iph_ext, &top_iph->daddr, extlen); 391 memcpy(iph_ext, &top_iph->daddr, extlen);
@@ -434,7 +434,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
434 memcpy(top_iph, iph_base, IPV6HDR_BASELEN); 434 memcpy(top_iph, iph_base, IPV6HDR_BASELEN);
435 435
436 if (extlen) { 436 if (extlen) {
437#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 437#if IS_ENABLED(CONFIG_IPV6_MIP6)
438 memcpy(&top_iph->saddr, iph_ext, extlen); 438 memcpy(&top_iph->saddr, iph_ext, extlen);
439#else 439#else
440 memcpy(&top_iph->daddr, iph_ext, extlen); 440 memcpy(&top_iph->daddr, iph_ext, extlen);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index cdf02be5f191..4963c769a13f 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -84,7 +84,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
84 rt = rt6_lookup(net, addr, NULL, 0, 0); 84 rt = rt6_lookup(net, addr, NULL, 0, 0);
85 if (rt) { 85 if (rt) {
86 dev = rt->dst.dev; 86 dev = rt->dst.dev;
87 dst_release(&rt->dst); 87 ip6_rt_put(rt);
88 } else if (ishost) { 88 } else if (ishost) {
89 err = -EADDRNOTAVAIL; 89 err = -EADDRNOTAVAIL;
90 goto error; 90 goto error;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index be2b67d631e5..93cbad2c0aa7 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -769,7 +769,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
769 rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg); 769 rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
770 770
771 switch (rthdr->type) { 771 switch (rthdr->type) {
772#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 772#if IS_ENABLED(CONFIG_IPV6_MIP6)
773 case IPV6_SRCRT_TYPE_2: 773 case IPV6_SRCRT_TYPE_2:
774 if (rthdr->hdrlen != 2 || 774 if (rthdr->hdrlen != 2 ||
775 rthdr->segments_left != 1) { 775 rthdr->segments_left != 1) {
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index fa3d9c328092..f005acc58b2a 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -43,7 +43,7 @@
43#include <net/ndisc.h> 43#include <net/ndisc.h>
44#include <net/ip6_route.h> 44#include <net/ip6_route.h>
45#include <net/addrconf.h> 45#include <net/addrconf.h>
46#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 46#if IS_ENABLED(CONFIG_IPV6_MIP6)
47#include <net/xfrm.h> 47#include <net/xfrm.h>
48#endif 48#endif
49 49
@@ -224,7 +224,7 @@ bad:
224 Destination options header. 224 Destination options header.
225 *****************************/ 225 *****************************/
226 226
227#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 227#if IS_ENABLED(CONFIG_IPV6_MIP6)
228static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) 228static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
229{ 229{
230 struct ipv6_destopt_hao *hao; 230 struct ipv6_destopt_hao *hao;
@@ -288,7 +288,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
288#endif 288#endif
289 289
290static const struct tlvtype_proc tlvprocdestopt_lst[] = { 290static const struct tlvtype_proc tlvprocdestopt_lst[] = {
291#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 291#if IS_ENABLED(CONFIG_IPV6_MIP6)
292 { 292 {
293 .type = IPV6_TLV_HAO, 293 .type = IPV6_TLV_HAO,
294 .func = ipv6_dest_hao, 294 .func = ipv6_dest_hao,
@@ -300,7 +300,7 @@ static const struct tlvtype_proc tlvprocdestopt_lst[] = {
300static int ipv6_destopt_rcv(struct sk_buff *skb) 300static int ipv6_destopt_rcv(struct sk_buff *skb)
301{ 301{
302 struct inet6_skb_parm *opt = IP6CB(skb); 302 struct inet6_skb_parm *opt = IP6CB(skb);
303#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 303#if IS_ENABLED(CONFIG_IPV6_MIP6)
304 __u16 dstbuf; 304 __u16 dstbuf;
305#endif 305#endif
306 struct dst_entry *dst = skb_dst(skb); 306 struct dst_entry *dst = skb_dst(skb);
@@ -315,14 +315,14 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
315 } 315 }
316 316
317 opt->lastopt = opt->dst1 = skb_network_header_len(skb); 317 opt->lastopt = opt->dst1 = skb_network_header_len(skb);
318#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 318#if IS_ENABLED(CONFIG_IPV6_MIP6)
319 dstbuf = opt->dst1; 319 dstbuf = opt->dst1;
320#endif 320#endif
321 321
322 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { 322 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
323 skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; 323 skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
324 opt = IP6CB(skb); 324 opt = IP6CB(skb);
325#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 325#if IS_ENABLED(CONFIG_IPV6_MIP6)
326 opt->nhoff = dstbuf; 326 opt->nhoff = dstbuf;
327#else 327#else
328 opt->nhoff = opt->dst1; 328 opt->nhoff = opt->dst1;
@@ -378,7 +378,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
378looped_back: 378looped_back:
379 if (hdr->segments_left == 0) { 379 if (hdr->segments_left == 0) {
380 switch (hdr->type) { 380 switch (hdr->type) {
381#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 381#if IS_ENABLED(CONFIG_IPV6_MIP6)
382 case IPV6_SRCRT_TYPE_2: 382 case IPV6_SRCRT_TYPE_2:
383 /* Silently discard type 2 header unless it was 383 /* Silently discard type 2 header unless it was
384 * processed by own 384 * processed by own
@@ -404,7 +404,7 @@ looped_back:
404 } 404 }
405 405
406 switch (hdr->type) { 406 switch (hdr->type) {
407#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 407#if IS_ENABLED(CONFIG_IPV6_MIP6)
408 case IPV6_SRCRT_TYPE_2: 408 case IPV6_SRCRT_TYPE_2:
409 if (accept_source_route < 0) 409 if (accept_source_route < 0)
410 goto unknown_rh; 410 goto unknown_rh;
@@ -461,7 +461,7 @@ looped_back:
461 addr += i - 1; 461 addr += i - 1;
462 462
463 switch (hdr->type) { 463 switch (hdr->type) {
464#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 464#if IS_ENABLED(CONFIG_IPV6_MIP6)
465 case IPV6_SRCRT_TYPE_2: 465 case IPV6_SRCRT_TYPE_2:
466 if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, 466 if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
467 (xfrm_address_t *)&ipv6_hdr(skb)->saddr, 467 (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index d9fb9110f607..2e1a432867c0 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -100,7 +100,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
100 goto out; 100 goto out;
101 } 101 }
102again: 102again:
103 dst_release(&rt->dst); 103 ip6_rt_put(rt);
104 rt = NULL; 104 rt = NULL;
105 goto out; 105 goto out;
106 106
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 24d69dbca4d6..b4a9fd51dae7 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -280,7 +280,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
280 return 0; 280 return 0;
281} 281}
282 282
283#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 283#if IS_ENABLED(CONFIG_IPV6_MIP6)
284static void mip6_addr_swap(struct sk_buff *skb) 284static void mip6_addr_swap(struct sk_buff *skb)
285{ 285{
286 struct ipv6hdr *iph = ipv6_hdr(skb); 286 struct ipv6hdr *iph = ipv6_hdr(skb);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 24995a93ef8c..710cafd2e1a9 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -672,6 +672,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
672 iter->rt6i_idev == rt->rt6i_idev && 672 iter->rt6i_idev == rt->rt6i_idev &&
673 ipv6_addr_equal(&iter->rt6i_gateway, 673 ipv6_addr_equal(&iter->rt6i_gateway,
674 &rt->rt6i_gateway)) { 674 &rt->rt6i_gateway)) {
675 if (rt->rt6i_nsiblings)
676 rt->rt6i_nsiblings = 0;
675 if (!(iter->rt6i_flags & RTF_EXPIRES)) 677 if (!(iter->rt6i_flags & RTF_EXPIRES))
676 return -EEXIST; 678 return -EEXIST;
677 if (!(rt->rt6i_flags & RTF_EXPIRES)) 679 if (!(rt->rt6i_flags & RTF_EXPIRES))
@@ -680,6 +682,21 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
680 rt6_set_expires(iter, rt->dst.expires); 682 rt6_set_expires(iter, rt->dst.expires);
681 return -EEXIST; 683 return -EEXIST;
682 } 684 }
685 /* If we have the same destination and the same metric,
686 * but not the same gateway, then the route we try to
687 * add is sibling to this route, increment our counter
688 * of siblings, and later we will add our route to the
689 * list.
690 * Only static routes (which don't have flag
691 * RTF_EXPIRES) are used for ECMPv6.
692 *
693 * To avoid long list, we only had siblings if the
694 * route have a gateway.
695 */
696 if (rt->rt6i_flags & RTF_GATEWAY &&
697 !(rt->rt6i_flags & RTF_EXPIRES) &&
698 !(iter->rt6i_flags & RTF_EXPIRES))
699 rt->rt6i_nsiblings++;
683 } 700 }
684 701
685 if (iter->rt6i_metric > rt->rt6i_metric) 702 if (iter->rt6i_metric > rt->rt6i_metric)
@@ -692,6 +709,35 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
692 if (ins == &fn->leaf) 709 if (ins == &fn->leaf)
693 fn->rr_ptr = NULL; 710 fn->rr_ptr = NULL;
694 711
712 /* Link this route to others same route. */
713 if (rt->rt6i_nsiblings) {
714 unsigned int rt6i_nsiblings;
715 struct rt6_info *sibling, *temp_sibling;
716
717 /* Find the first route that have the same metric */
718 sibling = fn->leaf;
719 while (sibling) {
720 if (sibling->rt6i_metric == rt->rt6i_metric) {
721 list_add_tail(&rt->rt6i_siblings,
722 &sibling->rt6i_siblings);
723 break;
724 }
725 sibling = sibling->dst.rt6_next;
726 }
727 /* For each sibling in the list, increment the counter of
728 * siblings. BUG() if counters does not match, list of siblings
729 * is broken!
730 */
731 rt6i_nsiblings = 0;
732 list_for_each_entry_safe(sibling, temp_sibling,
733 &rt->rt6i_siblings, rt6i_siblings) {
734 sibling->rt6i_nsiblings++;
735 BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings);
736 rt6i_nsiblings++;
737 }
738 BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
739 }
740
695 /* 741 /*
696 * insert node 742 * insert node
697 */ 743 */
@@ -1193,6 +1239,17 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1193 if (fn->rr_ptr == rt) 1239 if (fn->rr_ptr == rt)
1194 fn->rr_ptr = NULL; 1240 fn->rr_ptr = NULL;
1195 1241
1242 /* Remove this entry from other siblings */
1243 if (rt->rt6i_nsiblings) {
1244 struct rt6_info *sibling, *next_sibling;
1245
1246 list_for_each_entry_safe(sibling, next_sibling,
1247 &rt->rt6i_siblings, rt6i_siblings)
1248 sibling->rt6i_nsiblings--;
1249 rt->rt6i_nsiblings = 0;
1250 list_del_init(&rt->rt6i_siblings);
1251 }
1252
1196 /* Adjust walkers */ 1253 /* Adjust walkers */
1197 read_lock(&fib6_walker_lock); 1254 read_lock(&fib6_walker_lock);
1198 FOR_WALKERS(w) { 1255 FOR_WALKERS(w) {
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index d5cb3c4e66f8..12aa473e9793 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1069,7 +1069,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
1069 dev->mtu = IPV6_MIN_MTU; 1069 dev->mtu = IPV6_MIN_MTU;
1070 } 1070 }
1071 } 1071 }
1072 dst_release(&rt->dst); 1072 ip6_rt_put(rt);
1073 } 1073 }
1074 1074
1075 t->hlen = addend; 1075 t->hlen = addend;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index aece3e792f84..3deaa4e2e8e2 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -538,8 +538,7 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
538 to->tc_index = from->tc_index; 538 to->tc_index = from->tc_index;
539#endif 539#endif
540 nf_copy(to, from); 540 nf_copy(to, from);
541#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 541#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
542 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
543 to->nf_trace = from->nf_trace; 542 to->nf_trace = from->nf_trace;
544#endif 543#endif
545 skb_copy_secmark(to, from); 544 skb_copy_secmark(to, from);
@@ -564,7 +563,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
564 found_rhdr = 1; 563 found_rhdr = 1;
565 break; 564 break;
566 case NEXTHDR_DEST: 565 case NEXTHDR_DEST:
567#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 566#if IS_ENABLED(CONFIG_IPV6_MIP6)
568 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) 567 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
569 break; 568 break;
570#endif 569#endif
@@ -756,7 +755,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
756 if (err == 0) { 755 if (err == 0) {
757 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 756 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
758 IPSTATS_MIB_FRAGOKS); 757 IPSTATS_MIB_FRAGOKS);
759 dst_release(&rt->dst); 758 ip6_rt_put(rt);
760 return 0; 759 return 0;
761 } 760 }
762 761
@@ -768,7 +767,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
768 767
769 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), 768 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
770 IPSTATS_MIB_FRAGFAILS); 769 IPSTATS_MIB_FRAGFAILS);
771 dst_release(&rt->dst); 770 ip6_rt_put(rt);
772 return err; 771 return err;
773 772
774slow_path_clean: 773slow_path_clean:
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index cb7e2ded6f08..424ed45ef122 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -83,6 +83,7 @@ static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
83 83
84static int ip6_tnl_dev_init(struct net_device *dev); 84static int ip6_tnl_dev_init(struct net_device *dev);
85static void ip6_tnl_dev_setup(struct net_device *dev); 85static void ip6_tnl_dev_setup(struct net_device *dev);
86static struct rtnl_link_ops ip6_link_ops __read_mostly;
86 87
87static int ip6_tnl_net_id __read_mostly; 88static int ip6_tnl_net_id __read_mostly;
88struct ip6_tnl_net { 89struct ip6_tnl_net {
@@ -299,6 +300,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
299 goto failed_free; 300 goto failed_free;
300 301
301 strcpy(t->parms.name, dev->name); 302 strcpy(t->parms.name, dev->name);
303 dev->rtnl_link_ops = &ip6_link_ops;
302 304
303 dev_hold(dev); 305 dev_hold(dev);
304 ip6_tnl_link(ip6n, t); 306 ip6_tnl_link(ip6n, t);
@@ -663,8 +665,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
663 665
664 icmpv6_send(skb2, rel_type, rel_code, rel_info); 666 icmpv6_send(skb2, rel_type, rel_code, rel_info);
665 667
666 if (rt) 668 ip6_rt_put(rt);
667 dst_release(&rt->dst);
668 669
669 kfree_skb(skb2); 670 kfree_skb(skb2);
670 } 671 }
@@ -1208,7 +1209,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
1208 if (dev->mtu < IPV6_MIN_MTU) 1209 if (dev->mtu < IPV6_MIN_MTU)
1209 dev->mtu = IPV6_MIN_MTU; 1210 dev->mtu = IPV6_MIN_MTU;
1210 } 1211 }
1211 dst_release(&rt->dst); 1212 ip6_rt_put(rt);
1212 } 1213 }
1213} 1214}
1214 1215
@@ -1505,6 +1506,55 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
1505 return 0; 1506 return 0;
1506} 1507}
1507 1508
1509static size_t ip6_get_size(const struct net_device *dev)
1510{
1511 return
1512 /* IFLA_IPTUN_LINK */
1513 nla_total_size(4) +
1514 /* IFLA_IPTUN_LOCAL */
1515 nla_total_size(sizeof(struct in6_addr)) +
1516 /* IFLA_IPTUN_REMOTE */
1517 nla_total_size(sizeof(struct in6_addr)) +
1518 /* IFLA_IPTUN_TTL */
1519 nla_total_size(1) +
1520 /* IFLA_IPTUN_ENCAP_LIMIT */
1521 nla_total_size(1) +
1522 /* IFLA_IPTUN_FLOWINFO */
1523 nla_total_size(4) +
1524 /* IFLA_IPTUN_FLAGS */
1525 nla_total_size(4) +
1526 0;
1527}
1528
1529static int ip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
1530{
1531 struct ip6_tnl *tunnel = netdev_priv(dev);
1532 struct __ip6_tnl_parm *parm = &tunnel->parms;
1533
1534 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
1535 nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr),
1536 &parm->raddr) ||
1537 nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
1538 &parm->laddr) ||
1539 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
1540 nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
1541 nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
1542 nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags))
1543 goto nla_put_failure;
1544 return 0;
1545
1546nla_put_failure:
1547 return -EMSGSIZE;
1548}
1549
1550static struct rtnl_link_ops ip6_link_ops __read_mostly = {
1551 .kind = "ip6tnl",
1552 .maxtype = IFLA_IPTUN_MAX,
1553 .priv_size = sizeof(struct ip6_tnl),
1554 .get_size = ip6_get_size,
1555 .fill_info = ip6_fill_info,
1556};
1557
1508static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { 1558static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
1509 .handler = ip4ip6_rcv, 1559 .handler = ip4ip6_rcv,
1510 .err_handler = ip4ip6_err, 1560 .err_handler = ip4ip6_err,
@@ -1613,9 +1663,14 @@ static int __init ip6_tunnel_init(void)
1613 pr_err("%s: can't register ip6ip6\n", __func__); 1663 pr_err("%s: can't register ip6ip6\n", __func__);
1614 goto out_ip6ip6; 1664 goto out_ip6ip6;
1615 } 1665 }
1666 err = rtnl_link_register(&ip6_link_ops);
1667 if (err < 0)
1668 goto rtnl_link_failed;
1616 1669
1617 return 0; 1670 return 0;
1618 1671
1672rtnl_link_failed:
1673 xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
1619out_ip6ip6: 1674out_ip6ip6:
1620 xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); 1675 xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
1621out_ip4ip6: 1676out_ip4ip6:
@@ -1630,6 +1685,7 @@ out_pernet:
1630 1685
1631static void __exit ip6_tunnel_cleanup(void) 1686static void __exit ip6_tunnel_cleanup(void)
1632{ 1687{
1688 rtnl_link_unregister(&ip6_link_ops);
1633 if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET)) 1689 if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
1634 pr_info("%s: can't deregister ip4ip6\n", __func__); 1690 pr_info("%s: can't deregister ip4ip6\n", __func__);
1635 1691
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index ba6d13d1f1e1..a7bee6a91335 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -397,7 +397,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
397 if (optname == IPV6_RTHDR && opt && opt->srcrt) { 397 if (optname == IPV6_RTHDR && opt && opt->srcrt) {
398 struct ipv6_rt_hdr *rthdr = opt->srcrt; 398 struct ipv6_rt_hdr *rthdr = opt->srcrt;
399 switch (rthdr->type) { 399 switch (rthdr->type) {
400#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 400#if IS_ENABLED(CONFIG_IPV6_MIP6)
401 case IPV6_SRCRT_TYPE_2: 401 case IPV6_SRCRT_TYPE_2:
402 if (rthdr->hdrlen != 2 || 402 if (rthdr->hdrlen != 2 ||
403 rthdr->segments_left != 1) 403 rthdr->segments_left != 1)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 92f8e48e4ba4..b19ed51a45bb 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -163,7 +163,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
163 rt = rt6_lookup(net, addr, NULL, 0, 0); 163 rt = rt6_lookup(net, addr, NULL, 0, 0);
164 if (rt) { 164 if (rt) {
165 dev = rt->dst.dev; 165 dev = rt->dst.dev;
166 dst_release(&rt->dst); 166 ip6_rt_put(rt);
167 } 167 }
168 } else 168 } else
169 dev = dev_get_by_index_rcu(net, ifindex); 169 dev = dev_get_by_index_rcu(net, ifindex);
@@ -260,7 +260,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
260 260
261 if (rt) { 261 if (rt) {
262 dev = rt->dst.dev; 262 dev = rt->dst.dev;
263 dst_release(&rt->dst); 263 ip6_rt_put(rt);
264 } 264 }
265 } else 265 } else
266 dev = dev_get_by_index_rcu(net, ifindex); 266 dev = dev_get_by_index_rcu(net, ifindex);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 2edce30ef733..4f47aa5183ae 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -905,7 +905,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
905 if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && 905 if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
906 net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp && 906 net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp &&
907 pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { 907 pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
908 /* XXX: idev->cnf.prixy_ndp */ 908 /* XXX: idev->cnf.proxy_ndp */
909 goto out; 909 goto out;
910 } 910 }
911 911
@@ -1144,7 +1144,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1144 ND_PRINTK(0, err, 1144 ND_PRINTK(0, err,
1145 "RA: %s got default router without neighbour\n", 1145 "RA: %s got default router without neighbour\n",
1146 __func__); 1146 __func__);
1147 dst_release(&rt->dst); 1147 ip6_rt_put(rt);
1148 return; 1148 return;
1149 } 1149 }
1150 } 1150 }
@@ -1169,7 +1169,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1169 ND_PRINTK(0, err, 1169 ND_PRINTK(0, err,
1170 "RA: %s got default router without neighbour\n", 1170 "RA: %s got default router without neighbour\n",
1171 __func__); 1171 __func__);
1172 dst_release(&rt->dst); 1172 ip6_rt_put(rt);
1173 return; 1173 return;
1174 } 1174 }
1175 neigh->flags |= NTF_ROUTER; 1175 neigh->flags |= NTF_ROUTER;
@@ -1325,8 +1325,7 @@ skip_routeinfo:
1325 ND_PRINTK(2, warn, "RA: invalid RA options\n"); 1325 ND_PRINTK(2, warn, "RA: invalid RA options\n");
1326 } 1326 }
1327out: 1327out:
1328 if (rt) 1328 ip6_rt_put(rt);
1329 dst_release(&rt->dst);
1330 if (neigh) 1329 if (neigh)
1331 neigh_release(neigh); 1330 neigh_release(neigh);
1332} 1331}
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d7cb04506c3d..10ce76a2cb94 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -207,8 +207,7 @@ ip6t_get_target_c(const struct ip6t_entry *e)
207 return ip6t_get_target((struct ip6t_entry *)e); 207 return ip6t_get_target((struct ip6t_entry *)e);
208} 208}
209 209
210#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 210#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
211 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
212/* This cries for unification! */ 211/* This cries for unification! */
213static const char *const hooknames[] = { 212static const char *const hooknames[] = {
214 [NF_INET_PRE_ROUTING] = "PREROUTING", 213 [NF_INET_PRE_ROUTING] = "PREROUTING",
@@ -381,8 +380,7 @@ ip6t_do_table(struct sk_buff *skb,
381 t = ip6t_get_target_c(e); 380 t = ip6t_get_target_c(e);
382 IP_NF_ASSERT(t->u.kernel.target); 381 IP_NF_ASSERT(t->u.kernel.target);
383 382
384#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 383#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
385 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
386 /* The packet is traced: log it */ 384 /* The packet is traced: log it */
387 if (unlikely(skb->nf_trace)) 385 if (unlikely(skb->nf_trace))
388 trace_packet(skb, hook, in, out, 386 trace_packet(skb, hook, in, out,
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 5d1d8b04d694..5060d54199ab 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -67,7 +67,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
67 if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE)) 67 if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
68 ret = true; 68 ret = true;
69 out: 69 out:
70 dst_release(&rt->dst); 70 ip6_rt_put(rt);
71 return ret; 71 return ret;
72} 72}
73 73
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 8860d23e61cf..ccb5cbe93549 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -295,7 +295,7 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
295 }, 295 },
296}; 296};
297 297
298#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 298#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
299 299
300#include <linux/netfilter/nfnetlink.h> 300#include <linux/netfilter/nfnetlink.h>
301#include <linux/netfilter/nfnetlink_conntrack.h> 301#include <linux/netfilter/nfnetlink_conntrack.h>
@@ -346,7 +346,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
346 .invert_tuple = ipv6_invert_tuple, 346 .invert_tuple = ipv6_invert_tuple,
347 .print_tuple = ipv6_print_tuple, 347 .print_tuple = ipv6_print_tuple,
348 .get_l4proto = ipv6_get_l4proto, 348 .get_l4proto = ipv6_get_l4proto,
349#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 349#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
350 .tuple_to_nlattr = ipv6_tuple_to_nlattr, 350 .tuple_to_nlattr = ipv6_tuple_to_nlattr,
351 .nlattr_tuple_size = ipv6_nlattr_tuple_size, 351 .nlattr_tuple_size = ipv6_nlattr_tuple_size,
352 .nlattr_to_tuple = ipv6_nlattr_to_tuple, 352 .nlattr_to_tuple = ipv6_nlattr_to_tuple,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 2d54b2061d68..24df3dde0076 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -232,7 +232,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
232 return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum); 232 return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum);
233} 233}
234 234
235#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 235#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
236 236
237#include <linux/netfilter/nfnetlink.h> 237#include <linux/netfilter/nfnetlink.h>
238#include <linux/netfilter/nfnetlink_conntrack.h> 238#include <linux/netfilter/nfnetlink_conntrack.h>
@@ -375,7 +375,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
375 .get_timeouts = icmpv6_get_timeouts, 375 .get_timeouts = icmpv6_get_timeouts,
376 .new = icmpv6_new, 376 .new = icmpv6_new,
377 .error = icmpv6_error, 377 .error = icmpv6_error,
378#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 378#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
379 .tuple_to_nlattr = icmpv6_tuple_to_nlattr, 379 .tuple_to_nlattr = icmpv6_tuple_to_nlattr,
380 .nlattr_tuple_size = icmpv6_nlattr_tuple_size, 380 .nlattr_tuple_size = icmpv6_nlattr_tuple_size,
381 .nlattr_to_tuple = icmpv6_nlattr_to_tuple, 381 .nlattr_to_tuple = icmpv6_nlattr_to_tuple,
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index cdd6d045e42e..aacd121fe8c5 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -19,7 +19,7 @@
19 19
20#include <linux/netfilter_ipv6.h> 20#include <linux/netfilter_ipv6.h>
21#include <linux/netfilter_bridge.h> 21#include <linux/netfilter_bridge.h>
22#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 22#if IS_ENABLED(CONFIG_NF_CONNTRACK)
23#include <net/netfilter/nf_conntrack.h> 23#include <net/netfilter/nf_conntrack.h>
24#include <net/netfilter/nf_conntrack_helper.h> 24#include <net/netfilter/nf_conntrack_helper.h>
25#include <net/netfilter/nf_conntrack_l4proto.h> 25#include <net/netfilter/nf_conntrack_l4proto.h>
@@ -35,7 +35,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
35{ 35{
36 u16 zone = NF_CT_DEFAULT_ZONE; 36 u16 zone = NF_CT_DEFAULT_ZONE;
37 37
38#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 38#if IS_ENABLED(CONFIG_NF_CONNTRACK)
39 if (skb->nfct) 39 if (skb->nfct)
40 zone = nf_ct_zone((struct nf_conn *)skb->nfct); 40 zone = nf_ct_zone((struct nf_conn *)skb->nfct);
41#endif 41#endif
@@ -60,7 +60,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
60{ 60{
61 struct sk_buff *reasm; 61 struct sk_buff *reasm;
62 62
63#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 63#if IS_ENABLED(CONFIG_NF_CONNTRACK)
64 /* Previously seen (loopback)? */ 64 /* Previously seen (loopback)? */
65 if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) 65 if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
66 return NF_ACCEPT; 66 return NF_ACCEPT;
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
index 5d6da784305b..61aaf70f376e 100644
--- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -84,7 +84,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = {
84 .manip_pkt = icmpv6_manip_pkt, 84 .manip_pkt = icmpv6_manip_pkt,
85 .in_range = icmpv6_in_range, 85 .in_range = icmpv6_in_range,
86 .unique_tuple = icmpv6_unique_tuple, 86 .unique_tuple = icmpv6_unique_tuple,
87#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 87#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
88 .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, 88 .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
89#endif 89#endif
90}; 90};
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index d8e95c77db99..6cd29b1e8b92 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -50,7 +50,7 @@
50#include <net/udp.h> 50#include <net/udp.h>
51#include <net/inet_common.h> 51#include <net/inet_common.h>
52#include <net/tcp_states.h> 52#include <net/tcp_states.h>
53#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 53#if IS_ENABLED(CONFIG_IPV6_MIP6)
54#include <net/mip6.h> 54#include <net/mip6.h>
55#endif 55#endif
56#include <linux/mroute6.h> 56#include <linux/mroute6.h>
@@ -123,7 +123,7 @@ static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb)
123 return 1; 123 return 1;
124} 124}
125 125
126#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 126#if IS_ENABLED(CONFIG_IPV6_MIP6)
127typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb); 127typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb);
128 128
129static mh_filter_t __rcu *mh_filter __read_mostly; 129static mh_filter_t __rcu *mh_filter __read_mostly;
@@ -184,7 +184,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
184 filtered = icmpv6_filter(sk, skb); 184 filtered = icmpv6_filter(sk, skb);
185 break; 185 break;
186 186
187#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 187#if IS_ENABLED(CONFIG_IPV6_MIP6)
188 case IPPROTO_MH: 188 case IPPROTO_MH:
189 { 189 {
190 /* XXX: To validate MH only once for each packet, 190 /* XXX: To validate MH only once for each packet,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b1e6cf0b95fd..30458726accf 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -57,6 +57,7 @@
57#include <net/xfrm.h> 57#include <net/xfrm.h>
58#include <net/netevent.h> 58#include <net/netevent.h>
59#include <net/netlink.h> 59#include <net/netlink.h>
60#include <net/nexthop.h>
60 61
61#include <asm/uaccess.h> 62#include <asm/uaccess.h>
62 63
@@ -289,6 +290,8 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
289 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); 290 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
290 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); 291 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
291 rt->rt6i_genid = rt_genid(net); 292 rt->rt6i_genid = rt_genid(net);
293 INIT_LIST_HEAD(&rt->rt6i_siblings);
294 rt->rt6i_nsiblings = 0;
292 } 295 }
293 return rt; 296 return rt;
294} 297}
@@ -318,13 +321,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)
318 } 321 }
319} 322}
320 323
321static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
322
323static u32 rt6_peer_genid(void)
324{
325 return atomic_read(&__rt6_peer_genid);
326}
327
328void rt6_bind_peer(struct rt6_info *rt, int create) 324void rt6_bind_peer(struct rt6_info *rt, int create)
329{ 325{
330 struct inet_peer_base *base; 326 struct inet_peer_base *base;
@@ -338,8 +334,6 @@ void rt6_bind_peer(struct rt6_info *rt, int create)
338 if (peer) { 334 if (peer) {
339 if (!rt6_set_peer(rt, peer)) 335 if (!rt6_set_peer(rt, peer))
340 inet_putpeer(peer); 336 inet_putpeer(peer);
341 else
342 rt->rt6i_peer_genid = rt6_peer_genid();
343 } 337 }
344} 338}
345 339
@@ -385,6 +379,69 @@ static bool rt6_need_strict(const struct in6_addr *daddr)
385 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); 379 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
386} 380}
387 381
382/* Multipath route selection:
383 * Hash based function using packet header and flowlabel.
384 * Adapted from fib_info_hashfn()
385 */
386static int rt6_info_hash_nhsfn(unsigned int candidate_count,
387 const struct flowi6 *fl6)
388{
389 unsigned int val = fl6->flowi6_proto;
390
391 val ^= (__force u32)fl6->daddr.s6_addr32[0];
392 val ^= (__force u32)fl6->daddr.s6_addr32[1];
393 val ^= (__force u32)fl6->daddr.s6_addr32[2];
394 val ^= (__force u32)fl6->daddr.s6_addr32[3];
395
396 val ^= (__force u32)fl6->saddr.s6_addr32[0];
397 val ^= (__force u32)fl6->saddr.s6_addr32[1];
398 val ^= (__force u32)fl6->saddr.s6_addr32[2];
399 val ^= (__force u32)fl6->saddr.s6_addr32[3];
400
401 /* Work only if this not encapsulated */
402 switch (fl6->flowi6_proto) {
403 case IPPROTO_UDP:
404 case IPPROTO_TCP:
405 case IPPROTO_SCTP:
406 val ^= (__force u16)fl6->fl6_sport;
407 val ^= (__force u16)fl6->fl6_dport;
408 break;
409
410 case IPPROTO_ICMPV6:
411 val ^= (__force u16)fl6->fl6_icmp_type;
412 val ^= (__force u16)fl6->fl6_icmp_code;
413 break;
414 }
415 /* RFC6438 recommands to use flowlabel */
416 val ^= (__force u32)fl6->flowlabel;
417
418 /* Perhaps, we need to tune, this function? */
419 val = val ^ (val >> 7) ^ (val >> 12);
420 return val % candidate_count;
421}
422
423static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
424 struct flowi6 *fl6)
425{
426 struct rt6_info *sibling, *next_sibling;
427 int route_choosen;
428
429 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
430 /* Don't change the route, if route_choosen == 0
431 * (siblings does not include ourself)
432 */
433 if (route_choosen)
434 list_for_each_entry_safe(sibling, next_sibling,
435 &match->rt6i_siblings, rt6i_siblings) {
436 route_choosen--;
437 if (route_choosen == 0) {
438 match = sibling;
439 break;
440 }
441 }
442 return match;
443}
444
388/* 445/*
389 * Route lookup. Any table->tb6_lock is implied. 446 * Route lookup. Any table->tb6_lock is implied.
390 */ 447 */
@@ -666,7 +723,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
666 else 723 else
667 rt6_set_expires(rt, jiffies + HZ * lifetime); 724 rt6_set_expires(rt, jiffies + HZ * lifetime);
668 725
669 dst_release(&rt->dst); 726 ip6_rt_put(rt);
670 } 727 }
671 return 0; 728 return 0;
672} 729}
@@ -702,6 +759,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
702restart: 759restart:
703 rt = fn->leaf; 760 rt = fn->leaf;
704 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); 761 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
762 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
763 rt = rt6_multipath_select(rt, fl6);
705 BACKTRACK(net, &fl6->saddr); 764 BACKTRACK(net, &fl6->saddr);
706out: 765out:
707 dst_use(&rt->dst, jiffies); 766 dst_use(&rt->dst, jiffies);
@@ -863,7 +922,8 @@ restart_2:
863 922
864restart: 923restart:
865 rt = rt6_select(fn, oif, strict | reachable); 924 rt = rt6_select(fn, oif, strict | reachable);
866 925 if (rt->rt6i_nsiblings && oif == 0)
926 rt = rt6_multipath_select(rt, fl6);
867 BACKTRACK(net, &fl6->saddr); 927 BACKTRACK(net, &fl6->saddr);
868 if (rt == net->ipv6.ip6_null_entry || 928 if (rt == net->ipv6.ip6_null_entry ||
869 rt->rt6i_flags & RTF_CACHE) 929 rt->rt6i_flags & RTF_CACHE)
@@ -879,7 +939,7 @@ restart:
879 else 939 else
880 goto out2; 940 goto out2;
881 941
882 dst_release(&rt->dst); 942 ip6_rt_put(rt);
883 rt = nrt ? : net->ipv6.ip6_null_entry; 943 rt = nrt ? : net->ipv6.ip6_null_entry;
884 944
885 dst_hold(&rt->dst); 945 dst_hold(&rt->dst);
@@ -896,7 +956,7 @@ restart:
896 * Race condition! In the gap, when table->tb6_lock was 956 * Race condition! In the gap, when table->tb6_lock was
897 * released someone could insert this route. Relookup. 957 * released someone could insert this route. Relookup.
898 */ 958 */
899 dst_release(&rt->dst); 959 ip6_rt_put(rt);
900 goto relookup; 960 goto relookup;
901 961
902out: 962out:
@@ -1030,14 +1090,9 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1030 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev))) 1090 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1031 return NULL; 1091 return NULL;
1032 1092
1033 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { 1093 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1034 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1035 if (!rt6_has_peer(rt))
1036 rt6_bind_peer(rt, 0);
1037 rt->rt6i_peer_genid = rt6_peer_genid();
1038 }
1039 return dst; 1094 return dst;
1040 } 1095
1041 return NULL; 1096 return NULL;
1042} 1097}
1043 1098
@@ -1507,7 +1562,7 @@ int ip6_route_add(struct fib6_config *cfg)
1507 goto out; 1562 goto out;
1508 if (dev) { 1563 if (dev) {
1509 if (dev != grt->dst.dev) { 1564 if (dev != grt->dst.dev) {
1510 dst_release(&grt->dst); 1565 ip6_rt_put(grt);
1511 goto out; 1566 goto out;
1512 } 1567 }
1513 } else { 1568 } else {
@@ -1518,7 +1573,7 @@ int ip6_route_add(struct fib6_config *cfg)
1518 } 1573 }
1519 if (!(grt->rt6i_flags & RTF_GATEWAY)) 1574 if (!(grt->rt6i_flags & RTF_GATEWAY))
1520 err = 0; 1575 err = 0;
1521 dst_release(&grt->dst); 1576 ip6_rt_put(grt);
1522 1577
1523 if (err) 1578 if (err)
1524 goto out; 1579 goto out;
@@ -1604,7 +1659,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1604 write_unlock_bh(&table->tb6_lock); 1659 write_unlock_bh(&table->tb6_lock);
1605 1660
1606out: 1661out:
1607 dst_release(&rt->dst); 1662 ip6_rt_put(rt);
1608 return err; 1663 return err;
1609} 1664}
1610 1665
@@ -2249,6 +2304,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2249 [RTA_IIF] = { .type = NLA_U32 }, 2304 [RTA_IIF] = { .type = NLA_U32 },
2250 [RTA_PRIORITY] = { .type = NLA_U32 }, 2305 [RTA_PRIORITY] = { .type = NLA_U32 },
2251 [RTA_METRICS] = { .type = NLA_NESTED }, 2306 [RTA_METRICS] = { .type = NLA_NESTED },
2307 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2252}; 2308};
2253 2309
2254static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2310static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2326,11 +2382,71 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2326 if (tb[RTA_TABLE]) 2382 if (tb[RTA_TABLE])
2327 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2383 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2328 2384
2385 if (tb[RTA_MULTIPATH]) {
2386 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2387 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2388 }
2389
2329 err = 0; 2390 err = 0;
2330errout: 2391errout:
2331 return err; 2392 return err;
2332} 2393}
2333 2394
2395static int ip6_route_multipath(struct fib6_config *cfg, int add)
2396{
2397 struct fib6_config r_cfg;
2398 struct rtnexthop *rtnh;
2399 int remaining;
2400 int attrlen;
2401 int err = 0, last_err = 0;
2402
2403beginning:
2404 rtnh = (struct rtnexthop *)cfg->fc_mp;
2405 remaining = cfg->fc_mp_len;
2406
2407 /* Parse a Multipath Entry */
2408 while (rtnh_ok(rtnh, remaining)) {
2409 memcpy(&r_cfg, cfg, sizeof(*cfg));
2410 if (rtnh->rtnh_ifindex)
2411 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2412
2413 attrlen = rtnh_attrlen(rtnh);
2414 if (attrlen > 0) {
2415 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2416
2417 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2418 if (nla) {
2419 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2420 r_cfg.fc_flags |= RTF_GATEWAY;
2421 }
2422 }
2423 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2424 if (err) {
2425 last_err = err;
2426 /* If we are trying to remove a route, do not stop the
2427 * loop when ip6_route_del() fails (because next hop is
2428 * already gone), we should try to remove all next hops.
2429 */
2430 if (add) {
2431 /* If add fails, we should try to delete all
2432 * next hops that have been already added.
2433 */
2434 add = 0;
2435 goto beginning;
2436 }
2437 }
2438 /* Because each route is added like a single route we remove
2439 * this flag after the first nexthop (if there is a collision,
2440 * we have already fail to add the first nexthop:
2441 * fib6_add_rt2node() has reject it).
2442 */
2443 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2444 rtnh = rtnh_next(rtnh, &remaining);
2445 }
2446
2447 return last_err;
2448}
2449
2334static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2450static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2335{ 2451{
2336 struct fib6_config cfg; 2452 struct fib6_config cfg;
@@ -2340,7 +2456,10 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
2340 if (err < 0) 2456 if (err < 0)
2341 return err; 2457 return err;
2342 2458
2343 return ip6_route_del(&cfg); 2459 if (cfg.fc_mp)
2460 return ip6_route_multipath(&cfg, 0);
2461 else
2462 return ip6_route_del(&cfg);
2344} 2463}
2345 2464
2346static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2465static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
@@ -2352,7 +2471,10 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
2352 if (err < 0) 2471 if (err < 0)
2353 return err; 2472 return err;
2354 2473
2355 return ip6_route_add(&cfg); 2474 if (cfg.fc_mp)
2475 return ip6_route_multipath(&cfg, 1);
2476 else
2477 return ip6_route_add(&cfg);
2356} 2478}
2357 2479
2358static inline size_t rt6_nlmsg_size(void) 2480static inline size_t rt6_nlmsg_size(void)
@@ -2596,7 +2718,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2596 2718
2597 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2719 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2598 if (!skb) { 2720 if (!skb) {
2599 dst_release(&rt->dst); 2721 ip6_rt_put(rt);
2600 err = -ENOBUFS; 2722 err = -ENOBUFS;
2601 goto errout; 2723 goto errout;
2602 } 2724 }
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3ed54ffd8d50..b543c56cad28 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -68,6 +68,7 @@
68static int ipip6_tunnel_init(struct net_device *dev); 68static int ipip6_tunnel_init(struct net_device *dev);
69static void ipip6_tunnel_setup(struct net_device *dev); 69static void ipip6_tunnel_setup(struct net_device *dev);
70static void ipip6_dev_free(struct net_device *dev); 70static void ipip6_dev_free(struct net_device *dev);
71static struct rtnl_link_ops sit_link_ops __read_mostly;
71 72
72static int sit_net_id __read_mostly; 73static int sit_net_id __read_mostly;
73struct sit_net { 74struct sit_net {
@@ -282,6 +283,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
282 goto failed_free; 283 goto failed_free;
283 284
284 strcpy(nt->parms.name, dev->name); 285 strcpy(nt->parms.name, dev->name);
286 dev->rtnl_link_ops = &sit_link_ops;
285 287
286 dev_hold(dev); 288 dev_hold(dev);
287 289
@@ -1216,6 +1218,47 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1216 return 0; 1218 return 0;
1217} 1219}
1218 1220
1221static size_t sit_get_size(const struct net_device *dev)
1222{
1223 return
1224 /* IFLA_IPTUN_LINK */
1225 nla_total_size(4) +
1226 /* IFLA_IPTUN_LOCAL */
1227 nla_total_size(4) +
1228 /* IFLA_IPTUN_REMOTE */
1229 nla_total_size(4) +
1230 /* IFLA_IPTUN_TTL */
1231 nla_total_size(1) +
1232 /* IFLA_IPTUN_TOS */
1233 nla_total_size(1) +
1234 0;
1235}
1236
1237static int sit_fill_info(struct sk_buff *skb, const struct net_device *dev)
1238{
1239 struct ip_tunnel *tunnel = netdev_priv(dev);
1240 struct ip_tunnel_parm *parm = &tunnel->parms;
1241
1242 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
1243 nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
1244 nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
1245 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
1246 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos))
1247 goto nla_put_failure;
1248 return 0;
1249
1250nla_put_failure:
1251 return -EMSGSIZE;
1252}
1253
1254static struct rtnl_link_ops sit_link_ops __read_mostly = {
1255 .kind = "sit",
1256 .maxtype = IFLA_IPTUN_MAX,
1257 .priv_size = sizeof(struct ip_tunnel),
1258 .get_size = sit_get_size,
1259 .fill_info = sit_fill_info,
1260};
1261
1219static struct xfrm_tunnel sit_handler __read_mostly = { 1262static struct xfrm_tunnel sit_handler __read_mostly = {
1220 .handler = ipip6_rcv, 1263 .handler = ipip6_rcv,
1221 .err_handler = ipip6_err, 1264 .err_handler = ipip6_err,
@@ -1302,6 +1345,7 @@ static struct pernet_operations sit_net_ops = {
1302 1345
1303static void __exit sit_cleanup(void) 1346static void __exit sit_cleanup(void)
1304{ 1347{
1348 rtnl_link_unregister(&sit_link_ops);
1305 xfrm4_tunnel_deregister(&sit_handler, AF_INET6); 1349 xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
1306 1350
1307 unregister_pernet_device(&sit_net_ops); 1351 unregister_pernet_device(&sit_net_ops);
@@ -1319,10 +1363,21 @@ static int __init sit_init(void)
1319 return err; 1363 return err;
1320 err = xfrm4_tunnel_register(&sit_handler, AF_INET6); 1364 err = xfrm4_tunnel_register(&sit_handler, AF_INET6);
1321 if (err < 0) { 1365 if (err < 0) {
1322 unregister_pernet_device(&sit_net_ops);
1323 pr_info("%s: can't add protocol\n", __func__); 1366 pr_info("%s: can't add protocol\n", __func__);
1367 goto xfrm_tunnel_failed;
1324 } 1368 }
1369 err = rtnl_link_register(&sit_link_ops);
1370 if (err < 0)
1371 goto rtnl_link_failed;
1372
1373out:
1325 return err; 1374 return err;
1375
1376rtnl_link_failed:
1377 xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
1378xfrm_tunnel_failed:
1379 unregister_pernet_device(&sit_net_ops);
1380 goto out;
1326} 1381}
1327 1382
1328module_init(sit_init); 1383module_init(sit_init);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 182ab9a85d6c..40161977f7cf 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -214,7 +214,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
214 ireq6->iif = inet6_iif(skb); 214 ireq6->iif = inet6_iif(skb);
215 215
216 req->expires = 0UL; 216 req->expires = 0UL;
217 req->retrans = 0; 217 req->num_retrans = 0;
218 ireq->ecn_ok = ecn_ok; 218 ireq->ecn_ok = ecn_ok;
219 ireq->snd_wscale = tcp_opt.snd_wscale; 219 ireq->snd_wscale = tcp_opt.snd_wscale;
220 ireq->sack_ok = tcp_opt.sack_ok; 220 ireq->sack_ok = tcp_opt.sack_ok;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 26175bffbaa0..c73d0ebde9c8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -495,9 +495,12 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
495 struct request_values *rvp) 495 struct request_values *rvp)
496{ 496{
497 struct flowi6 fl6; 497 struct flowi6 fl6;
498 int res;
498 499
499 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 500 res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
500 return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); 501 if (!res)
502 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
503 return res;
501} 504}
502 505
503static void tcp_v6_reqsk_destructor(struct request_sock *req) 506static void tcp_v6_reqsk_destructor(struct request_sock *req)
@@ -1364,7 +1367,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1364 1367
1365 tcp_initialize_rcv_mss(newsk); 1368 tcp_initialize_rcv_mss(newsk);
1366 tcp_synack_rtt_meas(newsk, req); 1369 tcp_synack_rtt_meas(newsk, req);
1367 newtp->total_retrans = req->retrans; 1370 newtp->total_retrans = req->num_retrans;
1368 1371
1369 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 1372 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1370 newinet->inet_rcv_saddr = LOOPBACK4_IPV6; 1373 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
@@ -1741,11 +1744,11 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
1741 skb->destructor = sock_edemux; 1744 skb->destructor = sock_edemux;
1742 if (sk->sk_state != TCP_TIME_WAIT) { 1745 if (sk->sk_state != TCP_TIME_WAIT) {
1743 struct dst_entry *dst = sk->sk_rx_dst; 1746 struct dst_entry *dst = sk->sk_rx_dst;
1744 struct inet_sock *icsk = inet_sk(sk); 1747
1745 if (dst) 1748 if (dst)
1746 dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); 1749 dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
1747 if (dst && 1750 if (dst &&
1748 icsk->rx_dst_ifindex == skb->skb_iif) 1751 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1749 skb_dst_set_noref(skb, dst); 1752 skb_dst_set_noref(skb, dst);
1750 } 1753 }
1751 } 1754 }
@@ -1866,7 +1869,7 @@ static void get_openreq6(struct seq_file *seq,
1866 0,0, /* could print option size, but that is af dependent. */ 1869 0,0, /* could print option size, but that is af dependent. */
1867 1, /* timers active (only the expire timer) */ 1870 1, /* timers active (only the expire timer) */
1868 jiffies_to_clock_t(ttd), 1871 jiffies_to_clock_t(ttd),
1869 req->retrans, 1872 req->num_timeout,
1870 from_kuid_munged(seq_user_ns(seq), uid), 1873 from_kuid_munged(seq_user_ns(seq), uid),
1871 0, /* non standard timer */ 1874 0, /* non standard timer */
1872 0, /* open_requests have no inode */ 1875 0, /* open_requests have no inode */
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index f8c4c08ffb60..f3ed8ca59b94 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -20,7 +20,7 @@
20#include <net/ip.h> 20#include <net/ip.h>
21#include <net/ipv6.h> 21#include <net/ipv6.h>
22#include <net/ip6_route.h> 22#include <net/ip6_route.h>
23#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 23#if IS_ENABLED(CONFIG_IPV6_MIP6)
24#include <net/mip6.h> 24#include <net/mip6.h>
25#endif 25#endif
26 26
@@ -182,7 +182,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
182 fl6->flowi6_proto = nexthdr; 182 fl6->flowi6_proto = nexthdr;
183 return; 183 return;
184 184
185#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 185#if IS_ENABLED(CONFIG_IPV6_MIP6)
186 case IPPROTO_MH: 186 case IPPROTO_MH:
187 if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { 187 if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
188 struct ip6_mh *mh; 188 struct ip6_mh *mh;
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 3f2f7c4ab721..d8c70b8efc24 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -101,7 +101,7 @@ static int __xfrm6_state_sort_cmp(void *p)
101 return 1; 101 return 1;
102 else 102 else
103 return 3; 103 return 3;
104#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 104#if IS_ENABLED(CONFIG_IPV6_MIP6)
105 case XFRM_MODE_ROUTEOPTIMIZATION: 105 case XFRM_MODE_ROUTEOPTIMIZATION:
106 case XFRM_MODE_IN_TRIGGER: 106 case XFRM_MODE_IN_TRIGGER:
107 return 2; 107 return 2;
@@ -134,7 +134,7 @@ static int __xfrm6_tmpl_sort_cmp(void *p)
134 switch (v->mode) { 134 switch (v->mode) {
135 case XFRM_MODE_TRANSPORT: 135 case XFRM_MODE_TRANSPORT:
136 return 1; 136 return 1;
137#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 137#if IS_ENABLED(CONFIG_IPV6_MIP6)
138 case XFRM_MODE_ROUTEOPTIMIZATION: 138 case XFRM_MODE_ROUTEOPTIMIZATION:
139 case XFRM_MODE_IN_TRIGGER: 139 case XFRM_MODE_IN_TRIGGER:
140 return 2; 140 return 2;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 6c4cc12c7414..bbba3a19e944 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -632,7 +632,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
632 nla_put_u16(skb, L2TP_ATTR_MRU, session->mru))) 632 nla_put_u16(skb, L2TP_ATTR_MRU, session->mru)))
633 goto nla_put_failure; 633 goto nla_put_failure;
634 634
635 if ((session->ifname && session->ifname[0] && 635 if ((session->ifname[0] &&
636 nla_put_string(skb, L2TP_ATTR_IFNAME, session->ifname)) || 636 nla_put_string(skb, L2TP_ATTR_IFNAME, session->ifname)) ||
637 (session->cookie_len && 637 (session->cookie_len &&
638 nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len, 638 nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len,
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 8b2cffdfdd99..0c3b1670b0d1 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -28,12 +28,11 @@ if IP_VS
28config IP_VS_IPV6 28config IP_VS_IPV6
29 bool "IPv6 support for IPVS" 29 bool "IPv6 support for IPVS"
30 depends on IPV6 = y || IP_VS = IPV6 30 depends on IPV6 = y || IP_VS = IPV6
31 select IP6_NF_IPTABLES
31 ---help--- 32 ---help---
32 Add IPv6 support to IPVS. This is incomplete and might be dangerous. 33 Add IPv6 support to IPVS.
33 34
34 See http://www.mindbasket.com/ipvs for more information. 35 Say Y if unsure.
35
36 Say N if unsure.
37 36
38config IP_VS_DEBUG 37config IP_VS_DEBUG
39 bool "IP virtual server debugging" 38 bool "IP virtual server debugging"
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 1548df9a7524..30e764ad021f 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -308,13 +308,12 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
308static int 308static int
309ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, 309ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
310 const struct ip_vs_iphdr *iph, 310 const struct ip_vs_iphdr *iph,
311 unsigned int proto_off, int inverse, 311 int inverse, struct ip_vs_conn_param *p)
312 struct ip_vs_conn_param *p)
313{ 312{
314 __be16 _ports[2], *pptr; 313 __be16 _ports[2], *pptr;
315 struct net *net = skb_net(skb); 314 struct net *net = skb_net(skb);
316 315
317 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 316 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
318 if (pptr == NULL) 317 if (pptr == NULL)
319 return 1; 318 return 1;
320 319
@@ -329,12 +328,11 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
329 328
330struct ip_vs_conn * 329struct ip_vs_conn *
331ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 330ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
332 const struct ip_vs_iphdr *iph, 331 const struct ip_vs_iphdr *iph, int inverse)
333 unsigned int proto_off, int inverse)
334{ 332{
335 struct ip_vs_conn_param p; 333 struct ip_vs_conn_param p;
336 334
337 if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p)) 335 if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p))
338 return NULL; 336 return NULL;
339 337
340 return ip_vs_conn_in_get(&p); 338 return ip_vs_conn_in_get(&p);
@@ -432,12 +430,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
432 430
433struct ip_vs_conn * 431struct ip_vs_conn *
434ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, 432ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
435 const struct ip_vs_iphdr *iph, 433 const struct ip_vs_iphdr *iph, int inverse)
436 unsigned int proto_off, int inverse)
437{ 434{
438 struct ip_vs_conn_param p; 435 struct ip_vs_conn_param p;
439 436
440 if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p)) 437 if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p))
441 return NULL; 438 return NULL;
442 439
443 return ip_vs_conn_out_get(&p); 440 return ip_vs_conn_out_get(&p);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 58918e20f9d5..fb45640dc1fb 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -222,11 +222,10 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
222 */ 222 */
223static struct ip_vs_conn * 223static struct ip_vs_conn *
224ip_vs_sched_persist(struct ip_vs_service *svc, 224ip_vs_sched_persist(struct ip_vs_service *svc,
225 struct sk_buff *skb, 225 struct sk_buff *skb, __be16 src_port, __be16 dst_port,
226 __be16 src_port, __be16 dst_port, int *ignored) 226 int *ignored, struct ip_vs_iphdr *iph)
227{ 227{
228 struct ip_vs_conn *cp = NULL; 228 struct ip_vs_conn *cp = NULL;
229 struct ip_vs_iphdr iph;
230 struct ip_vs_dest *dest; 229 struct ip_vs_dest *dest;
231 struct ip_vs_conn *ct; 230 struct ip_vs_conn *ct;
232 __be16 dport = 0; /* destination port to forward */ 231 __be16 dport = 0; /* destination port to forward */
@@ -236,20 +235,18 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
236 union nf_inet_addr snet; /* source network of the client, 235 union nf_inet_addr snet; /* source network of the client,
237 after masking */ 236 after masking */
238 237
239 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
240
241 /* Mask saddr with the netmask to adjust template granularity */ 238 /* Mask saddr with the netmask to adjust template granularity */
242#ifdef CONFIG_IP_VS_IPV6 239#ifdef CONFIG_IP_VS_IPV6
243 if (svc->af == AF_INET6) 240 if (svc->af == AF_INET6)
244 ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask); 241 ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, svc->netmask);
245 else 242 else
246#endif 243#endif
247 snet.ip = iph.saddr.ip & svc->netmask; 244 snet.ip = iph->saddr.ip & svc->netmask;
248 245
249 IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " 246 IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
250 "mnet %s\n", 247 "mnet %s\n",
251 IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port), 248 IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port),
252 IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port), 249 IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port),
253 IP_VS_DBG_ADDR(svc->af, &snet)); 250 IP_VS_DBG_ADDR(svc->af, &snet));
254 251
255 /* 252 /*
@@ -266,8 +263,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
266 * is created for other persistent services. 263 * is created for other persistent services.
267 */ 264 */
268 { 265 {
269 int protocol = iph.protocol; 266 int protocol = iph->protocol;
270 const union nf_inet_addr *vaddr = &iph.daddr; 267 const union nf_inet_addr *vaddr = &iph->daddr;
271 __be16 vport = 0; 268 __be16 vport = 0;
272 269
273 if (dst_port == svc->port) { 270 if (dst_port == svc->port) {
@@ -342,14 +339,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
342 dport = dest->port; 339 dport = dest->port;
343 340
344 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET 341 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
345 && iph.protocol == IPPROTO_UDP)? 342 && iph->protocol == IPPROTO_UDP) ?
346 IP_VS_CONN_F_ONE_PACKET : 0; 343 IP_VS_CONN_F_ONE_PACKET : 0;
347 344
348 /* 345 /*
349 * Create a new connection according to the template 346 * Create a new connection according to the template
350 */ 347 */
351 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, 348 ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
352 src_port, &iph.daddr, dst_port, &param); 349 src_port, &iph->daddr, dst_port, &param);
353 350
354 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark); 351 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
355 if (cp == NULL) { 352 if (cp == NULL) {
@@ -392,18 +389,20 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
392 */ 389 */
393struct ip_vs_conn * 390struct ip_vs_conn *
394ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, 391ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
395 struct ip_vs_proto_data *pd, int *ignored) 392 struct ip_vs_proto_data *pd, int *ignored,
393 struct ip_vs_iphdr *iph)
396{ 394{
397 struct ip_vs_protocol *pp = pd->pp; 395 struct ip_vs_protocol *pp = pd->pp;
398 struct ip_vs_conn *cp = NULL; 396 struct ip_vs_conn *cp = NULL;
399 struct ip_vs_iphdr iph;
400 struct ip_vs_dest *dest; 397 struct ip_vs_dest *dest;
401 __be16 _ports[2], *pptr; 398 __be16 _ports[2], *pptr;
402 unsigned int flags; 399 unsigned int flags;
403 400
404 *ignored = 1; 401 *ignored = 1;
405 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 402 /*
406 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); 403 * IPv6 frags, only the first hit here.
404 */
405 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
407 if (pptr == NULL) 406 if (pptr == NULL)
408 return NULL; 407 return NULL;
409 408
@@ -423,7 +422,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
423 * Do not schedule replies from local real server. 422 * Do not schedule replies from local real server.
424 */ 423 */
425 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 424 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
426 (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) { 425 (cp = pp->conn_in_get(svc->af, skb, iph, 1))) {
427 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, 426 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
428 "Not scheduling reply for existing connection"); 427 "Not scheduling reply for existing connection");
429 __ip_vs_conn_put(cp); 428 __ip_vs_conn_put(cp);
@@ -434,7 +433,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
434 * Persistent service 433 * Persistent service
435 */ 434 */
436 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 435 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
437 return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored); 436 return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored,
437 iph);
438 438
439 *ignored = 0; 439 *ignored = 0;
440 440
@@ -456,7 +456,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
456 } 456 }
457 457
458 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET 458 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
459 && iph.protocol == IPPROTO_UDP)? 459 && iph->protocol == IPPROTO_UDP) ?
460 IP_VS_CONN_F_ONE_PACKET : 0; 460 IP_VS_CONN_F_ONE_PACKET : 0;
461 461
462 /* 462 /*
@@ -465,9 +465,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
465 { 465 {
466 struct ip_vs_conn_param p; 466 struct ip_vs_conn_param p;
467 467
468 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, 468 ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
469 &iph.saddr, pptr[0], &iph.daddr, pptr[1], 469 &iph->saddr, pptr[0], &iph->daddr,
470 &p); 470 pptr[1], &p);
471 cp = ip_vs_conn_new(&p, &dest->addr, 471 cp = ip_vs_conn_new(&p, &dest->addr,
472 dest->port ? dest->port : pptr[1], 472 dest->port ? dest->port : pptr[1],
473 flags, dest, skb->mark); 473 flags, dest, skb->mark);
@@ -496,19 +496,16 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
496 * no destination is available for a new connection. 496 * no destination is available for a new connection.
497 */ 497 */
498int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, 498int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
499 struct ip_vs_proto_data *pd) 499 struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph)
500{ 500{
501 __be16 _ports[2], *pptr; 501 __be16 _ports[2], *pptr;
502 struct ip_vs_iphdr iph;
503#ifdef CONFIG_SYSCTL 502#ifdef CONFIG_SYSCTL
504 struct net *net; 503 struct net *net;
505 struct netns_ipvs *ipvs; 504 struct netns_ipvs *ipvs;
506 int unicast; 505 int unicast;
507#endif 506#endif
508 507
509 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 508 pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
510
511 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
512 if (pptr == NULL) { 509 if (pptr == NULL) {
513 ip_vs_service_put(svc); 510 ip_vs_service_put(svc);
514 return NF_DROP; 511 return NF_DROP;
@@ -519,10 +516,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
519 516
520#ifdef CONFIG_IP_VS_IPV6 517#ifdef CONFIG_IP_VS_IPV6
521 if (svc->af == AF_INET6) 518 if (svc->af == AF_INET6)
522 unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; 519 unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST;
523 else 520 else
524#endif 521#endif
525 unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST); 522 unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST);
526 523
527 /* if it is fwmark-based service, the cache_bypass sysctl is up 524 /* if it is fwmark-based service, the cache_bypass sysctl is up
528 and the destination is a non-local unicast, then create 525 and the destination is a non-local unicast, then create
@@ -532,7 +529,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
532 int ret; 529 int ret;
533 struct ip_vs_conn *cp; 530 struct ip_vs_conn *cp;
534 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && 531 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
535 iph.protocol == IPPROTO_UDP)? 532 iph->protocol == IPPROTO_UDP) ?
536 IP_VS_CONN_F_ONE_PACKET : 0; 533 IP_VS_CONN_F_ONE_PACKET : 0;
537 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; 534 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
538 535
@@ -542,9 +539,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
542 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); 539 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
543 { 540 {
544 struct ip_vs_conn_param p; 541 struct ip_vs_conn_param p;
545 ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, 542 ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
546 &iph.saddr, pptr[0], 543 &iph->saddr, pptr[0],
547 &iph.daddr, pptr[1], &p); 544 &iph->daddr, pptr[1], &p);
548 cp = ip_vs_conn_new(&p, &daddr, 0, 545 cp = ip_vs_conn_new(&p, &daddr, 0,
549 IP_VS_CONN_F_BYPASS | flags, 546 IP_VS_CONN_F_BYPASS | flags,
550 NULL, skb->mark); 547 NULL, skb->mark);
@@ -559,7 +556,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
559 ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); 556 ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
560 557
561 /* transmit the first SYN packet */ 558 /* transmit the first SYN packet */
562 ret = cp->packet_xmit(skb, cp, pd->pp); 559 ret = cp->packet_xmit(skb, cp, pd->pp, iph);
563 /* do not touch skb anymore */ 560 /* do not touch skb anymore */
564 561
565 atomic_inc(&cp->in_pkts); 562 atomic_inc(&cp->in_pkts);
@@ -654,14 +651,6 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
654 return err; 651 return err;
655} 652}
656 653
657#ifdef CONFIG_IP_VS_IPV6
658static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
659{
660 /* TODO IPv6: Find out what to do here for IPv6 */
661 return 0;
662}
663#endif
664
665static int ip_vs_route_me_harder(int af, struct sk_buff *skb) 654static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
666{ 655{
667#ifdef CONFIG_IP_VS_IPV6 656#ifdef CONFIG_IP_VS_IPV6
@@ -732,10 +721,19 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
732 struct ip_vs_conn *cp, int inout) 721 struct ip_vs_conn *cp, int inout)
733{ 722{
734 struct ipv6hdr *iph = ipv6_hdr(skb); 723 struct ipv6hdr *iph = ipv6_hdr(skb);
735 unsigned int icmp_offset = sizeof(struct ipv6hdr); 724 unsigned int icmp_offset = 0;
736 struct icmp6hdr *icmph = (struct icmp6hdr *)(skb_network_header(skb) + 725 unsigned int offs = 0; /* header offset*/
737 icmp_offset); 726 int protocol;
738 struct ipv6hdr *ciph = (struct ipv6hdr *)(icmph + 1); 727 struct icmp6hdr *icmph;
728 struct ipv6hdr *ciph;
729 unsigned short fragoffs;
730
731 ipv6_find_hdr(skb, &icmp_offset, IPPROTO_ICMPV6, &fragoffs, NULL);
732 icmph = (struct icmp6hdr *)(skb_network_header(skb) + icmp_offset);
733 offs = icmp_offset + sizeof(struct icmp6hdr);
734 ciph = (struct ipv6hdr *)(skb_network_header(skb) + offs);
735
736 protocol = ipv6_find_hdr(skb, &offs, -1, &fragoffs, NULL);
739 737
740 if (inout) { 738 if (inout) {
741 iph->saddr = cp->vaddr.in6; 739 iph->saddr = cp->vaddr.in6;
@@ -746,10 +744,13 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
746 } 744 }
747 745
748 /* the TCP/UDP/SCTP port */ 746 /* the TCP/UDP/SCTP port */
749 if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr || 747 if (!fragoffs && (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
750 IPPROTO_SCTP == ciph->nexthdr) { 748 IPPROTO_SCTP == protocol)) {
751 __be16 *ports = (void *)ciph + sizeof(struct ipv6hdr); 749 __be16 *ports = (void *)(skb_network_header(skb) + offs);
752 750
751 IP_VS_DBG(11, "%s() changed port %d to %d\n", __func__,
752 ntohs(inout ? ports[1] : ports[0]),
753 ntohs(inout ? cp->vport : cp->dport));
753 if (inout) 754 if (inout)
754 ports[1] = cp->vport; 755 ports[1] = cp->vport;
755 else 756 else
@@ -898,51 +899,35 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
898 IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, 899 IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
899 "Checking outgoing ICMP for"); 900 "Checking outgoing ICMP for");
900 901
901 offset += cih->ihl * 4; 902 ip_vs_fill_ip4hdr(cih, &ciph);
902 903 ciph.len += offset;
903 ip_vs_fill_iphdr(AF_INET, cih, &ciph);
904 /* The embedded headers contain source and dest in reverse order */ 904 /* The embedded headers contain source and dest in reverse order */
905 cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); 905 cp = pp->conn_out_get(AF_INET, skb, &ciph, 1);
906 if (!cp) 906 if (!cp)
907 return NF_ACCEPT; 907 return NF_ACCEPT;
908 908
909 snet.ip = iph->saddr; 909 snet.ip = iph->saddr;
910 return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp, 910 return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp,
911 pp, offset, ihl); 911 pp, ciph.len, ihl);
912} 912}
913 913
914#ifdef CONFIG_IP_VS_IPV6 914#ifdef CONFIG_IP_VS_IPV6
915static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, 915static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
916 unsigned int hooknum) 916 unsigned int hooknum, struct ip_vs_iphdr *ipvsh)
917{ 917{
918 struct ipv6hdr *iph;
919 struct icmp6hdr _icmph, *ic; 918 struct icmp6hdr _icmph, *ic;
920 struct ipv6hdr _ciph, *cih; /* The ip header contained 919 struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */
921 within the ICMP */ 920 struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
922 struct ip_vs_iphdr ciph;
923 struct ip_vs_conn *cp; 921 struct ip_vs_conn *cp;
924 struct ip_vs_protocol *pp; 922 struct ip_vs_protocol *pp;
925 unsigned int offset;
926 union nf_inet_addr snet; 923 union nf_inet_addr snet;
924 unsigned int writable;
927 925
928 *related = 1; 926 *related = 1;
929 927 ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh);
930 /* reassemble IP fragments */
931 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
932 if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
933 return NF_STOLEN;
934 }
935
936 iph = ipv6_hdr(skb);
937 offset = sizeof(struct ipv6hdr);
938 ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
939 if (ic == NULL) 928 if (ic == NULL)
940 return NF_DROP; 929 return NF_DROP;
941 930
942 IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) %pI6->%pI6\n",
943 ic->icmp6_type, ntohs(icmpv6_id(ic)),
944 &iph->saddr, &iph->daddr);
945
946 /* 931 /*
947 * Work through seeing if this is for us. 932 * Work through seeing if this is for us.
948 * These checks are supposed to be in an order that means easy 933 * These checks are supposed to be in an order that means easy
@@ -950,42 +935,45 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
950 * this means that some packets will manage to get a long way 935 * this means that some packets will manage to get a long way
951 * down this stack and then be rejected, but that's life. 936 * down this stack and then be rejected, but that's life.
952 */ 937 */
953 if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && 938 if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) {
954 (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
955 (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
956 *related = 0; 939 *related = 0;
957 return NF_ACCEPT; 940 return NF_ACCEPT;
958 } 941 }
942 /* Fragment header that is before ICMP header tells us that:
943 * it's not an error message since they can't be fragmented.
944 */
945 if (ipvsh->flags & IP6T_FH_F_FRAG)
946 return NF_DROP;
947
948 IP_VS_DBG(8, "Outgoing ICMPv6 (%d,%d) %pI6c->%pI6c\n",
949 ic->icmp6_type, ntohs(icmpv6_id(ic)),
950 &ipvsh->saddr, &ipvsh->daddr);
959 951
960 /* Now find the contained IP header */ 952 /* Now find the contained IP header */
961 offset += sizeof(_icmph); 953 ciph.len = ipvsh->len + sizeof(_icmph);
962 cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); 954 ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
963 if (cih == NULL) 955 if (ip6h == NULL)
964 return NF_ACCEPT; /* The packet looks wrong, ignore */ 956 return NF_ACCEPT; /* The packet looks wrong, ignore */
965 957 ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */
966 pp = ip_vs_proto_get(cih->nexthdr); 958 ciph.daddr.in6 = ip6h->daddr;
959 /* skip possible IPv6 exthdrs of contained IPv6 packet */
960 ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
961 if (ciph.protocol < 0)
962 return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
963
964 pp = ip_vs_proto_get(ciph.protocol);
967 if (!pp) 965 if (!pp)
968 return NF_ACCEPT; 966 return NF_ACCEPT;
969 967
970 /* Is the embedded protocol header present? */
971 /* TODO: we don't support fragmentation at the moment anyways */
972 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
973 return NF_ACCEPT;
974
975 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
976 "Checking outgoing ICMPv6 for");
977
978 offset += sizeof(struct ipv6hdr);
979
980 ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
981 /* The embedded headers contain source and dest in reverse order */ 968 /* The embedded headers contain source and dest in reverse order */
982 cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); 969 cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1);
983 if (!cp) 970 if (!cp)
984 return NF_ACCEPT; 971 return NF_ACCEPT;
985 972
986 snet.in6 = iph->saddr; 973 snet.in6 = ciph.saddr.in6;
987 return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp, 974 writable = ciph.len;
988 pp, offset, sizeof(struct ipv6hdr)); 975 return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp,
976 pp, writable, sizeof(struct ipv6hdr));
989} 977}
990#endif 978#endif
991 979
@@ -1018,17 +1006,17 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
1018 */ 1006 */
1019static unsigned int 1007static unsigned int
1020handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 1008handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
1021 struct ip_vs_conn *cp, int ihl) 1009 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
1022{ 1010{
1023 struct ip_vs_protocol *pp = pd->pp; 1011 struct ip_vs_protocol *pp = pd->pp;
1024 1012
1025 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); 1013 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
1026 1014
1027 if (!skb_make_writable(skb, ihl)) 1015 if (!skb_make_writable(skb, iph->len))
1028 goto drop; 1016 goto drop;
1029 1017
1030 /* mangle the packet */ 1018 /* mangle the packet */
1031 if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) 1019 if (pp->snat_handler && !pp->snat_handler(skb, pp, cp, iph))
1032 goto drop; 1020 goto drop;
1033 1021
1034#ifdef CONFIG_IP_VS_IPV6 1022#ifdef CONFIG_IP_VS_IPV6
@@ -1115,17 +1103,22 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1115 if (!net_ipvs(net)->enable) 1103 if (!net_ipvs(net)->enable)
1116 return NF_ACCEPT; 1104 return NF_ACCEPT;
1117 1105
1118 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1106 ip_vs_fill_iph_skb(af, skb, &iph);
1119#ifdef CONFIG_IP_VS_IPV6 1107#ifdef CONFIG_IP_VS_IPV6
1120 if (af == AF_INET6) { 1108 if (af == AF_INET6) {
1109 if (!iph.fragoffs && skb_nfct_reasm(skb)) {
1110 struct sk_buff *reasm = skb_nfct_reasm(skb);
1111 /* Save fw mark for coming frags */
1112 reasm->ipvs_property = 1;
1113 reasm->mark = skb->mark;
1114 }
1121 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1115 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
1122 int related; 1116 int related;
1123 int verdict = ip_vs_out_icmp_v6(skb, &related, 1117 int verdict = ip_vs_out_icmp_v6(skb, &related,
1124 hooknum); 1118 hooknum, &iph);
1125 1119
1126 if (related) 1120 if (related)
1127 return verdict; 1121 return verdict;
1128 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1129 } 1122 }
1130 } else 1123 } else
1131#endif 1124#endif
@@ -1135,7 +1128,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1135 1128
1136 if (related) 1129 if (related)
1137 return verdict; 1130 return verdict;
1138 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1139 } 1131 }
1140 1132
1141 pd = ip_vs_proto_data_get(net, iph.protocol); 1133 pd = ip_vs_proto_data_get(net, iph.protocol);
@@ -1145,39 +1137,31 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1145 1137
1146 /* reassemble IP fragments */ 1138 /* reassemble IP fragments */
1147#ifdef CONFIG_IP_VS_IPV6 1139#ifdef CONFIG_IP_VS_IPV6
1148 if (af == AF_INET6) { 1140 if (af == AF_INET)
1149 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
1150 if (ip_vs_gather_frags_v6(skb,
1151 ip_vs_defrag_user(hooknum)))
1152 return NF_STOLEN;
1153 }
1154
1155 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1156 } else
1157#endif 1141#endif
1158 if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) { 1142 if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) {
1159 if (ip_vs_gather_frags(skb, 1143 if (ip_vs_gather_frags(skb,
1160 ip_vs_defrag_user(hooknum))) 1144 ip_vs_defrag_user(hooknum)))
1161 return NF_STOLEN; 1145 return NF_STOLEN;
1162 1146
1163 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1147 ip_vs_fill_ip4hdr(skb_network_header(skb), &iph);
1164 } 1148 }
1165 1149
1166 /* 1150 /*
1167 * Check if the packet belongs to an existing entry 1151 * Check if the packet belongs to an existing entry
1168 */ 1152 */
1169 cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); 1153 cp = pp->conn_out_get(af, skb, &iph, 0);
1170 1154
1171 if (likely(cp)) 1155 if (likely(cp))
1172 return handle_response(af, skb, pd, cp, iph.len); 1156 return handle_response(af, skb, pd, cp, &iph);
1173 if (sysctl_nat_icmp_send(net) && 1157 if (sysctl_nat_icmp_send(net) &&
1174 (pp->protocol == IPPROTO_TCP || 1158 (pp->protocol == IPPROTO_TCP ||
1175 pp->protocol == IPPROTO_UDP || 1159 pp->protocol == IPPROTO_UDP ||
1176 pp->protocol == IPPROTO_SCTP)) { 1160 pp->protocol == IPPROTO_SCTP)) {
1177 __be16 _ports[2], *pptr; 1161 __be16 _ports[2], *pptr;
1178 1162
1179 pptr = skb_header_pointer(skb, iph.len, 1163 pptr = frag_safe_skb_hp(skb, iph.len,
1180 sizeof(_ports), _ports); 1164 sizeof(_ports), _ports, &iph);
1181 if (pptr == NULL) 1165 if (pptr == NULL)
1182 return NF_ACCEPT; /* Not for me */ 1166 return NF_ACCEPT; /* Not for me */
1183 if (ip_vs_lookup_real_service(net, af, iph.protocol, 1167 if (ip_vs_lookup_real_service(net, af, iph.protocol,
@@ -1375,13 +1359,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1375 "Checking incoming ICMP for"); 1359 "Checking incoming ICMP for");
1376 1360
1377 offset2 = offset; 1361 offset2 = offset;
1378 offset += cih->ihl * 4; 1362 ip_vs_fill_ip4hdr(cih, &ciph);
1379 1363 ciph.len += offset;
1380 ip_vs_fill_iphdr(AF_INET, cih, &ciph); 1364 offset = ciph.len;
1381 /* The embedded headers contain source and dest in reverse order. 1365 /* The embedded headers contain source and dest in reverse order.
1382 * For IPIP this is error for request, not for reply. 1366 * For IPIP this is error for request, not for reply.
1383 */ 1367 */
1384 cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1); 1368 cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1);
1385 if (!cp) 1369 if (!cp)
1386 return NF_ACCEPT; 1370 return NF_ACCEPT;
1387 1371
@@ -1450,7 +1434,7 @@ ignore_ipip:
1450 ip_vs_in_stats(cp, skb); 1434 ip_vs_in_stats(cp, skb);
1451 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) 1435 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
1452 offset += 2 * sizeof(__u16); 1436 offset += 2 * sizeof(__u16);
1453 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum); 1437 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph);
1454 1438
1455out: 1439out:
1456 __ip_vs_conn_put(cp); 1440 __ip_vs_conn_put(cp);
@@ -1459,38 +1443,24 @@ out:
1459} 1443}
1460 1444
1461#ifdef CONFIG_IP_VS_IPV6 1445#ifdef CONFIG_IP_VS_IPV6
1462static int 1446static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
1463ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) 1447 unsigned int hooknum, struct ip_vs_iphdr *iph)
1464{ 1448{
1465 struct net *net = NULL; 1449 struct net *net = NULL;
1466 struct ipv6hdr *iph; 1450 struct ipv6hdr _ip6h, *ip6h;
1467 struct icmp6hdr _icmph, *ic; 1451 struct icmp6hdr _icmph, *ic;
1468 struct ipv6hdr _ciph, *cih; /* The ip header contained 1452 struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
1469 within the ICMP */
1470 struct ip_vs_iphdr ciph;
1471 struct ip_vs_conn *cp; 1453 struct ip_vs_conn *cp;
1472 struct ip_vs_protocol *pp; 1454 struct ip_vs_protocol *pp;
1473 struct ip_vs_proto_data *pd; 1455 struct ip_vs_proto_data *pd;
1474 unsigned int offset, verdict; 1456 unsigned int offs_ciph, writable, verdict;
1475 1457
1476 *related = 1; 1458 *related = 1;
1477 1459
1478 /* reassemble IP fragments */ 1460 ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph, iph);
1479 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
1480 if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
1481 return NF_STOLEN;
1482 }
1483
1484 iph = ipv6_hdr(skb);
1485 offset = sizeof(struct ipv6hdr);
1486 ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
1487 if (ic == NULL) 1461 if (ic == NULL)
1488 return NF_DROP; 1462 return NF_DROP;
1489 1463
1490 IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) %pI6->%pI6\n",
1491 ic->icmp6_type, ntohs(icmpv6_id(ic)),
1492 &iph->saddr, &iph->daddr);
1493
1494 /* 1464 /*
1495 * Work through seeing if this is for us. 1465 * Work through seeing if this is for us.
1496 * These checks are supposed to be in an order that means easy 1466 * These checks are supposed to be in an order that means easy
@@ -1498,47 +1468,71 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1498 * this means that some packets will manage to get a long way 1468 * this means that some packets will manage to get a long way
1499 * down this stack and then be rejected, but that's life. 1469 * down this stack and then be rejected, but that's life.
1500 */ 1470 */
1501 if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && 1471 if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) {
1502 (ic->icmp6_type != ICMPV6_PKT_TOOBIG) &&
1503 (ic->icmp6_type != ICMPV6_TIME_EXCEED)) {
1504 *related = 0; 1472 *related = 0;
1505 return NF_ACCEPT; 1473 return NF_ACCEPT;
1506 } 1474 }
1475 /* Fragment header that is before ICMP header tells us that:
1476 * it's not an error message since they can't be fragmented.
1477 */
1478 if (iph->flags & IP6T_FH_F_FRAG)
1479 return NF_DROP;
1480
1481 IP_VS_DBG(8, "Incoming ICMPv6 (%d,%d) %pI6c->%pI6c\n",
1482 ic->icmp6_type, ntohs(icmpv6_id(ic)),
1483 &iph->saddr, &iph->daddr);
1507 1484
1508 /* Now find the contained IP header */ 1485 /* Now find the contained IP header */
1509 offset += sizeof(_icmph); 1486 ciph.len = iph->len + sizeof(_icmph);
1510 cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); 1487 offs_ciph = ciph.len; /* Save ip header offset */
1511 if (cih == NULL) 1488 ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
1489 if (ip6h == NULL)
1512 return NF_ACCEPT; /* The packet looks wrong, ignore */ 1490 return NF_ACCEPT; /* The packet looks wrong, ignore */
1491 ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */
1492 ciph.daddr.in6 = ip6h->daddr;
1493 /* skip possible IPv6 exthdrs of contained IPv6 packet */
1494 ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
1495 if (ciph.protocol < 0)
1496 return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
1513 1497
1514 net = skb_net(skb); 1498 net = skb_net(skb);
1515 pd = ip_vs_proto_data_get(net, cih->nexthdr); 1499 pd = ip_vs_proto_data_get(net, ciph.protocol);
1516 if (!pd) 1500 if (!pd)
1517 return NF_ACCEPT; 1501 return NF_ACCEPT;
1518 pp = pd->pp; 1502 pp = pd->pp;
1519 1503
1520 /* Is the embedded protocol header present? */ 1504 /* Cannot handle fragmented embedded protocol */
1521 /* TODO: we don't support fragmentation at the moment anyways */ 1505 if (ciph.fragoffs)
1522 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
1523 return NF_ACCEPT; 1506 return NF_ACCEPT;
1524 1507
1525 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, 1508 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph,
1526 "Checking incoming ICMPv6 for"); 1509 "Checking incoming ICMPv6 for");
1527 1510
1528 offset += sizeof(struct ipv6hdr); 1511 /* The embedded headers contain source and dest in reverse order
1512 * if not from localhost
1513 */
1514 cp = pp->conn_in_get(AF_INET6, skb, &ciph,
1515 (hooknum == NF_INET_LOCAL_OUT) ? 0 : 1);
1529 1516
1530 ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
1531 /* The embedded headers contain source and dest in reverse order */
1532 cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
1533 if (!cp) 1517 if (!cp)
1534 return NF_ACCEPT; 1518 return NF_ACCEPT;
1519 /* VS/TUN, VS/DR and LOCALNODE just let it go */
1520 if ((hooknum == NF_INET_LOCAL_OUT) &&
1521 (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) {
1522 __ip_vs_conn_put(cp);
1523 return NF_ACCEPT;
1524 }
1535 1525
1536 /* do the statistics and put it back */ 1526 /* do the statistics and put it back */
1537 ip_vs_in_stats(cp, skb); 1527 ip_vs_in_stats(cp, skb);
1538 if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr || 1528
1539 IPPROTO_SCTP == cih->nexthdr) 1529 /* Need to mangle contained IPv6 header in ICMPv6 packet */
1540 offset += 2 * sizeof(__u16); 1530 writable = ciph.len;
1541 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum); 1531 if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol ||
1532 IPPROTO_SCTP == ciph.protocol)
1533 writable += 2 * sizeof(__u16); /* Also mangle ports */
1534
1535 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph);
1542 1536
1543 __ip_vs_conn_put(cp); 1537 __ip_vs_conn_put(cp);
1544 1538
@@ -1574,7 +1568,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1574 if (unlikely((skb->pkt_type != PACKET_HOST && 1568 if (unlikely((skb->pkt_type != PACKET_HOST &&
1575 hooknum != NF_INET_LOCAL_OUT) || 1569 hooknum != NF_INET_LOCAL_OUT) ||
1576 !skb_dst(skb))) { 1570 !skb_dst(skb))) {
1577 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1571 ip_vs_fill_iph_skb(af, skb, &iph);
1578 IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s" 1572 IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
1579 " ignored in hook %u\n", 1573 " ignored in hook %u\n",
1580 skb->pkt_type, iph.protocol, 1574 skb->pkt_type, iph.protocol,
@@ -1586,7 +1580,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1586 if (!net_ipvs(net)->enable) 1580 if (!net_ipvs(net)->enable)
1587 return NF_ACCEPT; 1581 return NF_ACCEPT;
1588 1582
1589 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1583 ip_vs_fill_iph_skb(af, skb, &iph);
1590 1584
1591 /* Bad... Do not break raw sockets */ 1585 /* Bad... Do not break raw sockets */
1592 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && 1586 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
@@ -1600,13 +1594,19 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1600 1594
1601#ifdef CONFIG_IP_VS_IPV6 1595#ifdef CONFIG_IP_VS_IPV6
1602 if (af == AF_INET6) { 1596 if (af == AF_INET6) {
1597 if (!iph.fragoffs && skb_nfct_reasm(skb)) {
1598 struct sk_buff *reasm = skb_nfct_reasm(skb);
1599 /* Save fw mark for coming frags. */
1600 reasm->ipvs_property = 1;
1601 reasm->mark = skb->mark;
1602 }
1603 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1603 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
1604 int related; 1604 int related;
1605 int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum); 1605 int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
1606 &iph);
1606 1607
1607 if (related) 1608 if (related)
1608 return verdict; 1609 return verdict;
1609 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1610 } 1610 }
1611 } else 1611 } else
1612#endif 1612#endif
@@ -1616,7 +1616,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1616 1616
1617 if (related) 1617 if (related)
1618 return verdict; 1618 return verdict;
1619 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1620 } 1619 }
1621 1620
1622 /* Protocol supported? */ 1621 /* Protocol supported? */
@@ -1627,12 +1626,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1627 /* 1626 /*
1628 * Check if the packet belongs to an existing connection entry 1627 * Check if the packet belongs to an existing connection entry
1629 */ 1628 */
1630 cp = pp->conn_in_get(af, skb, &iph, iph.len, 0); 1629 cp = pp->conn_in_get(af, skb, &iph, 0);
1631 1630 if (unlikely(!cp) && !iph.fragoffs) {
1632 if (unlikely(!cp)) { 1631 /* No (second) fragments need to enter here, as nf_defrag_ipv6
1632 * replayed fragment zero will already have created the cp
1633 */
1633 int v; 1634 int v;
1634 1635
1635 if (!pp->conn_schedule(af, skb, pd, &v, &cp)) 1636 /* Schedule and create new connection entry into &cp */
1637 if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph))
1636 return v; 1638 return v;
1637 } 1639 }
1638 1640
@@ -1640,6 +1642,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1640 /* sorry, all this trouble for a no-hit :) */ 1642 /* sorry, all this trouble for a no-hit :) */
1641 IP_VS_DBG_PKT(12, af, pp, skb, 0, 1643 IP_VS_DBG_PKT(12, af, pp, skb, 0,
1642 "ip_vs_in: packet continues traversal as normal"); 1644 "ip_vs_in: packet continues traversal as normal");
1645 if (iph.fragoffs && !skb_nfct_reasm(skb)) {
1646 /* Fragment that couldn't be mapped to a conn entry
1647 * and don't have any pointer to a reasm skb
1648 * is missing module nf_defrag_ipv6
1649 */
1650 IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
1651 IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment");
1652 }
1643 return NF_ACCEPT; 1653 return NF_ACCEPT;
1644 } 1654 }
1645 1655
@@ -1662,7 +1672,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1662 ip_vs_in_stats(cp, skb); 1672 ip_vs_in_stats(cp, skb);
1663 ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); 1673 ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
1664 if (cp->packet_xmit) 1674 if (cp->packet_xmit)
1665 ret = cp->packet_xmit(skb, cp, pp); 1675 ret = cp->packet_xmit(skb, cp, pp, &iph);
1666 /* do not touch skb anymore */ 1676 /* do not touch skb anymore */
1667 else { 1677 else {
1668 IP_VS_DBG_RL("warning: packet_xmit is null"); 1678 IP_VS_DBG_RL("warning: packet_xmit is null");
@@ -1724,6 +1734,38 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
1724#ifdef CONFIG_IP_VS_IPV6 1734#ifdef CONFIG_IP_VS_IPV6
1725 1735
1726/* 1736/*
1737 * AF_INET6 fragment handling
1738 * Copy info from first fragment, to the rest of them.
1739 */
1740static unsigned int
1741ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
1742 const struct net_device *in,
1743 const struct net_device *out,
1744 int (*okfn)(struct sk_buff *))
1745{
1746 struct sk_buff *reasm = skb_nfct_reasm(skb);
1747 struct net *net;
1748
1749 /* Skip if not a "replay" from nf_ct_frag6_output or first fragment.
1750 * ipvs_property is set when checking first fragment
1751 * in ip_vs_in() and ip_vs_out().
1752 */
1753 if (reasm)
1754 IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property);
1755 if (!reasm || !reasm->ipvs_property)
1756 return NF_ACCEPT;
1757
1758 net = skb_net(skb);
1759 if (!net_ipvs(net)->enable)
1760 return NF_ACCEPT;
1761
1762 /* Copy stored fw mark, saved in ip_vs_{in,out} */
1763 skb->mark = reasm->mark;
1764
1765 return NF_ACCEPT;
1766}
1767
1768/*
1727 * AF_INET6 handler in NF_INET_LOCAL_IN chain 1769 * AF_INET6 handler in NF_INET_LOCAL_IN chain
1728 * Schedule and forward packets from remote clients 1770 * Schedule and forward packets from remote clients
1729 */ 1771 */
@@ -1793,8 +1835,10 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
1793{ 1835{
1794 int r; 1836 int r;
1795 struct net *net; 1837 struct net *net;
1838 struct ip_vs_iphdr iphdr;
1796 1839
1797 if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) 1840 ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr);
1841 if (iphdr.protocol != IPPROTO_ICMPV6)
1798 return NF_ACCEPT; 1842 return NF_ACCEPT;
1799 1843
1800 /* ipvs enabled in this netns ? */ 1844 /* ipvs enabled in this netns ? */
@@ -1802,7 +1846,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
1802 if (!net_ipvs(net)->enable) 1846 if (!net_ipvs(net)->enable)
1803 return NF_ACCEPT; 1847 return NF_ACCEPT;
1804 1848
1805 return ip_vs_in_icmp_v6(skb, &r, hooknum); 1849 return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr);
1806} 1850}
1807#endif 1851#endif
1808 1852
@@ -1860,6 +1904,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1860 .priority = 100, 1904 .priority = 100,
1861 }, 1905 },
1862#ifdef CONFIG_IP_VS_IPV6 1906#ifdef CONFIG_IP_VS_IPV6
1907 /* After mangle & nat fetch 2:nd fragment and following */
1908 {
1909 .hook = ip_vs_preroute_frag6,
1910 .owner = THIS_MODULE,
1911 .pf = NFPROTO_IPV6,
1912 .hooknum = NF_INET_PRE_ROUTING,
1913 .priority = NF_IP6_PRI_NAT_DST + 1,
1914 },
1863 /* After packet filtering, change source only for VS/NAT */ 1915 /* After packet filtering, change source only for VS/NAT */
1864 { 1916 {
1865 .hook = ip_vs_reply6, 1917 .hook = ip_vs_reply6,
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 8b7dca9ea422..7f3b0cc00b7a 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -215,7 +215,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
215 struct ip_vs_dh_bucket *tbl; 215 struct ip_vs_dh_bucket *tbl;
216 struct ip_vs_iphdr iph; 216 struct ip_vs_iphdr iph;
217 217
218 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 218 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
219 219
220 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 220 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
221 221
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index df646ccf08a7..cbd37489ac77 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -479,7 +479,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
479 struct ip_vs_dest *dest = NULL; 479 struct ip_vs_dest *dest = NULL;
480 struct ip_vs_lblc_entry *en; 480 struct ip_vs_lblc_entry *en;
481 481
482 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 482 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
483 483
484 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 484 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
485 485
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 570e31ea427a..161b67972e3f 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -649,7 +649,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
649 struct ip_vs_dest *dest = NULL; 649 struct ip_vs_dest *dest = NULL;
650 struct ip_vs_lblcr_entry *en; 650 struct ip_vs_lblcr_entry *en;
651 651
652 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 652 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
653 653
654 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 654 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
655 655
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 1aa5cac748c4..12475ef88daf 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -68,23 +68,31 @@ static int get_callid(const char *dptr, unsigned int dataoff,
68static int 68static int
69ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) 69ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
70{ 70{
71 struct sk_buff *reasm = skb_nfct_reasm(skb);
71 struct ip_vs_iphdr iph; 72 struct ip_vs_iphdr iph;
72 unsigned int dataoff, datalen, matchoff, matchlen; 73 unsigned int dataoff, datalen, matchoff, matchlen;
73 const char *dptr; 74 const char *dptr;
74 int retc; 75 int retc;
75 76
76 ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph); 77 ip_vs_fill_iph_skb(p->af, skb, &iph);
77 78
78 /* Only useful with UDP */ 79 /* Only useful with UDP */
79 if (iph.protocol != IPPROTO_UDP) 80 if (iph.protocol != IPPROTO_UDP)
80 return -EINVAL; 81 return -EINVAL;
82 /* todo: IPv6 fragments:
83 * I think this only should be done for the first fragment. /HS
84 */
85 if (reasm) {
86 skb = reasm;
87 dataoff = iph.thoff_reasm + sizeof(struct udphdr);
88 } else
89 dataoff = iph.len + sizeof(struct udphdr);
81 90
82 /* No Data ? */
83 dataoff = iph.len + sizeof(struct udphdr);
84 if (dataoff >= skb->len) 91 if (dataoff >= skb->len)
85 return -EINVAL; 92 return -EINVAL;
86 93 /* todo: Check if this will mess-up the reasm skb !!! /HS */
87 if ((retc=skb_linearize(skb)) < 0) 94 retc = skb_linearize(skb);
95 if (retc < 0)
88 return retc; 96 return retc;
89 dptr = skb->data + dataoff; 97 dptr = skb->data + dataoff;
90 datalen = skb->len - dataoff; 98 datalen = skb->len - dataoff;
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 50d82186da87..939f7fbe9b46 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -280,17 +280,17 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
280 if (ih == NULL) 280 if (ih == NULL)
281 sprintf(buf, "TRUNCATED"); 281 sprintf(buf, "TRUNCATED");
282 else if (ih->nexthdr == IPPROTO_FRAGMENT) 282 else if (ih->nexthdr == IPPROTO_FRAGMENT)
283 sprintf(buf, "%pI6->%pI6 frag", &ih->saddr, &ih->daddr); 283 sprintf(buf, "%pI6c->%pI6c frag", &ih->saddr, &ih->daddr);
284 else { 284 else {
285 __be16 _ports[2], *pptr; 285 __be16 _ports[2], *pptr;
286 286
287 pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), 287 pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
288 sizeof(_ports), _ports); 288 sizeof(_ports), _ports);
289 if (pptr == NULL) 289 if (pptr == NULL)
290 sprintf(buf, "TRUNCATED %pI6->%pI6", 290 sprintf(buf, "TRUNCATED %pI6c->%pI6c",
291 &ih->saddr, &ih->daddr); 291 &ih->saddr, &ih->daddr);
292 else 292 else
293 sprintf(buf, "%pI6:%u->%pI6:%u", 293 sprintf(buf, "%pI6c:%u->%pI6c:%u",
294 &ih->saddr, ntohs(pptr[0]), 294 &ih->saddr, ntohs(pptr[0]),
295 &ih->daddr, ntohs(pptr[1])); 295 &ih->daddr, ntohs(pptr[1]));
296 } 296 }
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 5b8eb8b12c3e..5de3dd312c0f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -57,7 +57,7 @@ ah_esp_conn_fill_param_proto(struct net *net, int af,
57 57
58static struct ip_vs_conn * 58static struct ip_vs_conn *
59ah_esp_conn_in_get(int af, const struct sk_buff *skb, 59ah_esp_conn_in_get(int af, const struct sk_buff *skb,
60 const struct ip_vs_iphdr *iph, unsigned int proto_off, 60 const struct ip_vs_iphdr *iph,
61 int inverse) 61 int inverse)
62{ 62{
63 struct ip_vs_conn *cp; 63 struct ip_vs_conn *cp;
@@ -85,9 +85,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
85 85
86static struct ip_vs_conn * 86static struct ip_vs_conn *
87ah_esp_conn_out_get(int af, const struct sk_buff *skb, 87ah_esp_conn_out_get(int af, const struct sk_buff *skb,
88 const struct ip_vs_iphdr *iph, 88 const struct ip_vs_iphdr *iph, int inverse)
89 unsigned int proto_off,
90 int inverse)
91{ 89{
92 struct ip_vs_conn *cp; 90 struct ip_vs_conn *cp;
93 struct ip_vs_conn_param p; 91 struct ip_vs_conn_param p;
@@ -110,7 +108,8 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
110 108
111static int 109static int
112ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 110ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
113 int *verdict, struct ip_vs_conn **cpp) 111 int *verdict, struct ip_vs_conn **cpp,
112 struct ip_vs_iphdr *iph)
114{ 113{
115 /* 114 /*
116 * AH/ESP is only related traffic. Pass the packet to IP stack. 115 * AH/ESP is only related traffic. Pass the packet to IP stack.
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 9f3fb751c491..746048b13ef3 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -10,28 +10,26 @@
10 10
11static int 11static int
12sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 12sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
13 int *verdict, struct ip_vs_conn **cpp) 13 int *verdict, struct ip_vs_conn **cpp,
14 struct ip_vs_iphdr *iph)
14{ 15{
15 struct net *net; 16 struct net *net;
16 struct ip_vs_service *svc; 17 struct ip_vs_service *svc;
17 sctp_chunkhdr_t _schunkh, *sch; 18 sctp_chunkhdr_t _schunkh, *sch;
18 sctp_sctphdr_t *sh, _sctph; 19 sctp_sctphdr_t *sh, _sctph;
19 struct ip_vs_iphdr iph;
20 20
21 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 21 sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
22
23 sh = skb_header_pointer(skb, iph.len, sizeof(_sctph), &_sctph);
24 if (sh == NULL) 22 if (sh == NULL)
25 return 0; 23 return 0;
26 24
27 sch = skb_header_pointer(skb, iph.len + sizeof(sctp_sctphdr_t), 25 sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
28 sizeof(_schunkh), &_schunkh); 26 sizeof(_schunkh), &_schunkh);
29 if (sch == NULL) 27 if (sch == NULL)
30 return 0; 28 return 0;
31 net = skb_net(skb); 29 net = skb_net(skb);
32 if ((sch->type == SCTP_CID_INIT) && 30 if ((sch->type == SCTP_CID_INIT) &&
33 (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, 31 (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
34 &iph.daddr, sh->dest))) { 32 &iph->daddr, sh->dest))) {
35 int ignored; 33 int ignored;
36 34
37 if (ip_vs_todrop(net_ipvs(net))) { 35 if (ip_vs_todrop(net_ipvs(net))) {
@@ -47,10 +45,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
47 * Let the virtual server select a real server for the 45 * Let the virtual server select a real server for the
48 * incoming connection, and create a connection entry. 46 * incoming connection, and create a connection entry.
49 */ 47 */
50 *cpp = ip_vs_schedule(svc, skb, pd, &ignored); 48 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
51 if (!*cpp && ignored <= 0) { 49 if (!*cpp && ignored <= 0) {
52 if (!ignored) 50 if (!ignored)
53 *verdict = ip_vs_leave(svc, skb, pd); 51 *verdict = ip_vs_leave(svc, skb, pd, iph);
54 else { 52 else {
55 ip_vs_service_put(svc); 53 ip_vs_service_put(svc);
56 *verdict = NF_DROP; 54 *verdict = NF_DROP;
@@ -64,20 +62,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
64} 62}
65 63
66static int 64static int
67sctp_snat_handler(struct sk_buff *skb, 65sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
68 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 66 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
69{ 67{
70 sctp_sctphdr_t *sctph; 68 sctp_sctphdr_t *sctph;
71 unsigned int sctphoff; 69 unsigned int sctphoff = iph->len;
72 struct sk_buff *iter; 70 struct sk_buff *iter;
73 __be32 crc32; 71 __be32 crc32;
74 72
75#ifdef CONFIG_IP_VS_IPV6 73#ifdef CONFIG_IP_VS_IPV6
76 if (cp->af == AF_INET6) 74 if (cp->af == AF_INET6 && iph->fragoffs)
77 sctphoff = sizeof(struct ipv6hdr); 75 return 1;
78 else
79#endif 76#endif
80 sctphoff = ip_hdrlen(skb);
81 77
82 /* csum_check requires unshared skb */ 78 /* csum_check requires unshared skb */
83 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) 79 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
@@ -108,20 +104,18 @@ sctp_snat_handler(struct sk_buff *skb,
108} 104}
109 105
110static int 106static int
111sctp_dnat_handler(struct sk_buff *skb, 107sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
112 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 108 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
113{ 109{
114 sctp_sctphdr_t *sctph; 110 sctp_sctphdr_t *sctph;
115 unsigned int sctphoff; 111 unsigned int sctphoff = iph->len;
116 struct sk_buff *iter; 112 struct sk_buff *iter;
117 __be32 crc32; 113 __be32 crc32;
118 114
119#ifdef CONFIG_IP_VS_IPV6 115#ifdef CONFIG_IP_VS_IPV6
120 if (cp->af == AF_INET6) 116 if (cp->af == AF_INET6 && iph->fragoffs)
121 sctphoff = sizeof(struct ipv6hdr); 117 return 1;
122 else
123#endif 118#endif
124 sctphoff = ip_hdrlen(skb);
125 119
126 /* csum_check requires unshared skb */ 120 /* csum_check requires unshared skb */
127 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) 121 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index cd609cc62721..9af653a75825 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -33,16 +33,14 @@
33 33
34static int 34static int
35tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 35tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
36 int *verdict, struct ip_vs_conn **cpp) 36 int *verdict, struct ip_vs_conn **cpp,
37 struct ip_vs_iphdr *iph)
37{ 38{
38 struct net *net; 39 struct net *net;
39 struct ip_vs_service *svc; 40 struct ip_vs_service *svc;
40 struct tcphdr _tcph, *th; 41 struct tcphdr _tcph, *th;
41 struct ip_vs_iphdr iph;
42 42
43 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 43 th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
44
45 th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
46 if (th == NULL) { 44 if (th == NULL) {
47 *verdict = NF_DROP; 45 *verdict = NF_DROP;
48 return 0; 46 return 0;
@@ -50,8 +48,8 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
50 net = skb_net(skb); 48 net = skb_net(skb);
51 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ 49 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
52 if (th->syn && 50 if (th->syn &&
53 (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, 51 (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
54 &iph.daddr, th->dest))) { 52 &iph->daddr, th->dest))) {
55 int ignored; 53 int ignored;
56 54
57 if (ip_vs_todrop(net_ipvs(net))) { 55 if (ip_vs_todrop(net_ipvs(net))) {
@@ -68,10 +66,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
68 * Let the virtual server select a real server for the 66 * Let the virtual server select a real server for the
69 * incoming connection, and create a connection entry. 67 * incoming connection, and create a connection entry.
70 */ 68 */
71 *cpp = ip_vs_schedule(svc, skb, pd, &ignored); 69 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
72 if (!*cpp && ignored <= 0) { 70 if (!*cpp && ignored <= 0) {
73 if (!ignored) 71 if (!ignored)
74 *verdict = ip_vs_leave(svc, skb, pd); 72 *verdict = ip_vs_leave(svc, skb, pd, iph);
75 else { 73 else {
76 ip_vs_service_put(svc); 74 ip_vs_service_put(svc);
77 *verdict = NF_DROP; 75 *verdict = NF_DROP;
@@ -128,20 +126,18 @@ tcp_partial_csum_update(int af, struct tcphdr *tcph,
128 126
129 127
130static int 128static int
131tcp_snat_handler(struct sk_buff *skb, 129tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
132 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 130 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
133{ 131{
134 struct tcphdr *tcph; 132 struct tcphdr *tcph;
135 unsigned int tcphoff; 133 unsigned int tcphoff = iph->len;
136 int oldlen; 134 int oldlen;
137 int payload_csum = 0; 135 int payload_csum = 0;
138 136
139#ifdef CONFIG_IP_VS_IPV6 137#ifdef CONFIG_IP_VS_IPV6
140 if (cp->af == AF_INET6) 138 if (cp->af == AF_INET6 && iph->fragoffs)
141 tcphoff = sizeof(struct ipv6hdr); 139 return 1;
142 else
143#endif 140#endif
144 tcphoff = ip_hdrlen(skb);
145 oldlen = skb->len - tcphoff; 141 oldlen = skb->len - tcphoff;
146 142
147 /* csum_check requires unshared skb */ 143 /* csum_check requires unshared skb */
@@ -208,20 +204,18 @@ tcp_snat_handler(struct sk_buff *skb,
208 204
209 205
210static int 206static int
211tcp_dnat_handler(struct sk_buff *skb, 207tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
212 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 208 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
213{ 209{
214 struct tcphdr *tcph; 210 struct tcphdr *tcph;
215 unsigned int tcphoff; 211 unsigned int tcphoff = iph->len;
216 int oldlen; 212 int oldlen;
217 int payload_csum = 0; 213 int payload_csum = 0;
218 214
219#ifdef CONFIG_IP_VS_IPV6 215#ifdef CONFIG_IP_VS_IPV6
220 if (cp->af == AF_INET6) 216 if (cp->af == AF_INET6 && iph->fragoffs)
221 tcphoff = sizeof(struct ipv6hdr); 217 return 1;
222 else
223#endif 218#endif
224 tcphoff = ip_hdrlen(skb);
225 oldlen = skb->len - tcphoff; 219 oldlen = skb->len - tcphoff;
226 220
227 /* csum_check requires unshared skb */ 221 /* csum_check requires unshared skb */
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 2fedb2dcb3d1..503a842c90d2 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -30,23 +30,22 @@
30 30
31static int 31static int
32udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 32udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
33 int *verdict, struct ip_vs_conn **cpp) 33 int *verdict, struct ip_vs_conn **cpp,
34 struct ip_vs_iphdr *iph)
34{ 35{
35 struct net *net; 36 struct net *net;
36 struct ip_vs_service *svc; 37 struct ip_vs_service *svc;
37 struct udphdr _udph, *uh; 38 struct udphdr _udph, *uh;
38 struct ip_vs_iphdr iph;
39 39
40 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 40 /* IPv6 fragments, only first fragment will hit this */
41 41 uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
42 uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
43 if (uh == NULL) { 42 if (uh == NULL) {
44 *verdict = NF_DROP; 43 *verdict = NF_DROP;
45 return 0; 44 return 0;
46 } 45 }
47 net = skb_net(skb); 46 net = skb_net(skb);
48 svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, 47 svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
49 &iph.daddr, uh->dest); 48 &iph->daddr, uh->dest);
50 if (svc) { 49 if (svc) {
51 int ignored; 50 int ignored;
52 51
@@ -64,10 +63,10 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
64 * Let the virtual server select a real server for the 63 * Let the virtual server select a real server for the
65 * incoming connection, and create a connection entry. 64 * incoming connection, and create a connection entry.
66 */ 65 */
67 *cpp = ip_vs_schedule(svc, skb, pd, &ignored); 66 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
68 if (!*cpp && ignored <= 0) { 67 if (!*cpp && ignored <= 0) {
69 if (!ignored) 68 if (!ignored)
70 *verdict = ip_vs_leave(svc, skb, pd); 69 *verdict = ip_vs_leave(svc, skb, pd, iph);
71 else { 70 else {
72 ip_vs_service_put(svc); 71 ip_vs_service_put(svc);
73 *verdict = NF_DROP; 72 *verdict = NF_DROP;
@@ -125,20 +124,18 @@ udp_partial_csum_update(int af, struct udphdr *uhdr,
125 124
126 125
127static int 126static int
128udp_snat_handler(struct sk_buff *skb, 127udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
129 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 128 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
130{ 129{
131 struct udphdr *udph; 130 struct udphdr *udph;
132 unsigned int udphoff; 131 unsigned int udphoff = iph->len;
133 int oldlen; 132 int oldlen;
134 int payload_csum = 0; 133 int payload_csum = 0;
135 134
136#ifdef CONFIG_IP_VS_IPV6 135#ifdef CONFIG_IP_VS_IPV6
137 if (cp->af == AF_INET6) 136 if (cp->af == AF_INET6 && iph->fragoffs)
138 udphoff = sizeof(struct ipv6hdr); 137 return 1;
139 else
140#endif 138#endif
141 udphoff = ip_hdrlen(skb);
142 oldlen = skb->len - udphoff; 139 oldlen = skb->len - udphoff;
143 140
144 /* csum_check requires unshared skb */ 141 /* csum_check requires unshared skb */
@@ -210,20 +207,18 @@ udp_snat_handler(struct sk_buff *skb,
210 207
211 208
212static int 209static int
213udp_dnat_handler(struct sk_buff *skb, 210udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
214 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 211 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
215{ 212{
216 struct udphdr *udph; 213 struct udphdr *udph;
217 unsigned int udphoff; 214 unsigned int udphoff = iph->len;
218 int oldlen; 215 int oldlen;
219 int payload_csum = 0; 216 int payload_csum = 0;
220 217
221#ifdef CONFIG_IP_VS_IPV6 218#ifdef CONFIG_IP_VS_IPV6
222 if (cp->af == AF_INET6) 219 if (cp->af == AF_INET6 && iph->fragoffs)
223 udphoff = sizeof(struct ipv6hdr); 220 return 1;
224 else
225#endif 221#endif
226 udphoff = ip_hdrlen(skb);
227 oldlen = skb->len - udphoff; 222 oldlen = skb->len - udphoff;
228 223
229 /* csum_check requires unshared skb */ 224 /* csum_check requires unshared skb */
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 08dbdd5bc18f..d6bf20d6cdbe 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -159,7 +159,7 @@ void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
159 svc->fwmark, msg); 159 svc->fwmark, msg);
160#ifdef CONFIG_IP_VS_IPV6 160#ifdef CONFIG_IP_VS_IPV6
161 } else if (svc->af == AF_INET6) { 161 } else if (svc->af == AF_INET6) {
162 IP_VS_ERR_RL("%s: %s [%pI6]:%d - %s\n", 162 IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
163 svc->scheduler->name, 163 svc->scheduler->name,
164 ip_vs_proto_name(svc->protocol), 164 ip_vs_proto_name(svc->protocol),
165 &svc->addr.in6, ntohs(svc->port), msg); 165 &svc->addr.in6, ntohs(svc->port), msg);
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 05126521743e..e33126994628 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -228,7 +228,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
228 struct ip_vs_sh_bucket *tbl; 228 struct ip_vs_sh_bucket *tbl;
229 struct ip_vs_iphdr iph; 229 struct ip_vs_iphdr iph;
230 230
231 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 231 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
232 232
233 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); 233 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
234 234
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index cc4c8095681a..12008b47e5ca 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -338,7 +338,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
338 local = __ip_vs_is_local_route6(rt); 338 local = __ip_vs_is_local_route6(rt);
339 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & 339 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
340 rt_mode)) { 340 rt_mode)) {
341 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n", 341 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
342 local ? "local":"non-local", daddr); 342 local ? "local":"non-local", daddr);
343 dst_release(&rt->dst); 343 dst_release(&rt->dst);
344 return NULL; 344 return NULL;
@@ -346,8 +346,8 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
346 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && 346 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
347 !((ort = (struct rt6_info *) skb_dst(skb)) && 347 !((ort = (struct rt6_info *) skb_dst(skb)) &&
348 __ip_vs_is_local_route6(ort))) { 348 __ip_vs_is_local_route6(ort))) {
349 IP_VS_DBG_RL("Redirect from non-local address %pI6 to local " 349 IP_VS_DBG_RL("Redirect from non-local address %pI6c to local "
350 "requires NAT method, dest: %pI6\n", 350 "requires NAT method, dest: %pI6c\n",
351 &ipv6_hdr(skb)->daddr, daddr); 351 &ipv6_hdr(skb)->daddr, daddr);
352 dst_release(&rt->dst); 352 dst_release(&rt->dst);
353 return NULL; 353 return NULL;
@@ -355,8 +355,8 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
355 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 355 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
356 ipv6_addr_type(&ipv6_hdr(skb)->saddr) & 356 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
357 IPV6_ADDR_LOOPBACK)) { 357 IPV6_ADDR_LOOPBACK)) {
358 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 " 358 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c "
359 "to non-local address, dest: %pI6\n", 359 "to non-local address, dest: %pI6c\n",
360 &ipv6_hdr(skb)->saddr, daddr); 360 &ipv6_hdr(skb)->saddr, daddr);
361 dst_release(&rt->dst); 361 dst_release(&rt->dst);
362 return NULL; 362 return NULL;
@@ -427,7 +427,7 @@ do { \
427 */ 427 */
428int 428int
429ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 429ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
430 struct ip_vs_protocol *pp) 430 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
431{ 431{
432 /* we do not touch skb and do not need pskb ptr */ 432 /* we do not touch skb and do not need pskb ptr */
433 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); 433 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
@@ -441,7 +441,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
441 */ 441 */
442int 442int
443ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 443ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
444 struct ip_vs_protocol *pp) 444 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
445{ 445{
446 struct rtable *rt; /* Route to the other host */ 446 struct rtable *rt; /* Route to the other host */
447 struct iphdr *iph = ip_hdr(skb); 447 struct iphdr *iph = ip_hdr(skb);
@@ -496,16 +496,16 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
496#ifdef CONFIG_IP_VS_IPV6 496#ifdef CONFIG_IP_VS_IPV6
497int 497int
498ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 498ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
499 struct ip_vs_protocol *pp) 499 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
500{ 500{
501 struct rt6_info *rt; /* Route to the other host */ 501 struct rt6_info *rt; /* Route to the other host */
502 struct ipv6hdr *iph = ipv6_hdr(skb);
503 int mtu; 502 int mtu;
504 503
505 EnterFunction(10); 504 EnterFunction(10);
506 505
507 if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, 506 rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0,
508 IP_VS_RT_MODE_NON_LOCAL))) 507 IP_VS_RT_MODE_NON_LOCAL);
508 if (!rt)
509 goto tx_error_icmp; 509 goto tx_error_icmp;
510 510
511 /* MTU checking */ 511 /* MTU checking */
@@ -516,7 +516,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
516 516
517 skb->dev = net->loopback_dev; 517 skb->dev = net->loopback_dev;
518 } 518 }
519 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 519 /* only send ICMP too big on first fragment */
520 if (!iph->fragoffs)
521 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
520 dst_release(&rt->dst); 522 dst_release(&rt->dst);
521 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 523 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
522 goto tx_error; 524 goto tx_error;
@@ -559,7 +561,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
559 */ 561 */
560int 562int
561ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 563ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
562 struct ip_vs_protocol *pp) 564 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
563{ 565{
564 struct rtable *rt; /* Route to the other host */ 566 struct rtable *rt; /* Route to the other host */
565 int mtu; 567 int mtu;
@@ -629,7 +631,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
629 goto tx_error_put; 631 goto tx_error_put;
630 632
631 /* mangle the packet */ 633 /* mangle the packet */
632 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 634 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
633 goto tx_error_put; 635 goto tx_error_put;
634 ip_hdr(skb)->daddr = cp->daddr.ip; 636 ip_hdr(skb)->daddr = cp->daddr.ip;
635 ip_send_check(ip_hdr(skb)); 637 ip_send_check(ip_hdr(skb));
@@ -677,7 +679,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
677#ifdef CONFIG_IP_VS_IPV6 679#ifdef CONFIG_IP_VS_IPV6
678int 680int
679ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 681ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
680 struct ip_vs_protocol *pp) 682 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
681{ 683{
682 struct rt6_info *rt; /* Route to the other host */ 684 struct rt6_info *rt; /* Route to the other host */
683 int mtu; 685 int mtu;
@@ -686,10 +688,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
686 EnterFunction(10); 688 EnterFunction(10);
687 689
688 /* check if it is a connection of no-client-port */ 690 /* check if it is a connection of no-client-port */
689 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { 691 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) {
690 __be16 _pt, *p; 692 __be16 _pt, *p;
691 p = skb_header_pointer(skb, sizeof(struct ipv6hdr), 693 p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt);
692 sizeof(_pt), &_pt);
693 if (p == NULL) 694 if (p == NULL)
694 goto tx_error; 695 goto tx_error;
695 ip_vs_conn_fill_cport(cp, *p); 696 ip_vs_conn_fill_cport(cp, *p);
@@ -737,7 +738,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
737 738
738 skb->dev = net->loopback_dev; 739 skb->dev = net->loopback_dev;
739 } 740 }
740 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 741 /* only send ICMP too big on first fragment */
742 if (!iph->fragoffs)
743 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
741 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, 744 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
742 "ip_vs_nat_xmit_v6(): frag needed for"); 745 "ip_vs_nat_xmit_v6(): frag needed for");
743 goto tx_error_put; 746 goto tx_error_put;
@@ -751,7 +754,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
751 goto tx_error_put; 754 goto tx_error_put;
752 755
753 /* mangle the packet */ 756 /* mangle the packet */
754 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 757 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph))
755 goto tx_error; 758 goto tx_error;
756 ipv6_hdr(skb)->daddr = cp->daddr.in6; 759 ipv6_hdr(skb)->daddr = cp->daddr.in6;
757 760
@@ -812,7 +815,7 @@ tx_error_put:
812 */ 815 */
813int 816int
814ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 817ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
815 struct ip_vs_protocol *pp) 818 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
816{ 819{
817 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); 820 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
818 struct rtable *rt; /* Route to the other host */ 821 struct rtable *rt; /* Route to the other host */
@@ -932,7 +935,7 @@ tx_error_put:
932#ifdef CONFIG_IP_VS_IPV6 935#ifdef CONFIG_IP_VS_IPV6
933int 936int
934ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 937ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
935 struct ip_vs_protocol *pp) 938 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
936{ 939{
937 struct rt6_info *rt; /* Route to the other host */ 940 struct rt6_info *rt; /* Route to the other host */
938 struct in6_addr saddr; /* Source for tunnel */ 941 struct in6_addr saddr; /* Source for tunnel */
@@ -972,7 +975,9 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
972 975
973 skb->dev = net->loopback_dev; 976 skb->dev = net->loopback_dev;
974 } 977 }
975 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 978 /* only send ICMP too big on first fragment */
979 if (!ipvsh->fragoffs)
980 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
976 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 981 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
977 goto tx_error_put; 982 goto tx_error_put;
978 } 983 }
@@ -1053,7 +1058,7 @@ tx_error_put:
1053 */ 1058 */
1054int 1059int
1055ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1060ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1056 struct ip_vs_protocol *pp) 1061 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
1057{ 1062{
1058 struct rtable *rt; /* Route to the other host */ 1063 struct rtable *rt; /* Route to the other host */
1059 struct iphdr *iph = ip_hdr(skb); 1064 struct iphdr *iph = ip_hdr(skb);
@@ -1115,7 +1120,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1115#ifdef CONFIG_IP_VS_IPV6 1120#ifdef CONFIG_IP_VS_IPV6
1116int 1121int
1117ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1122ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1118 struct ip_vs_protocol *pp) 1123 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
1119{ 1124{
1120 struct rt6_info *rt; /* Route to the other host */ 1125 struct rt6_info *rt; /* Route to the other host */
1121 int mtu; 1126 int mtu;
@@ -1139,7 +1144,9 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1139 1144
1140 skb->dev = net->loopback_dev; 1145 skb->dev = net->loopback_dev;
1141 } 1146 }
1142 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1147 /* only send ICMP too big on first fragment */
1148 if (!iph->fragoffs)
1149 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1143 dst_release(&rt->dst); 1150 dst_release(&rt->dst);
1144 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1151 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1145 goto tx_error; 1152 goto tx_error;
@@ -1183,7 +1190,8 @@ tx_error:
1183 */ 1190 */
1184int 1191int
1185ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1192ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1186 struct ip_vs_protocol *pp, int offset, unsigned int hooknum) 1193 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1194 struct ip_vs_iphdr *iph)
1187{ 1195{
1188 struct rtable *rt; /* Route to the other host */ 1196 struct rtable *rt; /* Route to the other host */
1189 int mtu; 1197 int mtu;
@@ -1198,7 +1206,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1198 translate address/port back */ 1206 translate address/port back */
1199 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1207 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1200 if (cp->packet_xmit) 1208 if (cp->packet_xmit)
1201 rc = cp->packet_xmit(skb, cp, pp); 1209 rc = cp->packet_xmit(skb, cp, pp, iph);
1202 else 1210 else
1203 rc = NF_ACCEPT; 1211 rc = NF_ACCEPT;
1204 /* do not touch skb anymore */ 1212 /* do not touch skb anymore */
@@ -1304,7 +1312,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1304#ifdef CONFIG_IP_VS_IPV6 1312#ifdef CONFIG_IP_VS_IPV6
1305int 1313int
1306ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1314ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1307 struct ip_vs_protocol *pp, int offset, unsigned int hooknum) 1315 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1316 struct ip_vs_iphdr *iph)
1308{ 1317{
1309 struct rt6_info *rt; /* Route to the other host */ 1318 struct rt6_info *rt; /* Route to the other host */
1310 int mtu; 1319 int mtu;
@@ -1319,7 +1328,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1319 translate address/port back */ 1328 translate address/port back */
1320 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1329 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1321 if (cp->packet_xmit) 1330 if (cp->packet_xmit)
1322 rc = cp->packet_xmit(skb, cp, pp); 1331 rc = cp->packet_xmit(skb, cp, pp, iph);
1323 else 1332 else
1324 rc = NF_ACCEPT; 1333 rc = NF_ACCEPT;
1325 /* do not touch skb anymore */ 1334 /* do not touch skb anymore */
@@ -1375,7 +1384,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1375 1384
1376 skb->dev = net->loopback_dev; 1385 skb->dev = net->loopback_dev;
1377 } 1386 }
1378 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1387 /* only send ICMP too big on first fragment */
1388 if (!iph->fragoffs)
1389 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1379 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1390 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1380 goto tx_error_put; 1391 goto tx_error_put;
1381 } 1392 }
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index bb10b0717f1b..8d47c3780fda 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -67,7 +67,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
67 goto out; 67 goto out;
68 } 68 }
69 69
70 ip_vs_fill_iphdr(family, skb_network_header(skb), &iph); 70 ip_vs_fill_iph_skb(family, skb, &iph);
71 71
72 if (data->bitmask & XT_IPVS_PROTO) 72 if (data->bitmask & XT_IPVS_PROTO)
73 if ((iph.protocol == data->l4proto) ^ 73 if ((iph.protocol == data->l4proto) ^
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
85 /* 85 /*
86 * Check if the packet belongs to an existing entry 86 * Check if the packet belongs to an existing entry
87 */ 87 */
88 cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */); 88 cp = pp->conn_out_get(family, skb, &iph, 1 /* inverse */);
89 if (unlikely(cp == NULL)) { 89 if (unlikely(cp == NULL)) {
90 match = false; 90 match = false;
91 goto out; 91 goto out;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 94060edbbd70..f262dbfc7f06 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1881,7 +1881,35 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
1881 skb_reserve(skb, hlen); 1881 skb_reserve(skb, hlen);
1882 skb_reset_network_header(skb); 1882 skb_reset_network_header(skb);
1883 1883
1884 data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll); 1884 if (po->tp_tx_has_off) {
1885 int off_min, off_max, off;
1886 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
1887 off_max = po->tx_ring.frame_size - tp_len;
1888 if (sock->type == SOCK_DGRAM) {
1889 switch (po->tp_version) {
1890 case TPACKET_V2:
1891 off = ph.h2->tp_net;
1892 break;
1893 default:
1894 off = ph.h1->tp_net;
1895 break;
1896 }
1897 } else {
1898 switch (po->tp_version) {
1899 case TPACKET_V2:
1900 off = ph.h2->tp_mac;
1901 break;
1902 default:
1903 off = ph.h1->tp_mac;
1904 break;
1905 }
1906 }
1907 if (unlikely((off < off_min) || (off_max < off)))
1908 return -EINVAL;
1909 data = ph.raw + off;
1910 } else {
1911 data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
1912 }
1885 to_write = tp_len; 1913 to_write = tp_len;
1886 1914
1887 if (sock->type == SOCK_DGRAM) { 1915 if (sock->type == SOCK_DGRAM) {
@@ -1907,7 +1935,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
1907 to_write -= dev->hard_header_len; 1935 to_write -= dev->hard_header_len;
1908 } 1936 }
1909 1937
1910 err = -EFAULT;
1911 offset = offset_in_page(data); 1938 offset = offset_in_page(data);
1912 len_max = PAGE_SIZE - offset; 1939 len_max = PAGE_SIZE - offset;
1913 len = ((to_write > len_max) ? len_max : to_write); 1940 len = ((to_write > len_max) ? len_max : to_write);
@@ -1957,7 +1984,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
1957 1984
1958 mutex_lock(&po->pg_vec_lock); 1985 mutex_lock(&po->pg_vec_lock);
1959 1986
1960 err = -EBUSY;
1961 if (saddr == NULL) { 1987 if (saddr == NULL) {
1962 dev = po->prot_hook.dev; 1988 dev = po->prot_hook.dev;
1963 proto = po->num; 1989 proto = po->num;
@@ -3111,6 +3137,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
3111 3137
3112 return fanout_add(sk, val & 0xffff, val >> 16); 3138 return fanout_add(sk, val & 0xffff, val >> 16);
3113 } 3139 }
3140 case PACKET_TX_HAS_OFF:
3141 {
3142 unsigned int val;
3143
3144 if (optlen != sizeof(val))
3145 return -EINVAL;
3146 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
3147 return -EBUSY;
3148 if (copy_from_user(&val, optval, sizeof(val)))
3149 return -EFAULT;
3150 po->tp_tx_has_off = !!val;
3151 return 0;
3152 }
3114 default: 3153 default:
3115 return -ENOPROTOOPT; 3154 return -ENOPROTOOPT;
3116 } 3155 }
@@ -3202,6 +3241,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
3202 ((u32)po->fanout->type << 16)) : 3241 ((u32)po->fanout->type << 16)) :
3203 0); 3242 0);
3204 break; 3243 break;
3244 case PACKET_TX_HAS_OFF:
3245 val = po->tp_tx_has_off;
3246 break;
3205 default: 3247 default:
3206 return -ENOPROTOOPT; 3248 return -ENOPROTOOPT;
3207 } 3249 }
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 44945f6b7252..e84cab8cb7a9 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -109,6 +109,7 @@ struct packet_sock {
109 unsigned int tp_hdrlen; 109 unsigned int tp_hdrlen;
110 unsigned int tp_reserve; 110 unsigned int tp_reserve;
111 unsigned int tp_loss:1; 111 unsigned int tp_loss:1;
112 unsigned int tp_tx_has_off:1;
112 unsigned int tp_tstamp; 113 unsigned int tp_tstamp;
113 struct packet_type prot_hook ____cacheline_aligned_in_smp; 114 struct packet_type prot_hook ____cacheline_aligned_in_smp;
114}; 115};
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 2ecde225ae60..709b0fb38a18 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -17,6 +17,7 @@
17#include <linux/skbuff.h> 17#include <linux/skbuff.h>
18#include <linux/cgroup.h> 18#include <linux/cgroup.h>
19#include <linux/rcupdate.h> 19#include <linux/rcupdate.h>
20#include <linux/fdtable.h>
20#include <net/rtnetlink.h> 21#include <net/rtnetlink.h>
21#include <net/pkt_cls.h> 22#include <net/pkt_cls.h>
22#include <net/sock.h> 23#include <net/sock.h>
@@ -53,6 +54,28 @@ static void cgrp_destroy(struct cgroup *cgrp)
53 kfree(cgrp_cls_state(cgrp)); 54 kfree(cgrp_cls_state(cgrp));
54} 55}
55 56
57static int update_classid(const void *v, struct file *file, unsigned n)
58{
59 int err;
60 struct socket *sock = sock_from_file(file, &err);
61 if (sock)
62 sock->sk->sk_classid = (u32)(unsigned long)v;
63 return 0;
64}
65
66static void cgrp_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
67{
68 struct task_struct *p;
69 void *v;
70
71 cgroup_taskset_for_each(p, cgrp, tset) {
72 task_lock(p);
73 v = (void *)(unsigned long)task_cls_classid(p);
74 iterate_fd(p->files, 0, update_classid, v);
75 task_unlock(p);
76 }
77}
78
56static u64 read_classid(struct cgroup *cgrp, struct cftype *cft) 79static u64 read_classid(struct cgroup *cgrp, struct cftype *cft)
57{ 80{
58 return cgrp_cls_state(cgrp)->classid; 81 return cgrp_cls_state(cgrp)->classid;
@@ -77,6 +100,7 @@ struct cgroup_subsys net_cls_subsys = {
77 .name = "net_cls", 100 .name = "net_cls",
78 .create = cgrp_create, 101 .create = cgrp_create,
79 .destroy = cgrp_destroy, 102 .destroy = cgrp_destroy,
103 .attach = cgrp_attach,
80 .subsys_id = net_cls_subsys_id, 104 .subsys_id = net_cls_subsys_id,
81 .base_cftypes = ss_files, 105 .base_cftypes = ss_files,
82 .module = THIS_MODULE, 106 .module = THIS_MODULE,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index a18d975db59c..13cc744a2498 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -495,16 +495,15 @@ EXPORT_SYMBOL(qdisc_watchdog_init);
495 495
496void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) 496void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
497{ 497{
498 ktime_t time;
499
500 if (test_bit(__QDISC_STATE_DEACTIVATED, 498 if (test_bit(__QDISC_STATE_DEACTIVATED,
501 &qdisc_root_sleeping(wd->qdisc)->state)) 499 &qdisc_root_sleeping(wd->qdisc)->state))
502 return; 500 return;
503 501
504 qdisc_throttled(wd->qdisc); 502 qdisc_throttled(wd->qdisc);
505 time = ktime_set(0, 0); 503
506 time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); 504 hrtimer_start(&wd->timer,
507 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); 505 ns_to_ktime(PSCHED_TICKS2NS(expires)),
506 HRTIMER_MODE_ABS);
508} 507}
509EXPORT_SYMBOL(qdisc_watchdog_schedule); 508EXPORT_SYMBOL(qdisc_watchdog_schedule);
510 509
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 564b9fc8efd3..0e19948470b8 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -509,8 +509,7 @@ static void cbq_ovl_delay(struct cbq_class *cl)
509 cl->cpriority = TC_CBQ_MAXPRIO; 509 cl->cpriority = TC_CBQ_MAXPRIO;
510 q->pmask |= (1<<TC_CBQ_MAXPRIO); 510 q->pmask |= (1<<TC_CBQ_MAXPRIO);
511 511
512 expires = ktime_set(0, 0); 512 expires = ns_to_ktime(PSCHED_TICKS2NS(sched));
513 expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
514 if (hrtimer_try_to_cancel(&q->delay_timer) && 513 if (hrtimer_try_to_cancel(&q->delay_timer) &&
515 ktime_to_ns(ktime_sub( 514 ktime_to_ns(ktime_sub(
516 hrtimer_get_expires(&q->delay_timer), 515 hrtimer_get_expires(&q->delay_timer),
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 9d75b7761313..d2922c0ef57a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -71,6 +71,12 @@ enum htb_cmode {
71 HTB_CAN_SEND /* class can send */ 71 HTB_CAN_SEND /* class can send */
72}; 72};
73 73
74struct htb_rate_cfg {
75 u64 rate_bps;
76 u32 mult;
77 u32 shift;
78};
79
74/* interior & leaf nodes; props specific to leaves are marked L: */ 80/* interior & leaf nodes; props specific to leaves are marked L: */
75struct htb_class { 81struct htb_class {
76 struct Qdisc_class_common common; 82 struct Qdisc_class_common common;
@@ -118,11 +124,11 @@ struct htb_class {
118 int filter_cnt; 124 int filter_cnt;
119 125
120 /* token bucket parameters */ 126 /* token bucket parameters */
121 struct qdisc_rate_table *rate; /* rate table of the class itself */ 127 struct htb_rate_cfg rate;
122 struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ 128 struct htb_rate_cfg ceil;
123 long buffer, cbuffer; /* token bucket depth/rate */ 129 s64 buffer, cbuffer; /* token bucket depth/rate */
124 psched_tdiff_t mbuffer; /* max wait time */ 130 psched_tdiff_t mbuffer; /* max wait time */
125 long tokens, ctokens; /* current number of tokens */ 131 s64 tokens, ctokens; /* current number of tokens */
126 psched_time_t t_c; /* checkpoint time */ 132 psched_time_t t_c; /* checkpoint time */
127}; 133};
128 134
@@ -162,6 +168,45 @@ struct htb_sched {
162 struct work_struct work; 168 struct work_struct work;
163}; 169};
164 170
171static u64 l2t_ns(struct htb_rate_cfg *r, unsigned int len)
172{
173 return ((u64)len * r->mult) >> r->shift;
174}
175
176static void htb_precompute_ratedata(struct htb_rate_cfg *r)
177{
178 u64 factor;
179 u64 mult;
180 int shift;
181
182 r->shift = 0;
183 r->mult = 1;
184 /*
185 * Calibrate mult, shift so that token counting is accurate
186 * for smallest packet size (64 bytes). Token (time in ns) is
187 * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will
188 * work as long as the smallest packet transfer time can be
189 * accurately represented in nanosec.
190 */
191 if (r->rate_bps > 0) {
192 /*
193 * Higher shift gives better accuracy. Find the largest
194 * shift such that mult fits in 32 bits.
195 */
196 for (shift = 0; shift < 16; shift++) {
197 r->shift = shift;
198 factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
199 mult = div64_u64(factor, r->rate_bps);
200 if (mult > UINT_MAX)
201 break;
202 }
203
204 r->shift = shift - 1;
205 factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
206 r->mult = div64_u64(factor, r->rate_bps);
207 }
208}
209
165/* find class in global hash table using given handle */ 210/* find class in global hash table using given handle */
166static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) 211static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
167{ 212{
@@ -273,7 +318,7 @@ static void htb_add_to_id_tree(struct rb_root *root,
273 * already in the queue. 318 * already in the queue.
274 */ 319 */
275static void htb_add_to_wait_tree(struct htb_sched *q, 320static void htb_add_to_wait_tree(struct htb_sched *q,
276 struct htb_class *cl, long delay) 321 struct htb_class *cl, s64 delay)
277{ 322{
278 struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL; 323 struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
279 324
@@ -441,14 +486,14 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
441 htb_remove_class_from_row(q, cl, mask); 486 htb_remove_class_from_row(q, cl, mask);
442} 487}
443 488
444static inline long htb_lowater(const struct htb_class *cl) 489static inline s64 htb_lowater(const struct htb_class *cl)
445{ 490{
446 if (htb_hysteresis) 491 if (htb_hysteresis)
447 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0; 492 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
448 else 493 else
449 return 0; 494 return 0;
450} 495}
451static inline long htb_hiwater(const struct htb_class *cl) 496static inline s64 htb_hiwater(const struct htb_class *cl)
452{ 497{
453 if (htb_hysteresis) 498 if (htb_hysteresis)
454 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0; 499 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
@@ -469,9 +514,9 @@ static inline long htb_hiwater(const struct htb_class *cl)
469 * mode transitions per time unit. The speed gain is about 1/6. 514 * mode transitions per time unit. The speed gain is about 1/6.
470 */ 515 */
471static inline enum htb_cmode 516static inline enum htb_cmode
472htb_class_mode(struct htb_class *cl, long *diff) 517htb_class_mode(struct htb_class *cl, s64 *diff)
473{ 518{
474 long toks; 519 s64 toks;
475 520
476 if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) { 521 if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
477 *diff = -toks; 522 *diff = -toks;
@@ -495,7 +540,7 @@ htb_class_mode(struct htb_class *cl, long *diff)
495 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree). 540 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
496 */ 541 */
497static void 542static void
498htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) 543htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
499{ 544{
500 enum htb_cmode new_mode = htb_class_mode(cl, diff); 545 enum htb_cmode new_mode = htb_class_mode(cl, diff);
501 546
@@ -581,26 +626,26 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
581 return NET_XMIT_SUCCESS; 626 return NET_XMIT_SUCCESS;
582} 627}
583 628
584static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff) 629static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
585{ 630{
586 long toks = diff + cl->tokens; 631 s64 toks = diff + cl->tokens;
587 632
588 if (toks > cl->buffer) 633 if (toks > cl->buffer)
589 toks = cl->buffer; 634 toks = cl->buffer;
590 toks -= (long) qdisc_l2t(cl->rate, bytes); 635 toks -= (s64) l2t_ns(&cl->rate, bytes);
591 if (toks <= -cl->mbuffer) 636 if (toks <= -cl->mbuffer)
592 toks = 1 - cl->mbuffer; 637 toks = 1 - cl->mbuffer;
593 638
594 cl->tokens = toks; 639 cl->tokens = toks;
595} 640}
596 641
597static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff) 642static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
598{ 643{
599 long toks = diff + cl->ctokens; 644 s64 toks = diff + cl->ctokens;
600 645
601 if (toks > cl->cbuffer) 646 if (toks > cl->cbuffer)
602 toks = cl->cbuffer; 647 toks = cl->cbuffer;
603 toks -= (long) qdisc_l2t(cl->ceil, bytes); 648 toks -= (s64) l2t_ns(&cl->ceil, bytes);
604 if (toks <= -cl->mbuffer) 649 if (toks <= -cl->mbuffer)
605 toks = 1 - cl->mbuffer; 650 toks = 1 - cl->mbuffer;
606 651
@@ -623,10 +668,10 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
623{ 668{
624 int bytes = qdisc_pkt_len(skb); 669 int bytes = qdisc_pkt_len(skb);
625 enum htb_cmode old_mode; 670 enum htb_cmode old_mode;
626 long diff; 671 s64 diff;
627 672
628 while (cl) { 673 while (cl) {
629 diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer); 674 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
630 if (cl->level >= level) { 675 if (cl->level >= level) {
631 if (cl->level == level) 676 if (cl->level == level)
632 cl->xstats.lends++; 677 cl->xstats.lends++;
@@ -673,7 +718,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
673 unsigned long stop_at = start + 2; 718 unsigned long stop_at = start + 2;
674 while (time_before(jiffies, stop_at)) { 719 while (time_before(jiffies, stop_at)) {
675 struct htb_class *cl; 720 struct htb_class *cl;
676 long diff; 721 s64 diff;
677 struct rb_node *p = rb_first(&q->wait_pq[level]); 722 struct rb_node *p = rb_first(&q->wait_pq[level]);
678 723
679 if (!p) 724 if (!p)
@@ -684,7 +729,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
684 return cl->pq_key; 729 return cl->pq_key;
685 730
686 htb_safe_rb_erase(p, q->wait_pq + level); 731 htb_safe_rb_erase(p, q->wait_pq + level);
687 diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer); 732 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
688 htb_change_class_mode(q, cl, &diff); 733 htb_change_class_mode(q, cl, &diff);
689 if (cl->cmode != HTB_CAN_SEND) 734 if (cl->cmode != HTB_CAN_SEND)
690 htb_add_to_wait_tree(q, cl, diff); 735 htb_add_to_wait_tree(q, cl, diff);
@@ -871,10 +916,10 @@ ok:
871 916
872 if (!sch->q.qlen) 917 if (!sch->q.qlen)
873 goto fin; 918 goto fin;
874 q->now = psched_get_time(); 919 q->now = ktime_to_ns(ktime_get());
875 start_at = jiffies; 920 start_at = jiffies;
876 921
877 next_event = q->now + 5 * PSCHED_TICKS_PER_SEC; 922 next_event = q->now + 5 * NSEC_PER_SEC;
878 923
879 for (level = 0; level < TC_HTB_MAXDEPTH; level++) { 924 for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
880 /* common case optimization - skip event handler quickly */ 925 /* common case optimization - skip event handler quickly */
@@ -884,7 +929,7 @@ ok:
884 if (q->now >= q->near_ev_cache[level]) { 929 if (q->now >= q->near_ev_cache[level]) {
885 event = htb_do_events(q, level, start_at); 930 event = htb_do_events(q, level, start_at);
886 if (!event) 931 if (!event)
887 event = q->now + PSCHED_TICKS_PER_SEC; 932 event = q->now + NSEC_PER_SEC;
888 q->near_ev_cache[level] = event; 933 q->near_ev_cache[level] = event;
889 } else 934 } else
890 event = q->near_ev_cache[level]; 935 event = q->near_ev_cache[level];
@@ -903,10 +948,17 @@ ok:
903 } 948 }
904 } 949 }
905 sch->qstats.overlimits++; 950 sch->qstats.overlimits++;
906 if (likely(next_event > q->now)) 951 if (likely(next_event > q->now)) {
907 qdisc_watchdog_schedule(&q->watchdog, next_event); 952 if (!test_bit(__QDISC_STATE_DEACTIVATED,
908 else 953 &qdisc_root_sleeping(q->watchdog.qdisc)->state)) {
954 ktime_t time = ns_to_ktime(next_event);
955 qdisc_throttled(q->watchdog.qdisc);
956 hrtimer_start(&q->watchdog.timer, time,
957 HRTIMER_MODE_ABS);
958 }
959 } else {
909 schedule_work(&q->work); 960 schedule_work(&q->work);
961 }
910fin: 962fin:
911 return skb; 963 return skb;
912} 964}
@@ -1082,9 +1134,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1082 1134
1083 memset(&opt, 0, sizeof(opt)); 1135 memset(&opt, 0, sizeof(opt));
1084 1136
1085 opt.rate = cl->rate->rate; 1137 opt.rate.rate = cl->rate.rate_bps >> 3;
1086 opt.buffer = cl->buffer; 1138 opt.buffer = cl->buffer;
1087 opt.ceil = cl->ceil->rate; 1139 opt.ceil.rate = cl->ceil.rate_bps >> 3;
1088 opt.cbuffer = cl->cbuffer; 1140 opt.cbuffer = cl->cbuffer;
1089 opt.quantum = cl->quantum; 1141 opt.quantum = cl->quantum;
1090 opt.prio = cl->prio; 1142 opt.prio = cl->prio;
@@ -1203,9 +1255,6 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1203 qdisc_destroy(cl->un.leaf.q); 1255 qdisc_destroy(cl->un.leaf.q);
1204 } 1256 }
1205 gen_kill_estimator(&cl->bstats, &cl->rate_est); 1257 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1206 qdisc_put_rtab(cl->rate);
1207 qdisc_put_rtab(cl->ceil);
1208
1209 tcf_destroy_chain(&cl->filter_list); 1258 tcf_destroy_chain(&cl->filter_list);
1210 kfree(cl); 1259 kfree(cl);
1211} 1260}
@@ -1307,7 +1356,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1307 struct htb_sched *q = qdisc_priv(sch); 1356 struct htb_sched *q = qdisc_priv(sch);
1308 struct htb_class *cl = (struct htb_class *)*arg, *parent; 1357 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1309 struct nlattr *opt = tca[TCA_OPTIONS]; 1358 struct nlattr *opt = tca[TCA_OPTIONS];
1310 struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1311 struct nlattr *tb[__TCA_HTB_MAX]; 1359 struct nlattr *tb[__TCA_HTB_MAX];
1312 struct tc_htb_opt *hopt; 1360 struct tc_htb_opt *hopt;
1313 1361
@@ -1326,10 +1374,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1326 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch); 1374 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1327 1375
1328 hopt = nla_data(tb[TCA_HTB_PARMS]); 1376 hopt = nla_data(tb[TCA_HTB_PARMS]);
1329 1377 if (!hopt->rate.rate || !hopt->ceil.rate)
1330 rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
1331 ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
1332 if (!rtab || !ctab)
1333 goto failure; 1378 goto failure;
1334 1379
1335 if (!cl) { /* new class */ 1380 if (!cl) { /* new class */
@@ -1439,7 +1484,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1439 * is really leaf before changing cl->un.leaf ! 1484 * is really leaf before changing cl->un.leaf !
1440 */ 1485 */
1441 if (!cl->level) { 1486 if (!cl->level) {
1442 cl->quantum = rtab->rate.rate / q->rate2quantum; 1487 cl->quantum = hopt->rate.rate / q->rate2quantum;
1443 if (!hopt->quantum && cl->quantum < 1000) { 1488 if (!hopt->quantum && cl->quantum < 1000) {
1444 pr_warning( 1489 pr_warning(
1445 "HTB: quantum of class %X is small. Consider r2q change.\n", 1490 "HTB: quantum of class %X is small. Consider r2q change.\n",
@@ -1460,12 +1505,16 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1460 1505
1461 cl->buffer = hopt->buffer; 1506 cl->buffer = hopt->buffer;
1462 cl->cbuffer = hopt->cbuffer; 1507 cl->cbuffer = hopt->cbuffer;
1463 if (cl->rate) 1508
1464 qdisc_put_rtab(cl->rate); 1509 cl->rate.rate_bps = (u64)hopt->rate.rate << 3;
1465 cl->rate = rtab; 1510 cl->ceil.rate_bps = (u64)hopt->ceil.rate << 3;
1466 if (cl->ceil) 1511
1467 qdisc_put_rtab(cl->ceil); 1512 htb_precompute_ratedata(&cl->rate);
1468 cl->ceil = ctab; 1513 htb_precompute_ratedata(&cl->ceil);
1514
1515 cl->buffer = hopt->buffer << PSCHED_SHIFT;
1516 cl->cbuffer = hopt->buffer << PSCHED_SHIFT;
1517
1469 sch_tree_unlock(sch); 1518 sch_tree_unlock(sch);
1470 1519
1471 qdisc_class_hash_grow(sch, &q->clhash); 1520 qdisc_class_hash_grow(sch, &q->clhash);
@@ -1474,10 +1523,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1474 return 0; 1523 return 0;
1475 1524
1476failure: 1525failure:
1477 if (rtab)
1478 qdisc_put_rtab(rtab);
1479 if (ctab)
1480 qdisc_put_rtab(ctab);
1481 return err; 1526 return err;
1482} 1527}
1483 1528
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 126b014eb79b..a9edd2e205f4 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -9,7 +9,6 @@ menuconfig IP_SCTP
9 select CRYPTO 9 select CRYPTO
10 select CRYPTO_HMAC 10 select CRYPTO_HMAC
11 select CRYPTO_SHA1 11 select CRYPTO_SHA1
12 select CRYPTO_MD5 if SCTP_HMAC_MD5
13 select LIBCRC32C 12 select LIBCRC32C
14 ---help--- 13 ---help---
15 Stream Control Transmission Protocol 14 Stream Control Transmission Protocol
@@ -68,33 +67,21 @@ config SCTP_DBG_OBJCNT
68 67
69 If unsure, say N 68 If unsure, say N
70 69
71choice 70config SCTP_COOKIE_HMAC_MD5
72 prompt "SCTP: Cookie HMAC Algorithm" 71 bool "Enable optional MD5 hmac cookie generation"
73 default SCTP_HMAC_MD5
74 help 72 help
75 HMAC algorithm to be used during association initialization. It 73 Enable optional MD5 hmac based SCTP cookie generation
76 is strongly recommended to use HMAC-SHA1 or HMAC-MD5. See 74 default y
77 configuration for Cryptographic API and enable those algorithms 75 select CRYPTO_HMAC if SCTP_COOKIE_HMAC_MD5
78 to make usable by SCTP. 76 select CRYPTO_MD5 if SCTP_COOKIE_HMAC_MD5
79 77
80config SCTP_HMAC_NONE 78config SCTP_COOKIE_HMAC_SHA1
81 bool "None" 79 bool "Enable optional SHA1 hmac cookie generation"
82 help
83 Choosing this disables the use of an HMAC during association
84 establishment. It is advised to use either HMAC-MD5 or HMAC-SHA1.
85
86config SCTP_HMAC_SHA1
87 bool "HMAC-SHA1"
88 help
89 Enable the use of HMAC-SHA1 during association establishment. It
90 is advised to use either HMAC-MD5 or HMAC-SHA1.
91
92config SCTP_HMAC_MD5
93 bool "HMAC-MD5"
94 help 80 help
95 Enable the use of HMAC-MD5 during association establishment. It is 81 Enable optional SHA1 hmac based SCTP cookie generation
96 advised to use either HMAC-MD5 or HMAC-SHA1. 82 default y
83 select CRYPTO_HMAC if SCTP_COOKIE_HMAC_SHA1
84 select CRYPTO_SHA1 if SCTP_COOKIE_HMAC_SHA1
97 85
98endchoice
99 86
100endif # IP_SCTP 87endif # IP_SCTP
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 2d518425d598..456bc3dbdd51 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1190,6 +1190,15 @@ static int sctp_net_init(struct net *net)
1190 /* Whether Cookie Preservative is enabled(1) or not(0) */ 1190 /* Whether Cookie Preservative is enabled(1) or not(0) */
1191 net->sctp.cookie_preserve_enable = 1; 1191 net->sctp.cookie_preserve_enable = 1;
1192 1192
1193 /* Default sctp sockets to use md5 as their hmac alg */
1194#if defined (CONFIG_CRYPTO_MD5)
1195 net->sctp.sctp_hmac_alg = "md5";
1196#elif defined (CONFIG_CRYPTO_SHA1)
1197 net->sctp.sctp_hmac_alg = "sha1";
1198#else
1199 net->sctp.sctp_hmac_alg = NULL;
1200#endif
1201
1193 /* Max.Burst - 4 */ 1202 /* Max.Burst - 4 */
1194 net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST; 1203 net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST;
1195 1204
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 6773d7803627..6eecf7e6338d 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1268,14 +1268,14 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1268 sctp_outq_uncork(&asoc->outqueue); 1268 sctp_outq_uncork(&asoc->outqueue);
1269 local_cork = 0; 1269 local_cork = 0;
1270 } 1270 }
1271 asoc = cmd->obj.ptr; 1271 asoc = cmd->obj.asoc;
1272 /* Register with the endpoint. */ 1272 /* Register with the endpoint. */
1273 sctp_endpoint_add_asoc(ep, asoc); 1273 sctp_endpoint_add_asoc(ep, asoc);
1274 sctp_hash_established(asoc); 1274 sctp_hash_established(asoc);
1275 break; 1275 break;
1276 1276
1277 case SCTP_CMD_UPDATE_ASSOC: 1277 case SCTP_CMD_UPDATE_ASSOC:
1278 sctp_assoc_update(asoc, cmd->obj.ptr); 1278 sctp_assoc_update(asoc, cmd->obj.asoc);
1279 break; 1279 break;
1280 1280
1281 case SCTP_CMD_PURGE_OUTQUEUE: 1281 case SCTP_CMD_PURGE_OUTQUEUE:
@@ -1315,7 +1315,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1315 break; 1315 break;
1316 1316
1317 case SCTP_CMD_PROCESS_FWDTSN: 1317 case SCTP_CMD_PROCESS_FWDTSN:
1318 sctp_cmd_process_fwdtsn(&asoc->ulpq, cmd->obj.ptr); 1318 sctp_cmd_process_fwdtsn(&asoc->ulpq, cmd->obj.chunk);
1319 break; 1319 break;
1320 1320
1321 case SCTP_CMD_GEN_SACK: 1321 case SCTP_CMD_GEN_SACK:
@@ -1331,7 +1331,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1331 case SCTP_CMD_PROCESS_SACK: 1331 case SCTP_CMD_PROCESS_SACK:
1332 /* Process an inbound SACK. */ 1332 /* Process an inbound SACK. */
1333 error = sctp_cmd_process_sack(commands, asoc, 1333 error = sctp_cmd_process_sack(commands, asoc,
1334 cmd->obj.ptr); 1334 cmd->obj.chunk);
1335 break; 1335 break;
1336 1336
1337 case SCTP_CMD_GEN_INIT_ACK: 1337 case SCTP_CMD_GEN_INIT_ACK:
@@ -1352,15 +1352,15 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1352 * layer which will bail. 1352 * layer which will bail.
1353 */ 1353 */
1354 error = sctp_cmd_process_init(commands, asoc, chunk, 1354 error = sctp_cmd_process_init(commands, asoc, chunk,
1355 cmd->obj.ptr, gfp); 1355 cmd->obj.init, gfp);
1356 break; 1356 break;
1357 1357
1358 case SCTP_CMD_GEN_COOKIE_ECHO: 1358 case SCTP_CMD_GEN_COOKIE_ECHO:
1359 /* Generate a COOKIE ECHO chunk. */ 1359 /* Generate a COOKIE ECHO chunk. */
1360 new_obj = sctp_make_cookie_echo(asoc, chunk); 1360 new_obj = sctp_make_cookie_echo(asoc, chunk);
1361 if (!new_obj) { 1361 if (!new_obj) {
1362 if (cmd->obj.ptr) 1362 if (cmd->obj.chunk)
1363 sctp_chunk_free(cmd->obj.ptr); 1363 sctp_chunk_free(cmd->obj.chunk);
1364 goto nomem; 1364 goto nomem;
1365 } 1365 }
1366 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, 1366 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
@@ -1369,9 +1369,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1369 /* If there is an ERROR chunk to be sent along with 1369 /* If there is an ERROR chunk to be sent along with
1370 * the COOKIE_ECHO, send it, too. 1370 * the COOKIE_ECHO, send it, too.
1371 */ 1371 */
1372 if (cmd->obj.ptr) 1372 if (cmd->obj.chunk)
1373 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, 1373 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
1374 SCTP_CHUNK(cmd->obj.ptr)); 1374 SCTP_CHUNK(cmd->obj.chunk));
1375 1375
1376 if (new_obj->transport) { 1376 if (new_obj->transport) {
1377 new_obj->transport->init_sent_count++; 1377 new_obj->transport->init_sent_count++;
@@ -1417,18 +1417,18 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1417 case SCTP_CMD_CHUNK_ULP: 1417 case SCTP_CMD_CHUNK_ULP:
1418 /* Send a chunk to the sockets layer. */ 1418 /* Send a chunk to the sockets layer. */
1419 SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n", 1419 SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n",
1420 "chunk_up:", cmd->obj.ptr, 1420 "chunk_up:", cmd->obj.chunk,
1421 "ulpq:", &asoc->ulpq); 1421 "ulpq:", &asoc->ulpq);
1422 sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.ptr, 1422 sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.chunk,
1423 GFP_ATOMIC); 1423 GFP_ATOMIC);
1424 break; 1424 break;
1425 1425
1426 case SCTP_CMD_EVENT_ULP: 1426 case SCTP_CMD_EVENT_ULP:
1427 /* Send a notification to the sockets layer. */ 1427 /* Send a notification to the sockets layer. */
1428 SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n", 1428 SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n",
1429 "event_up:",cmd->obj.ptr, 1429 "event_up:",cmd->obj.ulpevent,
1430 "ulpq:",&asoc->ulpq); 1430 "ulpq:",&asoc->ulpq);
1431 sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ptr); 1431 sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ulpevent);
1432 break; 1432 break;
1433 1433
1434 case SCTP_CMD_REPLY: 1434 case SCTP_CMD_REPLY:
@@ -1438,12 +1438,12 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1438 local_cork = 1; 1438 local_cork = 1;
1439 } 1439 }
1440 /* Send a chunk to our peer. */ 1440 /* Send a chunk to our peer. */
1441 error = sctp_outq_tail(&asoc->outqueue, cmd->obj.ptr); 1441 error = sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk);
1442 break; 1442 break;
1443 1443
1444 case SCTP_CMD_SEND_PKT: 1444 case SCTP_CMD_SEND_PKT:
1445 /* Send a full packet to our peer. */ 1445 /* Send a full packet to our peer. */
1446 packet = cmd->obj.ptr; 1446 packet = cmd->obj.packet;
1447 sctp_packet_transmit(packet); 1447 sctp_packet_transmit(packet);
1448 sctp_ootb_pkt_free(packet); 1448 sctp_ootb_pkt_free(packet);
1449 break; 1449 break;
@@ -1480,7 +1480,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1480 break; 1480 break;
1481 1481
1482 case SCTP_CMD_SETUP_T2: 1482 case SCTP_CMD_SETUP_T2:
1483 sctp_cmd_setup_t2(commands, asoc, cmd->obj.ptr); 1483 sctp_cmd_setup_t2(commands, asoc, cmd->obj.chunk);
1484 break; 1484 break;
1485 1485
1486 case SCTP_CMD_TIMER_START_ONCE: 1486 case SCTP_CMD_TIMER_START_ONCE:
@@ -1514,7 +1514,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1514 break; 1514 break;
1515 1515
1516 case SCTP_CMD_INIT_CHOOSE_TRANSPORT: 1516 case SCTP_CMD_INIT_CHOOSE_TRANSPORT:
1517 chunk = cmd->obj.ptr; 1517 chunk = cmd->obj.chunk;
1518 t = sctp_assoc_choose_alter_transport(asoc, 1518 t = sctp_assoc_choose_alter_transport(asoc,
1519 asoc->init_last_sent_to); 1519 asoc->init_last_sent_to);
1520 asoc->init_last_sent_to = t; 1520 asoc->init_last_sent_to = t;
@@ -1665,17 +1665,16 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1665 break; 1665 break;
1666 1666
1667 case SCTP_CMD_PART_DELIVER: 1667 case SCTP_CMD_PART_DELIVER:
1668 sctp_ulpq_partial_delivery(&asoc->ulpq, cmd->obj.ptr, 1668 sctp_ulpq_partial_delivery(&asoc->ulpq, GFP_ATOMIC);
1669 GFP_ATOMIC);
1670 break; 1669 break;
1671 1670
1672 case SCTP_CMD_RENEGE: 1671 case SCTP_CMD_RENEGE:
1673 sctp_ulpq_renege(&asoc->ulpq, cmd->obj.ptr, 1672 sctp_ulpq_renege(&asoc->ulpq, cmd->obj.chunk,
1674 GFP_ATOMIC); 1673 GFP_ATOMIC);
1675 break; 1674 break;
1676 1675
1677 case SCTP_CMD_SETUP_T4: 1676 case SCTP_CMD_SETUP_T4:
1678 sctp_cmd_setup_t4(commands, asoc, cmd->obj.ptr); 1677 sctp_cmd_setup_t4(commands, asoc, cmd->obj.chunk);
1679 break; 1678 break;
1680 1679
1681 case SCTP_CMD_PROCESS_OPERR: 1680 case SCTP_CMD_PROCESS_OPERR:
@@ -1734,8 +1733,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1734 break; 1733 break;
1735 1734
1736 default: 1735 default:
1737 pr_warn("Impossible command: %u, %p\n", 1736 pr_warn("Impossible command: %u\n",
1738 cmd->verb, cmd->obj.ptr); 1737 cmd->verb);
1739 break; 1738 break;
1740 } 1739 }
1741 1740
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a60d1f8b41c5..15379acd9c08 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -110,7 +110,6 @@ static int sctp_do_bind(struct sock *, union sctp_addr *, int);
110static int sctp_autobind(struct sock *sk); 110static int sctp_autobind(struct sock *sk);
111static void sctp_sock_migrate(struct sock *, struct sock *, 111static void sctp_sock_migrate(struct sock *, struct sock *,
112 struct sctp_association *, sctp_socket_type_t); 112 struct sctp_association *, sctp_socket_type_t);
113static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG;
114 113
115extern struct kmem_cache *sctp_bucket_cachep; 114extern struct kmem_cache *sctp_bucket_cachep;
116extern long sysctl_sctp_mem[3]; 115extern long sysctl_sctp_mem[3];
@@ -3890,6 +3889,8 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
3890 sp->default_rcv_context = 0; 3889 sp->default_rcv_context = 0;
3891 sp->max_burst = net->sctp.max_burst; 3890 sp->max_burst = net->sctp.max_burst;
3892 3891
3892 sp->sctp_hmac_alg = net->sctp.sctp_hmac_alg;
3893
3893 /* Initialize default setup parameters. These parameters 3894 /* Initialize default setup parameters. These parameters
3894 * can be modified with the SCTP_INITMSG socket option or 3895 * can be modified with the SCTP_INITMSG socket option or
3895 * overridden by the SCTP_INIT CMSG. 3896 * overridden by the SCTP_INIT CMSG.
@@ -5981,13 +5982,15 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog)
5981 struct sctp_sock *sp = sctp_sk(sk); 5982 struct sctp_sock *sp = sctp_sk(sk);
5982 struct sctp_endpoint *ep = sp->ep; 5983 struct sctp_endpoint *ep = sp->ep;
5983 struct crypto_hash *tfm = NULL; 5984 struct crypto_hash *tfm = NULL;
5985 char alg[32];
5984 5986
5985 /* Allocate HMAC for generating cookie. */ 5987 /* Allocate HMAC for generating cookie. */
5986 if (!sctp_sk(sk)->hmac && sctp_hmac_alg) { 5988 if (!sp->hmac && sp->sctp_hmac_alg) {
5987 tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); 5989 sprintf(alg, "hmac(%s)", sp->sctp_hmac_alg);
5990 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
5988 if (IS_ERR(tfm)) { 5991 if (IS_ERR(tfm)) {
5989 net_info_ratelimited("failed to load transform for %s: %ld\n", 5992 net_info_ratelimited("failed to load transform for %s: %ld\n",
5990 sctp_hmac_alg, PTR_ERR(tfm)); 5993 sp->sctp_hmac_alg, PTR_ERR(tfm));
5991 return -ENOSYS; 5994 return -ENOSYS;
5992 } 5995 }
5993 sctp_sk(sk)->hmac = tfm; 5996 sctp_sk(sk)->hmac = tfm;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 70e3ba5cb50b..043889ac86c0 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -62,6 +62,11 @@ extern long sysctl_sctp_mem[3];
62extern int sysctl_sctp_rmem[3]; 62extern int sysctl_sctp_rmem[3];
63extern int sysctl_sctp_wmem[3]; 63extern int sysctl_sctp_wmem[3];
64 64
65static int proc_sctp_do_hmac_alg(ctl_table *ctl,
66 int write,
67 void __user *buffer, size_t *lenp,
68
69 loff_t *ppos);
65static ctl_table sctp_table[] = { 70static ctl_table sctp_table[] = {
66 { 71 {
67 .procname = "sctp_mem", 72 .procname = "sctp_mem",
@@ -147,6 +152,12 @@ static ctl_table sctp_net_table[] = {
147 .proc_handler = proc_dointvec, 152 .proc_handler = proc_dointvec,
148 }, 153 },
149 { 154 {
155 .procname = "cookie_hmac_alg",
156 .maxlen = 8,
157 .mode = 0644,
158 .proc_handler = proc_sctp_do_hmac_alg,
159 },
160 {
150 .procname = "valid_cookie_life", 161 .procname = "valid_cookie_life",
151 .data = &init_net.sctp.valid_cookie_life, 162 .data = &init_net.sctp.valid_cookie_life,
152 .maxlen = sizeof(unsigned int), 163 .maxlen = sizeof(unsigned int),
@@ -289,6 +300,54 @@ static ctl_table sctp_net_table[] = {
289 { /* sentinel */ } 300 { /* sentinel */ }
290}; 301};
291 302
303static int proc_sctp_do_hmac_alg(ctl_table *ctl,
304 int write,
305 void __user *buffer, size_t *lenp,
306 loff_t *ppos)
307{
308 struct net *net = current->nsproxy->net_ns;
309 char tmp[8];
310 ctl_table tbl;
311 int ret;
312 int changed = 0;
313 char *none = "none";
314
315 memset(&tbl, 0, sizeof(struct ctl_table));
316
317 if (write) {
318 tbl.data = tmp;
319 tbl.maxlen = 8;
320 } else {
321 tbl.data = net->sctp.sctp_hmac_alg ? : none;
322 tbl.maxlen = strlen(tbl.data);
323 }
324 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
325
326 if (write) {
327#ifdef CONFIG_CRYPTO_MD5
328 if (!strncmp(tmp, "md5", 3)) {
329 net->sctp.sctp_hmac_alg = "md5";
330 changed = 1;
331 }
332#endif
333#ifdef CONFIG_CRYPTO_SHA1
334 if (!strncmp(tmp, "sha1", 4)) {
335 net->sctp.sctp_hmac_alg = "sha1";
336 changed = 1;
337 }
338#endif
339 if (!strncmp(tmp, "none", 4)) {
340 net->sctp.sctp_hmac_alg = NULL;
341 changed = 1;
342 }
343
344 if (!changed)
345 ret = -EINVAL;
346 }
347
348 return ret;
349}
350
292int sctp_sysctl_net_register(struct net *net) 351int sctp_sysctl_net_register(struct net *net)
293{ 352{
294 struct ctl_table *table; 353 struct ctl_table *table;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 360d8697b95c..ada17464b65b 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -997,7 +997,6 @@ static __u16 sctp_ulpq_renege_frags(struct sctp_ulpq *ulpq, __u16 needed)
997 997
998/* Partial deliver the first message as there is pressure on rwnd. */ 998/* Partial deliver the first message as there is pressure on rwnd. */
999void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, 999void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
1000 struct sctp_chunk *chunk,
1001 gfp_t gfp) 1000 gfp_t gfp)
1002{ 1001{
1003 struct sctp_ulpevent *event; 1002 struct sctp_ulpevent *event;
@@ -1060,7 +1059,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
1060 sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn, chunk->transport); 1059 sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn, chunk->transport);
1061 sctp_ulpq_tail_data(ulpq, chunk, gfp); 1060 sctp_ulpq_tail_data(ulpq, chunk, gfp);
1062 1061
1063 sctp_ulpq_partial_delivery(ulpq, chunk, gfp); 1062 sctp_ulpq_partial_delivery(ulpq, gfp);
1064 } 1063 }
1065 1064
1066 sk_mem_reclaim(asoc->base.sk); 1065 sk_mem_reclaim(asoc->base.sk);
diff --git a/net/socket.c b/net/socket.c
index d92c490e66fa..2ca51c719ef9 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -620,8 +620,6 @@ static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
620{ 620{
621 struct sock_iocb *si = kiocb_to_siocb(iocb); 621 struct sock_iocb *si = kiocb_to_siocb(iocb);
622 622
623 sock_update_classid(sock->sk);
624
625 si->sock = sock; 623 si->sock = sock;
626 si->scm = NULL; 624 si->scm = NULL;
627 si->msg = msg; 625 si->msg = msg;
@@ -784,8 +782,6 @@ static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
784{ 782{
785 struct sock_iocb *si = kiocb_to_siocb(iocb); 783 struct sock_iocb *si = kiocb_to_siocb(iocb);
786 784
787 sock_update_classid(sock->sk);
788
789 si->sock = sock; 785 si->sock = sock;
790 si->scm = NULL; 786 si->scm = NULL;
791 si->msg = msg; 787 si->msg = msg;
@@ -896,8 +892,6 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
896 if (unlikely(!sock->ops->splice_read)) 892 if (unlikely(!sock->ops->splice_read))
897 return -EINVAL; 893 return -EINVAL;
898 894
899 sock_update_classid(sock->sk);
900
901 return sock->ops->splice_read(sock, ppos, pipe, len, flags); 895 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
902} 896}
903 897
@@ -3437,8 +3431,6 @@ EXPORT_SYMBOL(kernel_setsockopt);
3437int kernel_sendpage(struct socket *sock, struct page *page, int offset, 3431int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3438 size_t size, int flags) 3432 size_t size, int flags)
3439{ 3433{
3440 sock_update_classid(sock->sk);
3441
3442 if (sock->ops->sendpage) 3434 if (sock->ops->sendpage)
3443 return sock->ops->sendpage(sock, page, offset, size, flags); 3435 return sock->ops->sendpage(sock, page, offset, size, flags);
3444 3436
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 06748f108a57..5ac19dc1d5e4 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -151,6 +151,9 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
151 sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO)) 151 sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO))
152 goto out_nlmsg_trim; 152 goto out_nlmsg_trim;
153 153
154 if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown))
155 goto out_nlmsg_trim;
156
154 return nlmsg_end(skb, nlh); 157 return nlmsg_end(skb, nlh);
155 158
156out_nlmsg_trim: 159out_nlmsg_trim: