aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-07-31 21:43:13 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-07-31 21:43:13 -0400
commit: fd37ce34bd512f2b1a503f82abf8768da556a955 (patch)
tree: 557ff43ff5291d1704527e31293633fbc2f956d5
parent: 4b24ff71108164e047cf2c95990b77651163e315 (diff)
parent: caacf05e5ad1abf0a2864863da4e33024bc68ec6 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking update from David S. Miller:
 "I think Eric Dumazet and I have dealt with all of the known routing cache removal fallout. Some other minor fixes all around.
  1) Fix RCU of cached routes, particularly of output routes which require liberation via call_rcu() instead of call_rcu_bh(). From Eric Dumazet.
  2) Make sure we purge net device references in cached routes properly.
  3) TG3 driver bug fixes from Michael Chan.
  4) Fix reported 'expires' value in ipv6 routes, from Li Wei.
  5) TUN driver ioctl leaks kernel bytes to userspace, from Mathias Krause."
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (22 commits)
  ipv4: Properly purge netdev references on uncached routes.
  ipv4: Cache routes in nexthop exception entries.
  ipv4: percpu nh_rth_output cache
  ipv4: Restore old dst_free() behavior.
  bridge: make port attributes const
  ipv4: remove rt_cache_rebuild_count
  net: ipv4: fix RCU races on dst refcounts
  net: TCP early demux cleanup
  tun: Fix formatting.
  net/tun: fix ioctl() based info leaks
  tg3: Update version to 3.124
  tg3: Fix race condition in tg3_get_stats64()
  tg3: Add New 5719 Read DMA workaround
  tg3: Fix Read DMA workaround for 5719 A0.
  tg3: Request APE_LOCK_PHY before PHY access
  ipv6: fix incorrect route 'expires' value passed to userspace
  mISDN: Bugfix only few bytes are transfered on a connection
  seeq: use PTR_RET at init_module of driver
  bnx2x: remove cast around the kmalloc in bnx2x_prev_mark_path
  ipv4: clean up put_child
  ...
-rw-r--r-- Documentation/networking/ip-sysctl.txt | 6
-rw-r--r-- drivers/isdn/hardware/mISDN/avmfritz.c | 7
-rw-r--r-- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 3
-rw-r--r-- drivers/net/ethernet/broadcom/tg3.c | 71
-rw-r--r-- drivers/net/ethernet/broadcom/tg3.h | 8
-rw-r--r-- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 1
-rw-r--r-- drivers/net/ethernet/seeq/seeq8005.c | 4
-rw-r--r-- drivers/net/tun.c | 6
-rw-r--r-- include/net/inet_sock.h | 9
-rw-r--r-- include/net/ip_fib.h | 6
-rw-r--r-- include/net/netns/ipv4.h | 2
-rw-r--r-- include/net/route.h | 3
-rw-r--r-- net/bridge/br_sysfs_if.c | 6
-rw-r--r-- net/core/rtnetlink.c | 8
-rw-r--r-- net/ipv4/fib_frontend.c | 1
-rw-r--r-- net/ipv4/fib_semantics.c | 42
-rw-r--r-- net/ipv4/fib_trie.c | 53
-rw-r--r-- net/ipv4/ip_input.c | 2
-rw-r--r-- net/ipv4/route.c | 183
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c | 11
-rw-r--r-- net/ipv4/tcp_input.c | 3
-rw-r--r-- net/ipv4/tcp_ipv4.c | 12
-rw-r--r-- net/ipv4/tcp_minisocks.c | 3
-rw-r--r-- net/ipv4/xfrm4_policy.c | 1
-rw-r--r-- net/ipv6/ip6_input.c | 2
-rw-r--r-- net/ipv6/route.c | 8
26 files changed, 316 insertions, 145 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 406a5226220d..ca447b35b833 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -48,12 +48,6 @@ min_adv_mss - INTEGER
48 The advertised MSS depends on the first hop route MTU, but will 48 The advertised MSS depends on the first hop route MTU, but will
49 never be lower than this setting. 49 never be lower than this setting.
50 50
51rt_cache_rebuild_count - INTEGER
52 The per net-namespace route cache emergency rebuild threshold.
53 Any net-namespace having its route cache rebuilt due to
54 a hash bucket chain being too long more than this many times
55 will have its route caching disabled
56
57IP Fragmentation: 51IP Fragmentation:
58 52
59ipfrag_high_thresh - INTEGER 53ipfrag_high_thresh - INTEGER
diff --git a/drivers/isdn/hardware/mISDN/avmfritz.c b/drivers/isdn/hardware/mISDN/avmfritz.c
index c08fc605e56b..fa6ca4733725 100644
--- a/drivers/isdn/hardware/mISDN/avmfritz.c
+++ b/drivers/isdn/hardware/mISDN/avmfritz.c
@@ -449,7 +449,8 @@ hdlc_fill_fifo(struct bchannel *bch)
449{ 449{
450 struct fritzcard *fc = bch->hw; 450 struct fritzcard *fc = bch->hw;
451 struct hdlc_hw *hdlc; 451 struct hdlc_hw *hdlc;
452 int count, fs, cnt = 0, idx, fillempty = 0; 452 int count, fs, cnt = 0, idx;
453 bool fillempty = false;
453 u8 *p; 454 u8 *p;
454 u32 *ptr, val, addr; 455 u32 *ptr, val, addr;
455 456
@@ -462,7 +463,7 @@ hdlc_fill_fifo(struct bchannel *bch)
462 return; 463 return;
463 count = fs; 464 count = fs;
464 p = bch->fill; 465 p = bch->fill;
465 fillempty = 1; 466 fillempty = true;
466 } else { 467 } else {
467 count = bch->tx_skb->len - bch->tx_idx; 468 count = bch->tx_skb->len - bch->tx_idx;
468 if (count <= 0) 469 if (count <= 0)
@@ -477,7 +478,7 @@ hdlc_fill_fifo(struct bchannel *bch)
477 hdlc->ctrl.sr.cmd |= HDLC_CMD_XME; 478 hdlc->ctrl.sr.cmd |= HDLC_CMD_XME;
478 } 479 }
479 ptr = (u32 *)p; 480 ptr = (u32 *)p;
480 if (fillempty) { 481 if (!fillempty) {
481 pr_debug("%s.B%d: %d/%d/%d", fc->name, bch->nr, count, 482 pr_debug("%s.B%d: %d/%d/%d", fc->name, bch->nr, count,
482 bch->tx_idx, bch->tx_skb->len); 483 bch->tx_idx, bch->tx_skb->len);
483 bch->tx_idx += count; 484 bch->tx_idx += count;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 9aaf863b4237..dd451c3dd83d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -9360,8 +9360,7 @@ static int __devinit bnx2x_prev_mark_path(struct bnx2x *bp)
9360 struct bnx2x_prev_path_list *tmp_list; 9360 struct bnx2x_prev_path_list *tmp_list;
9361 int rc; 9361 int rc;
9362 9362
9363 tmp_list = (struct bnx2x_prev_path_list *) 9363 tmp_list = kmalloc(sizeof(struct bnx2x_prev_path_list), GFP_KERNEL);
9364 kmalloc(sizeof(struct bnx2x_prev_path_list), GFP_KERNEL);
9365 if (!tmp_list) { 9364 if (!tmp_list) {
9366 BNX2X_ERR("Failed to allocate 'bnx2x_prev_path_list'\n"); 9365 BNX2X_ERR("Failed to allocate 'bnx2x_prev_path_list'\n");
9367 return -ENOMEM; 9366 return -ENOMEM;
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 9a009fd6ea1b..bf906c51d82a 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -92,7 +92,7 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits)
92 92
93#define DRV_MODULE_NAME "tg3" 93#define DRV_MODULE_NAME "tg3"
94#define TG3_MAJ_NUM 3 94#define TG3_MAJ_NUM 3
95#define TG3_MIN_NUM 123 95#define TG3_MIN_NUM 124
96#define DRV_MODULE_VERSION \ 96#define DRV_MODULE_VERSION \
97 __stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM) 97 __stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM)
98#define DRV_MODULE_RELDATE "March 21, 2012" 98#define DRV_MODULE_RELDATE "March 21, 2012"
@@ -672,6 +672,12 @@ static int tg3_ape_lock(struct tg3 *tp, int locknum)
672 else 672 else
673 bit = 1 << tp->pci_fn; 673 bit = 1 << tp->pci_fn;
674 break; 674 break;
675 case TG3_APE_LOCK_PHY0:
676 case TG3_APE_LOCK_PHY1:
677 case TG3_APE_LOCK_PHY2:
678 case TG3_APE_LOCK_PHY3:
679 bit = APE_LOCK_REQ_DRIVER;
680 break;
675 default: 681 default:
676 return -EINVAL; 682 return -EINVAL;
677 } 683 }
@@ -723,6 +729,12 @@ static void tg3_ape_unlock(struct tg3 *tp, int locknum)
723 else 729 else
724 bit = 1 << tp->pci_fn; 730 bit = 1 << tp->pci_fn;
725 break; 731 break;
732 case TG3_APE_LOCK_PHY0:
733 case TG3_APE_LOCK_PHY1:
734 case TG3_APE_LOCK_PHY2:
735 case TG3_APE_LOCK_PHY3:
736 bit = APE_LOCK_GRANT_DRIVER;
737 break;
726 default: 738 default:
727 return; 739 return;
728 } 740 }
@@ -1052,6 +1064,8 @@ static int tg3_readphy(struct tg3 *tp, int reg, u32 *val)
1052 udelay(80); 1064 udelay(80);
1053 } 1065 }
1054 1066
1067 tg3_ape_lock(tp, tp->phy_ape_lock);
1068
1055 *val = 0x0; 1069 *val = 0x0;
1056 1070
1057 frame_val = ((tp->phy_addr << MI_COM_PHY_ADDR_SHIFT) & 1071 frame_val = ((tp->phy_addr << MI_COM_PHY_ADDR_SHIFT) &
@@ -1086,6 +1100,8 @@ static int tg3_readphy(struct tg3 *tp, int reg, u32 *val)
1086 udelay(80); 1100 udelay(80);
1087 } 1101 }
1088 1102
1103 tg3_ape_unlock(tp, tp->phy_ape_lock);
1104
1089 return ret; 1105 return ret;
1090} 1106}
1091 1107
@@ -1105,6 +1121,8 @@ static int tg3_writephy(struct tg3 *tp, int reg, u32 val)
1105 udelay(80); 1121 udelay(80);
1106 } 1122 }
1107 1123
1124 tg3_ape_lock(tp, tp->phy_ape_lock);
1125
1108 frame_val = ((tp->phy_addr << MI_COM_PHY_ADDR_SHIFT) & 1126 frame_val = ((tp->phy_addr << MI_COM_PHY_ADDR_SHIFT) &
1109 MI_COM_PHY_ADDR_MASK); 1127 MI_COM_PHY_ADDR_MASK);
1110 frame_val |= ((reg << MI_COM_REG_ADDR_SHIFT) & 1128 frame_val |= ((reg << MI_COM_REG_ADDR_SHIFT) &
@@ -1135,6 +1153,8 @@ static int tg3_writephy(struct tg3 *tp, int reg, u32 val)
1135 udelay(80); 1153 udelay(80);
1136 } 1154 }
1137 1155
1156 tg3_ape_unlock(tp, tp->phy_ape_lock);
1157
1138 return ret; 1158 return ret;
1139} 1159}
1140 1160
@@ -9066,8 +9086,7 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
9066 GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57780 || 9086 GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57780 ||
9067 tg3_flag(tp, 57765_PLUS)) { 9087 tg3_flag(tp, 57765_PLUS)) {
9068 val = tr32(TG3_RDMA_RSRVCTRL_REG); 9088 val = tr32(TG3_RDMA_RSRVCTRL_REG);
9069 if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719 || 9089 if (tp->pci_chip_rev_id == CHIPREV_ID_5719_A0) {
9070 GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5720) {
9071 val &= ~(TG3_RDMA_RSRVCTRL_TXMRGN_MASK | 9090 val &= ~(TG3_RDMA_RSRVCTRL_TXMRGN_MASK |
9072 TG3_RDMA_RSRVCTRL_FIFO_LWM_MASK | 9091 TG3_RDMA_RSRVCTRL_FIFO_LWM_MASK |
9073 TG3_RDMA_RSRVCTRL_FIFO_HWM_MASK); 9092 TG3_RDMA_RSRVCTRL_FIFO_HWM_MASK);
@@ -9257,6 +9276,19 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
9257 tw32_f(RDMAC_MODE, rdmac_mode); 9276 tw32_f(RDMAC_MODE, rdmac_mode);
9258 udelay(40); 9277 udelay(40);
9259 9278
9279 if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719) {
9280 for (i = 0; i < TG3_NUM_RDMA_CHANNELS; i++) {
9281 if (tr32(TG3_RDMA_LENGTH + (i << 2)) > TG3_MAX_MTU(tp))
9282 break;
9283 }
9284 if (i < TG3_NUM_RDMA_CHANNELS) {
9285 val = tr32(TG3_LSO_RD_DMA_CRPTEN_CTRL);
9286 val |= TG3_LSO_RD_DMA_TX_LENGTH_WA;
9287 tw32(TG3_LSO_RD_DMA_CRPTEN_CTRL, val);
9288 tg3_flag_set(tp, 5719_RDMA_BUG);
9289 }
9290 }
9291
9260 tw32(RCVDCC_MODE, RCVDCC_MODE_ENABLE | RCVDCC_MODE_ATTN_ENABLE); 9292 tw32(RCVDCC_MODE, RCVDCC_MODE_ENABLE | RCVDCC_MODE_ATTN_ENABLE);
9261 if (!tg3_flag(tp, 5705_PLUS)) 9293 if (!tg3_flag(tp, 5705_PLUS))
9262 tw32(MBFREE_MODE, MBFREE_MODE_ENABLE); 9294 tw32(MBFREE_MODE, MBFREE_MODE_ENABLE);
@@ -9616,6 +9648,16 @@ static void tg3_periodic_fetch_stats(struct tg3 *tp)
9616 TG3_STAT_ADD32(&sp->tx_ucast_packets, MAC_TX_STATS_UCAST); 9648 TG3_STAT_ADD32(&sp->tx_ucast_packets, MAC_TX_STATS_UCAST);
9617 TG3_STAT_ADD32(&sp->tx_mcast_packets, MAC_TX_STATS_MCAST); 9649 TG3_STAT_ADD32(&sp->tx_mcast_packets, MAC_TX_STATS_MCAST);
9618 TG3_STAT_ADD32(&sp->tx_bcast_packets, MAC_TX_STATS_BCAST); 9650 TG3_STAT_ADD32(&sp->tx_bcast_packets, MAC_TX_STATS_BCAST);
9651 if (unlikely(tg3_flag(tp, 5719_RDMA_BUG) &&
9652 (sp->tx_ucast_packets.low + sp->tx_mcast_packets.low +
9653 sp->tx_bcast_packets.low) > TG3_NUM_RDMA_CHANNELS)) {
9654 u32 val;
9655
9656 val = tr32(TG3_LSO_RD_DMA_CRPTEN_CTRL);
9657 val &= ~TG3_LSO_RD_DMA_TX_LENGTH_WA;
9658 tw32(TG3_LSO_RD_DMA_CRPTEN_CTRL, val);
9659 tg3_flag_clear(tp, 5719_RDMA_BUG);
9660 }
9619 9661
9620 TG3_STAT_ADD32(&sp->rx_octets, MAC_RX_STATS_OCTETS); 9662 TG3_STAT_ADD32(&sp->rx_octets, MAC_RX_STATS_OCTETS);
9621 TG3_STAT_ADD32(&sp->rx_fragments, MAC_RX_STATS_FRAGMENTS); 9663 TG3_STAT_ADD32(&sp->rx_fragments, MAC_RX_STATS_FRAGMENTS);
@@ -12482,10 +12524,12 @@ static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev,
12482{ 12524{
12483 struct tg3 *tp = netdev_priv(dev); 12525 struct tg3 *tp = netdev_priv(dev);
12484 12526
12485 if (!tp->hw_stats) 12527 spin_lock_bh(&tp->lock);
12528 if (!tp->hw_stats) {
12529 spin_unlock_bh(&tp->lock);
12486 return &tp->net_stats_prev; 12530 return &tp->net_stats_prev;
12531 }
12487 12532
12488 spin_lock_bh(&tp->lock);
12489 tg3_get_nstats(tp, stats); 12533 tg3_get_nstats(tp, stats);
12490 spin_unlock_bh(&tp->lock); 12534 spin_unlock_bh(&tp->lock);
12491 12535
@@ -13648,6 +13692,23 @@ static int __devinit tg3_phy_probe(struct tg3 *tp)
13648 tg3_flag_set(tp, PAUSE_AUTONEG); 13692 tg3_flag_set(tp, PAUSE_AUTONEG);
13649 tp->link_config.flowctrl = FLOW_CTRL_TX | FLOW_CTRL_RX; 13693 tp->link_config.flowctrl = FLOW_CTRL_TX | FLOW_CTRL_RX;
13650 13694
13695 if (tg3_flag(tp, ENABLE_APE)) {
13696 switch (tp->pci_fn) {
13697 case 0:
13698 tp->phy_ape_lock = TG3_APE_LOCK_PHY0;
13699 break;
13700 case 1:
13701 tp->phy_ape_lock = TG3_APE_LOCK_PHY1;
13702 break;
13703 case 2:
13704 tp->phy_ape_lock = TG3_APE_LOCK_PHY2;
13705 break;
13706 case 3:
13707 tp->phy_ape_lock = TG3_APE_LOCK_PHY3;
13708 break;
13709 }
13710 }
13711
13651 if (tg3_flag(tp, USE_PHYLIB)) 13712 if (tg3_flag(tp, USE_PHYLIB))
13652 return tg3_phy_init(tp); 13713 return tg3_phy_init(tp);
13653 13714
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index a1b75cd67b9d..6d52cb286826 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -1376,7 +1376,11 @@
1376#define TG3_LSO_RD_DMA_CRPTEN_CTRL 0x00004910 1376#define TG3_LSO_RD_DMA_CRPTEN_CTRL 0x00004910
1377#define TG3_LSO_RD_DMA_CRPTEN_CTRL_BLEN_BD_4K 0x00030000 1377#define TG3_LSO_RD_DMA_CRPTEN_CTRL_BLEN_BD_4K 0x00030000
1378#define TG3_LSO_RD_DMA_CRPTEN_CTRL_BLEN_LSO_4K 0x000c0000 1378#define TG3_LSO_RD_DMA_CRPTEN_CTRL_BLEN_LSO_4K 0x000c0000
1379/* 0x4914 --> 0x4c00 unused */ 1379#define TG3_LSO_RD_DMA_TX_LENGTH_WA 0x02000000
1380/* 0x4914 --> 0x4be0 unused */
1381
1382#define TG3_NUM_RDMA_CHANNELS 4
1383#define TG3_RDMA_LENGTH 0x00004be0
1380 1384
1381/* Write DMA control registers */ 1385/* Write DMA control registers */
1382#define WDMAC_MODE 0x00004c00 1386#define WDMAC_MODE 0x00004c00
@@ -2959,6 +2963,7 @@ enum TG3_FLAGS {
2959 TG3_FLAG_L1PLLPD_EN, 2963 TG3_FLAG_L1PLLPD_EN,
2960 TG3_FLAG_APE_HAS_NCSI, 2964 TG3_FLAG_APE_HAS_NCSI,
2961 TG3_FLAG_4K_FIFO_LIMIT, 2965 TG3_FLAG_4K_FIFO_LIMIT,
2966 TG3_FLAG_5719_RDMA_BUG,
2962 TG3_FLAG_RESET_TASK_PENDING, 2967 TG3_FLAG_RESET_TASK_PENDING,
2963 TG3_FLAG_5705_PLUS, 2968 TG3_FLAG_5705_PLUS,
2964 TG3_FLAG_IS_5788, 2969 TG3_FLAG_IS_5788,
@@ -3107,6 +3112,7 @@ struct tg3 {
3107 int old_link; 3112 int old_link;
3108 3113
3109 u8 phy_addr; 3114 u8 phy_addr;
3115 u8 phy_ape_lock;
3110 3116
3111 /* PHY info */ 3117 /* PHY info */
3112 u32 phy_id; 3118 u32 phy_id;
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 3769f5711cc3..b53a3b60b648 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -4682,6 +4682,7 @@ static int __devinit qlge_probe(struct pci_dev *pdev,
4682 NETIF_F_HW_VLAN_TX | NETIF_F_RXCSUM; 4682 NETIF_F_HW_VLAN_TX | NETIF_F_RXCSUM;
4683 ndev->features = ndev->hw_features | 4683 ndev->features = ndev->hw_features |
4684 NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER; 4684 NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_FILTER;
4685 ndev->vlan_features = ndev->hw_features;
4685 4686
4686 if (test_bit(QL_DMA64, &qdev->flags)) 4687 if (test_bit(QL_DMA64, &qdev->flags))
4687 ndev->features |= NETIF_F_HIGHDMA; 4688 ndev->features |= NETIF_F_HIGHDMA;
diff --git a/drivers/net/ethernet/seeq/seeq8005.c b/drivers/net/ethernet/seeq/seeq8005.c
index 698edbbfc149..d6e50de71186 100644
--- a/drivers/net/ethernet/seeq/seeq8005.c
+++ b/drivers/net/ethernet/seeq/seeq8005.c
@@ -736,9 +736,7 @@ MODULE_PARM_DESC(irq, "SEEQ 8005 IRQ number");
736int __init init_module(void) 736int __init init_module(void)
737{ 737{
738 dev_seeq = seeq8005_probe(-1); 738 dev_seeq = seeq8005_probe(-1);
739 if (IS_ERR(dev_seeq)) 739 return PTR_RET(dev_seeq);
740 return PTR_ERR(dev_seeq);
741 return 0;
742} 740}
743 741
744void __exit cleanup_module(void) 742void __exit cleanup_module(void)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index c62163e272cd..926d4db5cb38 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1379,10 +1379,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
1379 int vnet_hdr_sz; 1379 int vnet_hdr_sz;
1380 int ret; 1380 int ret;
1381 1381
1382 if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) 1382 if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) {
1383 if (copy_from_user(&ifr, argp, ifreq_len)) 1383 if (copy_from_user(&ifr, argp, ifreq_len))
1384 return -EFAULT; 1384 return -EFAULT;
1385 1385 } else {
1386 memset(&ifr, 0, sizeof(ifr));
1387 }
1386 if (cmd == TUNGETFEATURES) { 1388 if (cmd == TUNGETFEATURES) {
1387 /* Currently this just means: "what IFF flags are valid?". 1389 /* Currently this just means: "what IFF flags are valid?".
1388 * This is needed because we never checked for invalid flags on 1390 * This is needed because we never checked for invalid flags on
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 613cfa401672..83b567fe1941 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -249,4 +249,13 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
249 return flags; 249 return flags;
250} 250}
251 251
252static inline void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
253{
254 struct dst_entry *dst = skb_dst(skb);
255
256 dst_hold(dst);
257 sk->sk_rx_dst = dst;
258 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
259}
260
252#endif /* _INET_SOCK_H */ 261#endif /* _INET_SOCK_H */
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index e69c3a47153d..926142ed8d7a 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -21,6 +21,7 @@
21#include <linux/rcupdate.h> 21#include <linux/rcupdate.h>
22#include <net/fib_rules.h> 22#include <net/fib_rules.h>
23#include <net/inetpeer.h> 23#include <net/inetpeer.h>
24#include <linux/percpu.h>
24 25
25struct fib_config { 26struct fib_config {
26 u8 fc_dst_len; 27 u8 fc_dst_len;
@@ -54,6 +55,7 @@ struct fib_nh_exception {
54 u32 fnhe_pmtu; 55 u32 fnhe_pmtu;
55 __be32 fnhe_gw; 56 __be32 fnhe_gw;
56 unsigned long fnhe_expires; 57 unsigned long fnhe_expires;
58 struct rtable __rcu *fnhe_rth;
57 unsigned long fnhe_stamp; 59 unsigned long fnhe_stamp;
58}; 60};
59 61
@@ -81,8 +83,8 @@ struct fib_nh {
81 __be32 nh_gw; 83 __be32 nh_gw;
82 __be32 nh_saddr; 84 __be32 nh_saddr;
83 int nh_saddr_genid; 85 int nh_saddr_genid;
84 struct rtable *nh_rth_output; 86 struct rtable __rcu * __percpu *nh_pcpu_rth_output;
85 struct rtable *nh_rth_input; 87 struct rtable __rcu *nh_rth_input;
86 struct fnhe_hash_bucket *nh_exceptions; 88 struct fnhe_hash_bucket *nh_exceptions;
87}; 89};
88 90
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 0ffb8e31f3cd..1474dd65c66f 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -61,8 +61,6 @@ struct netns_ipv4 {
61 int sysctl_icmp_ratelimit; 61 int sysctl_icmp_ratelimit;
62 int sysctl_icmp_ratemask; 62 int sysctl_icmp_ratemask;
63 int sysctl_icmp_errors_use_inbound_ifaddr; 63 int sysctl_icmp_errors_use_inbound_ifaddr;
64 int sysctl_rt_cache_rebuild_count;
65 int current_rt_cache_rebuild_count;
66 64
67 unsigned int sysctl_ping_group_range[2]; 65 unsigned int sysctl_ping_group_range[2];
68 long sysctl_tcp_mem[3]; 66 long sysctl_tcp_mem[3];
diff --git a/include/net/route.h b/include/net/route.h
index 8c52bc6f1c90..776a27f1ab78 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -57,6 +57,8 @@ struct rtable {
57 57
58 /* Miscellaneous cached information */ 58 /* Miscellaneous cached information */
59 u32 rt_pmtu; 59 u32 rt_pmtu;
60
61 struct list_head rt_uncached;
60}; 62};
61 63
62static inline bool rt_is_input_route(const struct rtable *rt) 64static inline bool rt_is_input_route(const struct rtable *rt)
@@ -107,6 +109,7 @@ extern struct ip_rt_acct __percpu *ip_rt_acct;
107struct in_device; 109struct in_device;
108extern int ip_rt_init(void); 110extern int ip_rt_init(void);
109extern void rt_cache_flush(struct net *net, int how); 111extern void rt_cache_flush(struct net *net, int how);
112extern void rt_flush_dev(struct net_device *dev);
110extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); 113extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
111extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, 114extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
112 struct sock *sk); 115 struct sock *sk);
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 6229b62749e8..13b36bdc76a7 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -27,7 +27,7 @@ struct brport_attribute {
27}; 27};
28 28
29#define BRPORT_ATTR(_name,_mode,_show,_store) \ 29#define BRPORT_ATTR(_name,_mode,_show,_store) \
30struct brport_attribute brport_attr_##_name = { \ 30const struct brport_attribute brport_attr_##_name = { \
31 .attr = {.name = __stringify(_name), \ 31 .attr = {.name = __stringify(_name), \
32 .mode = _mode }, \ 32 .mode = _mode }, \
33 .show = _show, \ 33 .show = _show, \
@@ -164,7 +164,7 @@ static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router,
164 store_multicast_router); 164 store_multicast_router);
165#endif 165#endif
166 166
167static struct brport_attribute *brport_attrs[] = { 167static const struct brport_attribute *brport_attrs[] = {
168 &brport_attr_path_cost, 168 &brport_attr_path_cost,
169 &brport_attr_priority, 169 &brport_attr_priority,
170 &brport_attr_port_id, 170 &brport_attr_port_id,
@@ -241,7 +241,7 @@ const struct sysfs_ops brport_sysfs_ops = {
241int br_sysfs_addif(struct net_bridge_port *p) 241int br_sysfs_addif(struct net_bridge_port *p)
242{ 242{
243 struct net_bridge *br = p->br; 243 struct net_bridge *br = p->br;
244 struct brport_attribute **a; 244 const struct brport_attribute **a;
245 int err; 245 int err;
246 246
247 err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj, 247 err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bc9e380f0abf..5ff949dc954f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -625,9 +625,13 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
625 .rta_id = id, 625 .rta_id = id,
626 }; 626 };
627 627
628 if (expires) 628 if (expires) {
629 ci.rta_expires = jiffies_to_clock_t(expires); 629 unsigned long clock;
630 630
631 clock = jiffies_to_clock_t(abs(expires));
632 clock = min_t(unsigned long, clock, INT_MAX);
633 ci.rta_expires = (expires > 0) ? clock : -clock;
634 }
631 return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); 635 return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci);
632} 636}
633EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); 637EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8732cc7920ed..c43ae3fba792 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1046,6 +1046,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
1046 1046
1047 if (event == NETDEV_UNREGISTER) { 1047 if (event == NETDEV_UNREGISTER) {
1048 fib_disable_ip(dev, 2, -1); 1048 fib_disable_ip(dev, 2, -1);
1049 rt_flush_dev(dev);
1049 return NOTIFY_DONE; 1050 return NOTIFY_DONE;
1050 } 1051 }
1051 1052
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index da0cc2e6b250..da80dc14cc76 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -140,6 +140,21 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
140 }, 140 },
141}; 141};
142 142
143static void rt_fibinfo_free(struct rtable __rcu **rtp)
144{
145 struct rtable *rt = rcu_dereference_protected(*rtp, 1);
146
147 if (!rt)
148 return;
149
150 /* Not even needed : RCU_INIT_POINTER(*rtp, NULL);
151 * because we waited an RCU grace period before calling
152 * free_fib_info_rcu()
153 */
154
155 dst_free(&rt->dst);
156}
157
143static void free_nh_exceptions(struct fib_nh *nh) 158static void free_nh_exceptions(struct fib_nh *nh)
144{ 159{
145 struct fnhe_hash_bucket *hash = nh->nh_exceptions; 160 struct fnhe_hash_bucket *hash = nh->nh_exceptions;
@@ -153,6 +168,9 @@ static void free_nh_exceptions(struct fib_nh *nh)
153 struct fib_nh_exception *next; 168 struct fib_nh_exception *next;
154 169
155 next = rcu_dereference_protected(fnhe->fnhe_next, 1); 170 next = rcu_dereference_protected(fnhe->fnhe_next, 1);
171
172 rt_fibinfo_free(&fnhe->fnhe_rth);
173
156 kfree(fnhe); 174 kfree(fnhe);
157 175
158 fnhe = next; 176 fnhe = next;
@@ -161,6 +179,23 @@ static void free_nh_exceptions(struct fib_nh *nh)
161 kfree(hash); 179 kfree(hash);
162} 180}
163 181
182static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
183{
184 int cpu;
185
186 if (!rtp)
187 return;
188
189 for_each_possible_cpu(cpu) {
190 struct rtable *rt;
191
192 rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
193 if (rt)
194 dst_free(&rt->dst);
195 }
196 free_percpu(rtp);
197}
198
164/* Release a nexthop info record */ 199/* Release a nexthop info record */
165static void free_fib_info_rcu(struct rcu_head *head) 200static void free_fib_info_rcu(struct rcu_head *head)
166{ 201{
@@ -171,10 +206,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
171 dev_put(nexthop_nh->nh_dev); 206 dev_put(nexthop_nh->nh_dev);
172 if (nexthop_nh->nh_exceptions) 207 if (nexthop_nh->nh_exceptions)
173 free_nh_exceptions(nexthop_nh); 208 free_nh_exceptions(nexthop_nh);
174 if (nexthop_nh->nh_rth_output) 209 rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
175 dst_free(&nexthop_nh->nh_rth_output->dst); 210 rt_fibinfo_free(&nexthop_nh->nh_rth_input);
176 if (nexthop_nh->nh_rth_input)
177 dst_free(&nexthop_nh->nh_rth_input->dst);
178 } endfor_nexthops(fi); 211 } endfor_nexthops(fi);
179 212
180 release_net(fi->fib_net); 213 release_net(fi->fib_net);
@@ -804,6 +837,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
804 fi->fib_nhs = nhs; 837 fi->fib_nhs = nhs;
805 change_nexthops(fi) { 838 change_nexthops(fi) {
806 nexthop_nh->nh_parent = fi; 839 nexthop_nh->nh_parent = fi;
840 nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
807 } endfor_nexthops(fi) 841 } endfor_nexthops(fi)
808 842
809 if (cfg->fc_mx) { 843 if (cfg->fc_mx) {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 18cbc15b20d5..f0cdb30921c0 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -159,7 +159,6 @@ struct trie {
159#endif 159#endif
160}; 160};
161 161
162static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n);
163static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, 162static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
164 int wasfull); 163 int wasfull);
165static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); 164static struct rt_trie_node *resize(struct trie *t, struct tnode *tn);
@@ -473,7 +472,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
473 } 472 }
474 473
475 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), 474 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
476 sizeof(struct rt_trie_node) << bits); 475 sizeof(struct rt_trie_node *) << bits);
477 return tn; 476 return tn;
478} 477}
479 478
@@ -490,7 +489,7 @@ static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *
490 return ((struct tnode *) n)->pos == tn->pos + tn->bits; 489 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
491} 490}
492 491
493static inline void put_child(struct trie *t, struct tnode *tn, int i, 492static inline void put_child(struct tnode *tn, int i,
494 struct rt_trie_node *n) 493 struct rt_trie_node *n)
495{ 494{
496 tnode_put_child_reorg(tn, i, n, -1); 495 tnode_put_child_reorg(tn, i, n, -1);
@@ -754,8 +753,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
754 goto nomem; 753 goto nomem;
755 } 754 }
756 755
757 put_child(t, tn, 2*i, (struct rt_trie_node *) left); 756 put_child(tn, 2*i, (struct rt_trie_node *) left);
758 put_child(t, tn, 2*i+1, (struct rt_trie_node *) right); 757 put_child(tn, 2*i+1, (struct rt_trie_node *) right);
759 } 758 }
760 } 759 }
761 760
@@ -776,9 +775,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
776 if (tkey_extract_bits(node->key, 775 if (tkey_extract_bits(node->key,
777 oldtnode->pos + oldtnode->bits, 776 oldtnode->pos + oldtnode->bits,
778 1) == 0) 777 1) == 0)
779 put_child(t, tn, 2*i, node); 778 put_child(tn, 2*i, node);
780 else 779 else
781 put_child(t, tn, 2*i+1, node); 780 put_child(tn, 2*i+1, node);
782 continue; 781 continue;
783 } 782 }
784 783
@@ -786,8 +785,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
786 inode = (struct tnode *) node; 785 inode = (struct tnode *) node;
787 786
788 if (inode->bits == 1) { 787 if (inode->bits == 1) {
789 put_child(t, tn, 2*i, rtnl_dereference(inode->child[0])); 788 put_child(tn, 2*i, rtnl_dereference(inode->child[0]));
790 put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1])); 789 put_child(tn, 2*i+1, rtnl_dereference(inode->child[1]));
791 790
792 tnode_free_safe(inode); 791 tnode_free_safe(inode);
793 continue; 792 continue;
@@ -817,22 +816,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
817 */ 816 */
818 817
819 left = (struct tnode *) tnode_get_child(tn, 2*i); 818 left = (struct tnode *) tnode_get_child(tn, 2*i);
820 put_child(t, tn, 2*i, NULL); 819 put_child(tn, 2*i, NULL);
821 820
822 BUG_ON(!left); 821 BUG_ON(!left);
823 822
824 right = (struct tnode *) tnode_get_child(tn, 2*i+1); 823 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
825 put_child(t, tn, 2*i+1, NULL); 824 put_child(tn, 2*i+1, NULL);
826 825
827 BUG_ON(!right); 826 BUG_ON(!right);
828 827
829 size = tnode_child_length(left); 828 size = tnode_child_length(left);
830 for (j = 0; j < size; j++) { 829 for (j = 0; j < size; j++) {
831 put_child(t, left, j, rtnl_dereference(inode->child[j])); 830 put_child(left, j, rtnl_dereference(inode->child[j]));
832 put_child(t, right, j, rtnl_dereference(inode->child[j + size])); 831 put_child(right, j, rtnl_dereference(inode->child[j + size]));
833 } 832 }
834 put_child(t, tn, 2*i, resize(t, left)); 833 put_child(tn, 2*i, resize(t, left));
835 put_child(t, tn, 2*i+1, resize(t, right)); 834 put_child(tn, 2*i+1, resize(t, right));
836 835
837 tnode_free_safe(inode); 836 tnode_free_safe(inode);
838 } 837 }
@@ -877,7 +876,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
877 if (!newn) 876 if (!newn)
878 goto nomem; 877 goto nomem;
879 878
880 put_child(t, tn, i/2, (struct rt_trie_node *)newn); 879 put_child(tn, i/2, (struct rt_trie_node *)newn);
881 } 880 }
882 881
883 } 882 }
@@ -892,21 +891,21 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
892 if (left == NULL) { 891 if (left == NULL) {
893 if (right == NULL) /* Both are empty */ 892 if (right == NULL) /* Both are empty */
894 continue; 893 continue;
895 put_child(t, tn, i/2, right); 894 put_child(tn, i/2, right);
896 continue; 895 continue;
897 } 896 }
898 897
899 if (right == NULL) { 898 if (right == NULL) {
900 put_child(t, tn, i/2, left); 899 put_child(tn, i/2, left);
901 continue; 900 continue;
902 } 901 }
903 902
904 /* Two nonempty children */ 903 /* Two nonempty children */
905 newBinNode = (struct tnode *) tnode_get_child(tn, i/2); 904 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
906 put_child(t, tn, i/2, NULL); 905 put_child(tn, i/2, NULL);
907 put_child(t, newBinNode, 0, left); 906 put_child(newBinNode, 0, left);
908 put_child(t, newBinNode, 1, right); 907 put_child(newBinNode, 1, right);
909 put_child(t, tn, i/2, resize(t, newBinNode)); 908 put_child(tn, i/2, resize(t, newBinNode));
910 } 909 }
911 tnode_free_safe(oldtnode); 910 tnode_free_safe(oldtnode);
912 return tn; 911 return tn;
@@ -1125,7 +1124,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1125 node_set_parent((struct rt_trie_node *)l, tp); 1124 node_set_parent((struct rt_trie_node *)l, tp);
1126 1125
1127 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1126 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1128 put_child(t, tp, cindex, (struct rt_trie_node *)l); 1127 put_child(tp, cindex, (struct rt_trie_node *)l);
1129 } else { 1128 } else {
1130 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ 1129 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
1131 /* 1130 /*
@@ -1155,12 +1154,12 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1155 node_set_parent((struct rt_trie_node *)tn, tp); 1154 node_set_parent((struct rt_trie_node *)tn, tp);
1156 1155
1157 missbit = tkey_extract_bits(key, newpos, 1); 1156 missbit = tkey_extract_bits(key, newpos, 1);
1158 put_child(t, tn, missbit, (struct rt_trie_node *)l); 1157 put_child(tn, missbit, (struct rt_trie_node *)l);
1159 put_child(t, tn, 1-missbit, n); 1158 put_child(tn, 1-missbit, n);
1160 1159
1161 if (tp) { 1160 if (tp) {
1162 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1161 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1163 put_child(t, tp, cindex, (struct rt_trie_node *)tn); 1162 put_child(tp, cindex, (struct rt_trie_node *)tn);
1164 } else { 1163 } else {
1165 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); 1164 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
1166 tp = tn; 1165 tp = tn;
@@ -1619,7 +1618,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
1619 1618
1620 if (tp) { 1619 if (tp) {
1621 t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); 1620 t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
1622 put_child(t, tp, cindex, NULL); 1621 put_child(tp, cindex, NULL);
1623 trie_rebalance(t, tp); 1622 trie_rebalance(t, tp);
1624 } else 1623 } else
1625 RCU_INIT_POINTER(t->trie, NULL); 1624 RCU_INIT_POINTER(t->trie, NULL);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 981ff1eef28c..f1395a6fb35f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -325,14 +325,12 @@ static int ip_rcv_finish(struct sk_buff *skb)
325 const struct net_protocol *ipprot; 325 const struct net_protocol *ipprot;
326 int protocol = iph->protocol; 326 int protocol = iph->protocol;
327 327
328 rcu_read_lock();
329 ipprot = rcu_dereference(inet_protos[protocol]); 328 ipprot = rcu_dereference(inet_protos[protocol]);
330 if (ipprot && ipprot->early_demux) { 329 if (ipprot && ipprot->early_demux) {
331 ipprot->early_demux(skb); 330 ipprot->early_demux(skb);
332 /* must reload iph, skb->head might have changed */ 331 /* must reload iph, skb->head might have changed */
333 iph = ip_hdr(skb); 332 iph = ip_hdr(skb);
334 } 333 }
335 rcu_read_unlock();
336 } 334 }
337 335
338 /* 336 /*
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fc1a81ca79a7..c035251beb07 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -147,6 +147,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
147 struct sk_buff *skb, u32 mtu); 147 struct sk_buff *skb, u32 mtu);
148static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, 148static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
149 struct sk_buff *skb); 149 struct sk_buff *skb);
150static void ipv4_dst_destroy(struct dst_entry *dst);
150 151
151static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 152static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
152 int how) 153 int how)
@@ -170,6 +171,7 @@ static struct dst_ops ipv4_dst_ops = {
170 .default_advmss = ipv4_default_advmss, 171 .default_advmss = ipv4_default_advmss,
171 .mtu = ipv4_mtu, 172 .mtu = ipv4_mtu,
172 .cow_metrics = ipv4_cow_metrics, 173 .cow_metrics = ipv4_cow_metrics,
174 .destroy = ipv4_dst_destroy,
173 .ifdown = ipv4_dst_ifdown, 175 .ifdown = ipv4_dst_ifdown,
174 .negative_advice = ipv4_negative_advice, 176 .negative_advice = ipv4_negative_advice,
175 .link_failure = ipv4_link_failure, 177 .link_failure = ipv4_link_failure,
@@ -587,11 +589,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
587 build_sk_flow_key(fl4, sk); 589 build_sk_flow_key(fl4, sk);
588} 590}
589 591
590static DEFINE_SEQLOCK(fnhe_seqlock); 592static inline void rt_free(struct rtable *rt)
593{
594 call_rcu(&rt->dst.rcu_head, dst_rcu_free);
595}
596
597static DEFINE_SPINLOCK(fnhe_lock);
591 598
592static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) 599static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
593{ 600{
594 struct fib_nh_exception *fnhe, *oldest; 601 struct fib_nh_exception *fnhe, *oldest;
602 struct rtable *orig;
595 603
596 oldest = rcu_dereference(hash->chain); 604 oldest = rcu_dereference(hash->chain);
597 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; 605 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
@@ -599,6 +607,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
599 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) 607 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
600 oldest = fnhe; 608 oldest = fnhe;
601 } 609 }
610 orig = rcu_dereference(oldest->fnhe_rth);
611 if (orig) {
612 RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
613 rt_free(orig);
614 }
602 return oldest; 615 return oldest;
603} 616}
604 617
@@ -620,7 +633,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
620 int depth; 633 int depth;
621 u32 hval = fnhe_hashfun(daddr); 634 u32 hval = fnhe_hashfun(daddr);
622 635
623 write_seqlock_bh(&fnhe_seqlock); 636 spin_lock_bh(&fnhe_lock);
624 637
625 hash = nh->nh_exceptions; 638 hash = nh->nh_exceptions;
626 if (!hash) { 639 if (!hash) {
@@ -667,7 +680,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
667 fnhe->fnhe_stamp = jiffies; 680 fnhe->fnhe_stamp = jiffies;
668 681
669out_unlock: 682out_unlock:
670 write_sequnlock_bh(&fnhe_seqlock); 683 spin_unlock_bh(&fnhe_lock);
671 return; 684 return;
672} 685}
673 686
@@ -1164,53 +1177,62 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
1164 return NULL; 1177 return NULL;
1165} 1178}
1166 1179
1167static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, 1180static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
1168 __be32 daddr) 1181 __be32 daddr)
1169{ 1182{
1170 __be32 fnhe_daddr, gw; 1183 bool ret = false;
1171 unsigned long expires; 1184
1172 unsigned int seq; 1185 spin_lock_bh(&fnhe_lock);
1173 u32 pmtu;
1174
1175restart:
1176 seq = read_seqbegin(&fnhe_seqlock);
1177 fnhe_daddr = fnhe->fnhe_daddr;
1178 gw = fnhe->fnhe_gw;
1179 pmtu = fnhe->fnhe_pmtu;
1180 expires = fnhe->fnhe_expires;
1181 if (read_seqretry(&fnhe_seqlock, seq))
1182 goto restart;
1183
1184 if (daddr != fnhe_daddr)
1185 return;
1186 1186
1187 if (pmtu) { 1187 if (daddr == fnhe->fnhe_daddr) {
1188 unsigned long diff = expires - jiffies; 1188 struct rtable *orig;
1189 1189
1190 if (time_before(jiffies, expires)) { 1190 if (fnhe->fnhe_pmtu) {
1191 rt->rt_pmtu = pmtu; 1191 unsigned long expires = fnhe->fnhe_expires;
1192 dst_set_expires(&rt->dst, diff); 1192 unsigned long diff = expires - jiffies;
1193
1194 if (time_before(jiffies, expires)) {
1195 rt->rt_pmtu = fnhe->fnhe_pmtu;
1196 dst_set_expires(&rt->dst, diff);
1197 }
1193 } 1198 }
1199 if (fnhe->fnhe_gw) {
1200 rt->rt_flags |= RTCF_REDIRECTED;
1201 rt->rt_gateway = fnhe->fnhe_gw;
1202 }
1203
1204 orig = rcu_dereference(fnhe->fnhe_rth);
1205 rcu_assign_pointer(fnhe->fnhe_rth, rt);
1206 if (orig)
1207 rt_free(orig);
1208
1209 fnhe->fnhe_stamp = jiffies;
1210 ret = true;
1211 } else {
1212 /* Routes we intend to cache in nexthop exception have
1213 * the DST_NOCACHE bit clear. However, if we are
1214 * unsuccessful at storing this route into the cache
1215 * we really need to set it.
1216 */
1217 rt->dst.flags |= DST_NOCACHE;
1194 } 1218 }
1195 if (gw) { 1219 spin_unlock_bh(&fnhe_lock);
1196 rt->rt_flags |= RTCF_REDIRECTED;
1197 rt->rt_gateway = gw;
1198 }
1199 fnhe->fnhe_stamp = jiffies;
1200}
1201 1220
1202static inline void rt_free(struct rtable *rt) 1221 return ret;
1203{
1204 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
1205} 1222}
1206 1223
1207static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) 1224static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
1208{ 1225{
1209 struct rtable *orig, *prev, **p = &nh->nh_rth_output; 1226 struct rtable *orig, *prev, **p;
1210 1227 bool ret = true;
1211 if (rt_is_input_route(rt))
1212 p = &nh->nh_rth_input;
1213 1228
1229 if (rt_is_input_route(rt)) {
1230 p = (struct rtable **)&nh->nh_rth_input;
1231 } else {
1232 if (!nh->nh_pcpu_rth_output)
1233 goto nocache;
1234 p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
1235 }
1214 orig = *p; 1236 orig = *p;
1215 1237
1216 prev = cmpxchg(p, orig, rt); 1238 prev = cmpxchg(p, orig, rt);
@@ -1223,7 +1245,50 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
1223 * unsuccessful at storing this route into the cache 1245 * unsuccessful at storing this route into the cache
1224 * we really need to set it. 1246 * we really need to set it.
1225 */ 1247 */
1248nocache:
1226 rt->dst.flags |= DST_NOCACHE; 1249 rt->dst.flags |= DST_NOCACHE;
1250 ret = false;
1251 }
1252
1253 return ret;
1254}
1255
1256static DEFINE_SPINLOCK(rt_uncached_lock);
1257static LIST_HEAD(rt_uncached_list);
1258
1259static void rt_add_uncached_list(struct rtable *rt)
1260{
1261 spin_lock_bh(&rt_uncached_lock);
1262 list_add_tail(&rt->rt_uncached, &rt_uncached_list);
1263 spin_unlock_bh(&rt_uncached_lock);
1264}
1265
1266static void ipv4_dst_destroy(struct dst_entry *dst)
1267{
1268 struct rtable *rt = (struct rtable *) dst;
1269
1270 if (dst->flags & DST_NOCACHE) {
1271 spin_lock_bh(&rt_uncached_lock);
1272 list_del(&rt->rt_uncached);
1273 spin_unlock_bh(&rt_uncached_lock);
1274 }
1275}
1276
1277void rt_flush_dev(struct net_device *dev)
1278{
1279 if (!list_empty(&rt_uncached_list)) {
1280 struct net *net = dev_net(dev);
1281 struct rtable *rt;
1282
1283 spin_lock_bh(&rt_uncached_lock);
1284 list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
1285 if (rt->dst.dev != dev)
1286 continue;
1287 rt->dst.dev = net->loopback_dev;
1288 dev_hold(rt->dst.dev);
1289 dev_put(dev);
1290 }
1291 spin_unlock_bh(&rt_uncached_lock);
1227 } 1292 }
1228} 1293}
1229 1294
@@ -1239,20 +1304,24 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
1239 struct fib_nh_exception *fnhe, 1304 struct fib_nh_exception *fnhe,
1240 struct fib_info *fi, u16 type, u32 itag) 1305 struct fib_info *fi, u16 type, u32 itag)
1241{ 1306{
1307 bool cached = false;
1308
1242 if (fi) { 1309 if (fi) {
1243 struct fib_nh *nh = &FIB_RES_NH(*res); 1310 struct fib_nh *nh = &FIB_RES_NH(*res);
1244 1311
1245 if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) 1312 if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
1246 rt->rt_gateway = nh->nh_gw; 1313 rt->rt_gateway = nh->nh_gw;
1247 if (unlikely(fnhe))
1248 rt_bind_exception(rt, fnhe, daddr);
1249 dst_init_metrics(&rt->dst, fi->fib_metrics, true); 1314 dst_init_metrics(&rt->dst, fi->fib_metrics, true);
1250#ifdef CONFIG_IP_ROUTE_CLASSID 1315#ifdef CONFIG_IP_ROUTE_CLASSID
1251 rt->dst.tclassid = nh->nh_tclassid; 1316 rt->dst.tclassid = nh->nh_tclassid;
1252#endif 1317#endif
1253 if (!(rt->dst.flags & DST_NOCACHE)) 1318 if (unlikely(fnhe))
1254 rt_cache_route(nh, rt); 1319 cached = rt_bind_exception(rt, fnhe, daddr);
1320 else if (!(rt->dst.flags & DST_NOCACHE))
1321 cached = rt_cache_route(nh, rt);
1255 } 1322 }
1323 if (unlikely(!cached))
1324 rt_add_uncached_list(rt);
1256 1325
1257#ifdef CONFIG_IP_ROUTE_CLASSID 1326#ifdef CONFIG_IP_ROUTE_CLASSID
1258#ifdef CONFIG_IP_MULTIPLE_TABLES 1327#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1319,6 +1388,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1319 rth->rt_iif = 0; 1388 rth->rt_iif = 0;
1320 rth->rt_pmtu = 0; 1389 rth->rt_pmtu = 0;
1321 rth->rt_gateway = 0; 1390 rth->rt_gateway = 0;
1391 INIT_LIST_HEAD(&rth->rt_uncached);
1322 if (our) { 1392 if (our) {
1323 rth->dst.input= ip_local_deliver; 1393 rth->dst.input= ip_local_deliver;
1324 rth->rt_flags |= RTCF_LOCAL; 1394 rth->rt_flags |= RTCF_LOCAL;
@@ -1420,7 +1490,7 @@ static int __mkroute_input(struct sk_buff *skb,
1420 do_cache = false; 1490 do_cache = false;
1421 if (res->fi) { 1491 if (res->fi) {
1422 if (!itag) { 1492 if (!itag) {
1423 rth = FIB_RES_NH(*res).nh_rth_input; 1493 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1424 if (rt_cache_valid(rth)) { 1494 if (rt_cache_valid(rth)) {
1425 skb_dst_set_noref(skb, &rth->dst); 1495 skb_dst_set_noref(skb, &rth->dst);
1426 goto out; 1496 goto out;
@@ -1444,6 +1514,7 @@ static int __mkroute_input(struct sk_buff *skb,
1444 rth->rt_iif = 0; 1514 rth->rt_iif = 0;
1445 rth->rt_pmtu = 0; 1515 rth->rt_pmtu = 0;
1446 rth->rt_gateway = 0; 1516 rth->rt_gateway = 0;
1517 INIT_LIST_HEAD(&rth->rt_uncached);
1447 1518
1448 rth->dst.input = ip_forward; 1519 rth->dst.input = ip_forward;
1449 rth->dst.output = ip_output; 1520 rth->dst.output = ip_output;
@@ -1582,7 +1653,7 @@ local_input:
1582 do_cache = false; 1653 do_cache = false;
1583 if (res.fi) { 1654 if (res.fi) {
1584 if (!itag) { 1655 if (!itag) {
1585 rth = FIB_RES_NH(res).nh_rth_input; 1656 rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
1586 if (rt_cache_valid(rth)) { 1657 if (rt_cache_valid(rth)) {
1587 skb_dst_set_noref(skb, &rth->dst); 1658 skb_dst_set_noref(skb, &rth->dst);
1588 err = 0; 1659 err = 0;
@@ -1610,6 +1681,7 @@ local_input:
1610 rth->rt_iif = 0; 1681 rth->rt_iif = 0;
1611 rth->rt_pmtu = 0; 1682 rth->rt_pmtu = 0;
1612 rth->rt_gateway = 0; 1683 rth->rt_gateway = 0;
1684 INIT_LIST_HEAD(&rth->rt_uncached);
1613 if (res.type == RTN_UNREACHABLE) { 1685 if (res.type == RTN_UNREACHABLE) {
1614 rth->dst.input= ip_error; 1686 rth->dst.input= ip_error;
1615 rth->dst.error= -err; 1687 rth->dst.error= -err;
@@ -1748,19 +1820,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1748 1820
1749 fnhe = NULL; 1821 fnhe = NULL;
1750 if (fi) { 1822 if (fi) {
1823 struct rtable __rcu **prth;
1824
1751 fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); 1825 fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
1752 if (!fnhe) { 1826 if (fnhe)
1753 rth = FIB_RES_NH(*res).nh_rth_output; 1827 prth = &fnhe->fnhe_rth;
1754 if (rt_cache_valid(rth)) { 1828 else
1755 dst_hold(&rth->dst); 1829 prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
1756 return rth; 1830 rth = rcu_dereference(*prth);
1757 } 1831 if (rt_cache_valid(rth)) {
1832 dst_hold(&rth->dst);
1833 return rth;
1758 } 1834 }
1759 } 1835 }
1760 rth = rt_dst_alloc(dev_out, 1836 rth = rt_dst_alloc(dev_out,
1761 IN_DEV_CONF_GET(in_dev, NOPOLICY), 1837 IN_DEV_CONF_GET(in_dev, NOPOLICY),
1762 IN_DEV_CONF_GET(in_dev, NOXFRM), 1838 IN_DEV_CONF_GET(in_dev, NOXFRM),
1763 fi && !fnhe); 1839 fi);
1764 if (!rth) 1840 if (!rth)
1765 return ERR_PTR(-ENOBUFS); 1841 return ERR_PTR(-ENOBUFS);
1766 1842
@@ -1773,6 +1849,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1773 rth->rt_iif = orig_oif ? : 0; 1849 rth->rt_iif = orig_oif ? : 0;
1774 rth->rt_pmtu = 0; 1850 rth->rt_pmtu = 0;
1775 rth->rt_gateway = 0; 1851 rth->rt_gateway = 0;
1852 INIT_LIST_HEAD(&rth->rt_uncached);
1776 1853
1777 RT_CACHE_STAT_INC(out_slow_tot); 1854 RT_CACHE_STAT_INC(out_slow_tot);
1778 1855
@@ -2052,6 +2129,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
2052 rt->rt_type = ort->rt_type; 2129 rt->rt_type = ort->rt_type;
2053 rt->rt_gateway = ort->rt_gateway; 2130 rt->rt_gateway = ort->rt_gateway;
2054 2131
2132 INIT_LIST_HEAD(&rt->rt_uncached);
2133
2055 dst_free(new); 2134 dst_free(new);
2056 } 2135 }
2057 2136
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5840c3255721..4b6487a68279 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -784,13 +784,6 @@ static struct ctl_table ipv4_net_table[] = {
784 .proc_handler = proc_dointvec 784 .proc_handler = proc_dointvec
785 }, 785 },
786 { 786 {
787 .procname = "rt_cache_rebuild_count",
788 .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count,
789 .maxlen = sizeof(int),
790 .mode = 0644,
791 .proc_handler = proc_dointvec
792 },
793 {
794 .procname = "ping_group_range", 787 .procname = "ping_group_range",
795 .data = &init_net.ipv4.sysctl_ping_group_range, 788 .data = &init_net.ipv4.sysctl_ping_group_range,
796 .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range), 789 .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range),
@@ -829,8 +822,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
829 table[5].data = 822 table[5].data =
830 &net->ipv4.sysctl_icmp_ratemask; 823 &net->ipv4.sysctl_icmp_ratemask;
831 table[6].data = 824 table[6].data =
832 &net->ipv4.sysctl_rt_cache_rebuild_count;
833 table[7].data =
834 &net->ipv4.sysctl_ping_group_range; 825 &net->ipv4.sysctl_ping_group_range;
835 826
836 } 827 }
@@ -842,8 +833,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
842 net->ipv4.sysctl_ping_group_range[0] = 1; 833 net->ipv4.sysctl_ping_group_range[0] = 1;
843 net->ipv4.sysctl_ping_group_range[1] = 0; 834 net->ipv4.sysctl_ping_group_range[1] = 0;
844 835
845 net->ipv4.sysctl_rt_cache_rebuild_count = 4;
846
847 tcp_init_mem(net); 836 tcp_init_mem(net);
848 837
849 net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); 838 net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a356e1fecf9a..9be30b039ae3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5604,8 +5604,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
5604 tcp_set_state(sk, TCP_ESTABLISHED); 5604 tcp_set_state(sk, TCP_ESTABLISHED);
5605 5605
5606 if (skb != NULL) { 5606 if (skb != NULL) {
5607 sk->sk_rx_dst = dst_clone(skb_dst(skb)); 5607 inet_sk_rx_dst_set(sk, skb);
5608 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
5609 security_inet_conn_established(sk, skb); 5608 security_inet_conn_established(sk, skb);
5610 } 5609 }
5611 5610
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2fbd9921253f..7f91e5ac8277 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1617,19 +1617,19 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1617#endif 1617#endif
1618 1618
1619 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1619 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1620 struct dst_entry *dst = sk->sk_rx_dst;
1621
1620 sock_rps_save_rxhash(sk, skb); 1622 sock_rps_save_rxhash(sk, skb);
1621 if (sk->sk_rx_dst) { 1623 if (dst) {
1622 struct dst_entry *dst = sk->sk_rx_dst;
1623 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || 1624 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1624 dst->ops->check(dst, 0) == NULL) { 1625 dst->ops->check(dst, 0) == NULL) {
1625 dst_release(dst); 1626 dst_release(dst);
1626 sk->sk_rx_dst = NULL; 1627 sk->sk_rx_dst = NULL;
1627 } 1628 }
1628 } 1629 }
1629 if (unlikely(sk->sk_rx_dst == NULL)) { 1630 if (unlikely(sk->sk_rx_dst == NULL))
1630 sk->sk_rx_dst = dst_clone(skb_dst(skb)); 1631 inet_sk_rx_dst_set(sk, skb);
1631 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; 1632
1632 }
1633 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { 1633 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1634 rsk = sk; 1634 rsk = sk;
1635 goto reset; 1635 goto reset;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 3f1cc2028edd..232a90c3ec86 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -387,8 +387,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
387 struct tcp_sock *oldtp = tcp_sk(sk); 387 struct tcp_sock *oldtp = tcp_sk(sk);
388 struct tcp_cookie_values *oldcvp = oldtp->cookie_values; 388 struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
389 389
390 newsk->sk_rx_dst = dst_clone(skb_dst(skb)); 390 inet_sk_rx_dst_set(newsk, skb);
391 inet_sk(newsk)->rx_dst_ifindex = skb->skb_iif;
392 391
393 /* TCP Cookie Transactions require space for the cookie pair, 392 /* TCP Cookie Transactions require space for the cookie pair,
394 * as it differs for each connection. There is no need to 393 * as it differs for each connection. There is no need to
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index c6281847f16a..681ea2f413e2 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -92,6 +92,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
92 xdst->u.rt.rt_type = rt->rt_type; 92 xdst->u.rt.rt_type = rt->rt_type;
93 xdst->u.rt.rt_gateway = rt->rt_gateway; 93 xdst->u.rt.rt_gateway = rt->rt_gateway;
94 xdst->u.rt.rt_pmtu = rt->rt_pmtu; 94 xdst->u.rt.rt_pmtu = rt->rt_pmtu;
95 INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
95 96
96 return 0; 97 return 0;
97} 98}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 47975e363fcd..a52d864d562b 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -52,11 +52,9 @@ int ip6_rcv_finish(struct sk_buff *skb)
52 if (sysctl_ip_early_demux && !skb_dst(skb)) { 52 if (sysctl_ip_early_demux && !skb_dst(skb)) {
53 const struct inet6_protocol *ipprot; 53 const struct inet6_protocol *ipprot;
54 54
55 rcu_read_lock();
56 ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); 55 ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
57 if (ipprot && ipprot->early_demux) 56 if (ipprot && ipprot->early_demux)
58 ipprot->early_demux(skb); 57 ipprot->early_demux(skb);
59 rcu_read_unlock();
60 } 58 }
61 if (!skb_dst(skb)) 59 if (!skb_dst(skb))
62 ip6_route_input(skb); 60 ip6_route_input(skb);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cf02cb97bbdd..8e80fd279100 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2480,12 +2480,8 @@ static int rt6_fill_node(struct net *net,
2480 goto nla_put_failure; 2480 goto nla_put_failure;
2481 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) 2481 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2482 goto nla_put_failure; 2482 goto nla_put_failure;
2483 if (!(rt->rt6i_flags & RTF_EXPIRES)) 2483
2484 expires = 0; 2484 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2485 else if (rt->dst.expires - jiffies < INT_MAX)
2486 expires = rt->dst.expires - jiffies;
2487 else
2488 expires = INT_MAX;
2489 2485
2490 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) 2486 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2491 goto nla_put_failure; 2487 goto nla_put_failure;