about summary refs log tree commit diff stats
path: root/net
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2015-09-26 19:08:27 -0400
committer David S. Miller <davem@davemloft.net> 2015-09-26 19:08:27 -0400
commit 4963ed48f2c20196d51a447ee87dc2815584fee4 (patch)
tree a1902f466dafa00453889a4f1e66b00249ce0529 /net
parent 4d54d86546f62c7c4a0fe3b36a64c5e3b98ce1a9 (diff)
parent 518a7cb6980cd640c7f979d29021ad870f60d7d7 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Conflicts:
	net/ipv4/arp.c

The net/ipv4/arp.c conflict was one commit adding a new local variable while another commit was deleting one.

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/atm/clip.c 3
-rw-r--r-- net/bluetooth/smp.c 12
-rw-r--r-- net/bridge/br_multicast.c 4
-rw-r--r-- net/ceph/ceph_common.c 1
-rw-r--r-- net/ceph/crypto.c 4
-rw-r--r-- net/ceph/messenger.c 83
-rw-r--r-- net/ceph/mon_client.c 37
-rw-r--r-- net/ceph/osd_client.c 51
-rw-r--r-- net/ceph/osdmap.c 2
-rw-r--r-- net/core/dev.c 2
-rw-r--r-- net/core/fib_rules.c 14
-rw-r--r-- net/core/filter.c 2
-rw-r--r-- net/core/net-sysfs.c 9
-rw-r--r-- net/core/netpoll.c 10
-rw-r--r-- net/core/rtnetlink.c 26
-rw-r--r-- net/core/sock.c 12
-rw-r--r-- net/dccp/ackvec.c 12
-rw-r--r-- net/dccp/ccid.c 3
-rw-r--r-- net/dccp/minisocks.c 4
-rw-r--r-- net/dsa/dsa.c 41
-rw-r--r-- net/dsa/tag_trailer.c 2
-rw-r--r-- net/ipv4/arp.c 39
-rw-r--r-- net/ipv4/fib_trie.c 2
-rw-r--r-- net/ipv4/icmp.c 4
-rw-r--r-- net/ipv4/inet_connection_sock.c 8
-rw-r--r-- net/ipv4/inet_timewait_sock.c 16
-rw-r--r-- net/ipv4/ip_tunnel_core.c 54
-rw-r--r-- net/ipv4/route.c 6
-rw-r--r-- net/ipv4/tcp_cubic.c 10
-rw-r--r-- net/ipv4/tcp_minisocks.c 13
-rw-r--r-- net/ipv4/tcp_output.c 1
-rw-r--r-- net/ipv4/udp.c 3
-rw-r--r-- net/ipv4/xfrm4_policy.c 2
-rw-r--r-- net/ipv6/addrconf.c 7
-rw-r--r-- net/ipv6/ip6_fib.c 26
-rw-r--r-- net/ipv6/ip6_gre.c 93
-rw-r--r-- net/ipv6/ip6_output.c 14
-rw-r--r-- net/ipv6/ip6_tunnel.c 147
-rw-r--r-- net/ipv6/route.c 16
-rw-r--r-- net/mac80211/cfg.c 13
-rw-r--r-- net/netfilter/nf_log.c 9
-rw-r--r-- net/netfilter/nft_compat.c 24
-rw-r--r-- net/netlink/af_netlink.c 63
-rw-r--r-- net/netlink/af_netlink.h 10
-rw-r--r-- net/openvswitch/Kconfig 3
-rw-r--r-- net/openvswitch/conntrack.c 8
-rw-r--r-- net/openvswitch/datapath.c 4
-rw-r--r-- net/openvswitch/flow_netlink.c 82
-rw-r--r-- net/openvswitch/flow_table.c 23
-rw-r--r-- net/openvswitch/flow_table.h 2
-rw-r--r-- net/packet/af_packet.c 32
-rw-r--r-- net/sched/cls_fw.c 30
-rw-r--r-- net/sctp/protocol.c 64
-rw-r--r-- net/sunrpc/sched.c 14
-rw-r--r-- net/sunrpc/xprt.c 6
-rw-r--r-- net/sunrpc/xprtsock.c 15
-rw-r--r-- net/tipc/msg.c 1
57 files changed, 778 insertions, 420 deletions
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 17e55dfecbe2..e07f551a863c 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -317,6 +317,9 @@ static int clip_constructor(struct neighbour *neigh)
317 317
318static int clip_encap(struct atm_vcc *vcc, int mode) 318static int clip_encap(struct atm_vcc *vcc, int mode)
319{ 319{
320 if (!CLIP_VCC(vcc))
321 return -EBADFD;
322
320 CLIP_VCC(vcc)->encap = mode; 323 CLIP_VCC(vcc)->encap = mode;
321 return 0; 324 return 0;
322} 325}
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 4d56e593faad..25644e1bc479 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -2311,12 +2311,6 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
2311 if (!conn) 2311 if (!conn)
2312 return 1; 2312 return 1;
2313 2313
2314 chan = conn->smp;
2315 if (!chan) {
2316 BT_ERR("SMP security requested but not available");
2317 return 1;
2318 }
2319
2320 if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED)) 2314 if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED))
2321 return 1; 2315 return 1;
2322 2316
@@ -2330,6 +2324,12 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
2330 if (smp_ltk_encrypt(conn, hcon->pending_sec_level)) 2324 if (smp_ltk_encrypt(conn, hcon->pending_sec_level))
2331 return 0; 2325 return 0;
2332 2326
2327 chan = conn->smp;
2328 if (!chan) {
2329 BT_ERR("SMP security requested but not available");
2330 return 1;
2331 }
2332
2333 l2cap_chan_lock(chan); 2333 l2cap_chan_lock(chan);
2334 2334
2335 /* If SMP is already in progress ignore this request */ 2335 /* If SMP is already in progress ignore this request */
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index b4d858a18eb6..03661d97463c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1006,7 +1006,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
1006 1006
1007 ih = igmpv3_report_hdr(skb); 1007 ih = igmpv3_report_hdr(skb);
1008 num = ntohs(ih->ngrec); 1008 num = ntohs(ih->ngrec);
1009 len = sizeof(*ih); 1009 len = skb_transport_offset(skb) + sizeof(*ih);
1010 1010
1011 for (i = 0; i < num; i++) { 1011 for (i = 0; i < num; i++) {
1012 len += sizeof(*grec); 1012 len += sizeof(*grec);
@@ -1067,7 +1067,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
1067 1067
1068 icmp6h = icmp6_hdr(skb); 1068 icmp6h = icmp6_hdr(skb);
1069 num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); 1069 num = ntohs(icmp6h->icmp6_dataun.un_data16[1]);
1070 len = sizeof(*icmp6h); 1070 len = skb_transport_offset(skb) + sizeof(*icmp6h);
1071 1071
1072 for (i = 0; i < num; i++) { 1072 for (i = 0; i < num; i++) {
1073 __be16 *nsrcs, _nsrcs; 1073 __be16 *nsrcs, _nsrcs;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 69a4d30a9ccf..54a00d66509e 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -357,6 +357,7 @@ ceph_parse_options(char *options, const char *dev_name,
357 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; 357 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
358 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; 358 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
359 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; 359 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
360 opt->monc_ping_timeout = CEPH_MONC_PING_TIMEOUT_DEFAULT;
360 361
361 /* get mon ip(s) */ 362 /* get mon ip(s) */
362 /* ip1[:port1][,ip2[:port2]...] */ 363 /* ip1[:port1][,ip2[:port2]...] */
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 790fe89d90c0..4440edcce0d6 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -79,10 +79,6 @@ int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey)
79 return 0; 79 return 0;
80} 80}
81 81
82
83
84#define AES_KEY_SIZE 16
85
86static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) 82static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void)
87{ 83{
88 return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); 84 return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index e3be1d22a247..b9b0e3b5da49 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -163,6 +163,7 @@ static struct kmem_cache *ceph_msg_data_cache;
163static char tag_msg = CEPH_MSGR_TAG_MSG; 163static char tag_msg = CEPH_MSGR_TAG_MSG;
164static char tag_ack = CEPH_MSGR_TAG_ACK; 164static char tag_ack = CEPH_MSGR_TAG_ACK;
165static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; 165static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
166static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2;
166 167
167#ifdef CONFIG_LOCKDEP 168#ifdef CONFIG_LOCKDEP
168static struct lock_class_key socket_class; 169static struct lock_class_key socket_class;
@@ -176,7 +177,7 @@ static struct lock_class_key socket_class;
176 177
177static void queue_con(struct ceph_connection *con); 178static void queue_con(struct ceph_connection *con);
178static void cancel_con(struct ceph_connection *con); 179static void cancel_con(struct ceph_connection *con);
179static void con_work(struct work_struct *); 180static void ceph_con_workfn(struct work_struct *);
180static void con_fault(struct ceph_connection *con); 181static void con_fault(struct ceph_connection *con);
181 182
182/* 183/*
@@ -276,22 +277,22 @@ static void _ceph_msgr_exit(void)
276 ceph_msgr_wq = NULL; 277 ceph_msgr_wq = NULL;
277 } 278 }
278 279
279 ceph_msgr_slab_exit();
280
281 BUG_ON(zero_page == NULL); 280 BUG_ON(zero_page == NULL);
282 page_cache_release(zero_page); 281 page_cache_release(zero_page);
283 zero_page = NULL; 282 zero_page = NULL;
283
284 ceph_msgr_slab_exit();
284} 285}
285 286
286int ceph_msgr_init(void) 287int ceph_msgr_init(void)
287{ 288{
289 if (ceph_msgr_slab_init())
290 return -ENOMEM;
291
288 BUG_ON(zero_page != NULL); 292 BUG_ON(zero_page != NULL);
289 zero_page = ZERO_PAGE(0); 293 zero_page = ZERO_PAGE(0);
290 page_cache_get(zero_page); 294 page_cache_get(zero_page);
291 295
292 if (ceph_msgr_slab_init())
293 return -ENOMEM;
294
295 /* 296 /*
296 * The number of active work items is limited by the number of 297 * The number of active work items is limited by the number of
297 * connections, so leave @max_active at default. 298 * connections, so leave @max_active at default.
@@ -749,7 +750,7 @@ void ceph_con_init(struct ceph_connection *con, void *private,
749 mutex_init(&con->mutex); 750 mutex_init(&con->mutex);
750 INIT_LIST_HEAD(&con->out_queue); 751 INIT_LIST_HEAD(&con->out_queue);
751 INIT_LIST_HEAD(&con->out_sent); 752 INIT_LIST_HEAD(&con->out_sent);
752 INIT_DELAYED_WORK(&con->work, con_work); 753 INIT_DELAYED_WORK(&con->work, ceph_con_workfn);
753 754
754 con->state = CON_STATE_CLOSED; 755 con->state = CON_STATE_CLOSED;
755} 756}
@@ -1351,7 +1352,16 @@ static void prepare_write_keepalive(struct ceph_connection *con)
1351{ 1352{
1352 dout("prepare_write_keepalive %p\n", con); 1353 dout("prepare_write_keepalive %p\n", con);
1353 con_out_kvec_reset(con); 1354 con_out_kvec_reset(con);
1354 con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); 1355 if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) {
1356 struct timespec now = CURRENT_TIME;
1357
1358 con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2);
1359 ceph_encode_timespec(&con->out_temp_keepalive2, &now);
1360 con_out_kvec_add(con, sizeof(con->out_temp_keepalive2),
1361 &con->out_temp_keepalive2);
1362 } else {
1363 con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive);
1364 }
1355 con_flag_set(con, CON_FLAG_WRITE_PENDING); 1365 con_flag_set(con, CON_FLAG_WRITE_PENDING);
1356} 1366}
1357 1367
@@ -1625,6 +1635,12 @@ static void prepare_read_tag(struct ceph_connection *con)
1625 con->in_tag = CEPH_MSGR_TAG_READY; 1635 con->in_tag = CEPH_MSGR_TAG_READY;
1626} 1636}
1627 1637
1638static void prepare_read_keepalive_ack(struct ceph_connection *con)
1639{
1640 dout("prepare_read_keepalive_ack %p\n", con);
1641 con->in_base_pos = 0;
1642}
1643
1628/* 1644/*
1629 * Prepare to read a message. 1645 * Prepare to read a message.
1630 */ 1646 */
@@ -2322,13 +2338,6 @@ static int read_partial_message(struct ceph_connection *con)
2322 return ret; 2338 return ret;
2323 2339
2324 BUG_ON(!con->in_msg ^ skip); 2340 BUG_ON(!con->in_msg ^ skip);
2325 if (con->in_msg && data_len > con->in_msg->data_length) {
2326 pr_warn("%s skipping long message (%u > %zd)\n",
2327 __func__, data_len, con->in_msg->data_length);
2328 ceph_msg_put(con->in_msg);
2329 con->in_msg = NULL;
2330 skip = 1;
2331 }
2332 if (skip) { 2341 if (skip) {
2333 /* skip this message */ 2342 /* skip this message */
2334 dout("alloc_msg said skip message\n"); 2343 dout("alloc_msg said skip message\n");
@@ -2457,6 +2466,17 @@ static void process_message(struct ceph_connection *con)
2457 mutex_lock(&con->mutex); 2466 mutex_lock(&con->mutex);
2458} 2467}
2459 2468
2469static int read_keepalive_ack(struct ceph_connection *con)
2470{
2471 struct ceph_timespec ceph_ts;
2472 size_t size = sizeof(ceph_ts);
2473 int ret = read_partial(con, size, size, &ceph_ts);
2474 if (ret <= 0)
2475 return ret;
2476 ceph_decode_timespec(&con->last_keepalive_ack, &ceph_ts);
2477 prepare_read_tag(con);
2478 return 1;
2479}
2460 2480
2461/* 2481/*
2462 * Write something to the socket. Called in a worker thread when the 2482 * Write something to the socket. Called in a worker thread when the
@@ -2526,6 +2546,10 @@ more_kvec:
2526 2546
2527do_next: 2547do_next:
2528 if (con->state == CON_STATE_OPEN) { 2548 if (con->state == CON_STATE_OPEN) {
2549 if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) {
2550 prepare_write_keepalive(con);
2551 goto more;
2552 }
2529 /* is anything else pending? */ 2553 /* is anything else pending? */
2530 if (!list_empty(&con->out_queue)) { 2554 if (!list_empty(&con->out_queue)) {
2531 prepare_write_message(con); 2555 prepare_write_message(con);
@@ -2535,10 +2559,6 @@ do_next:
2535 prepare_write_ack(con); 2559 prepare_write_ack(con);
2536 goto more; 2560 goto more;
2537 } 2561 }
2538 if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) {
2539 prepare_write_keepalive(con);
2540 goto more;
2541 }
2542 } 2562 }
2543 2563
2544 /* Nothing to do! */ 2564 /* Nothing to do! */
@@ -2641,6 +2661,9 @@ more:
2641 case CEPH_MSGR_TAG_ACK: 2661 case CEPH_MSGR_TAG_ACK:
2642 prepare_read_ack(con); 2662 prepare_read_ack(con);
2643 break; 2663 break;
2664 case CEPH_MSGR_TAG_KEEPALIVE2_ACK:
2665 prepare_read_keepalive_ack(con);
2666 break;
2644 case CEPH_MSGR_TAG_CLOSE: 2667 case CEPH_MSGR_TAG_CLOSE:
2645 con_close_socket(con); 2668 con_close_socket(con);
2646 con->state = CON_STATE_CLOSED; 2669 con->state = CON_STATE_CLOSED;
@@ -2684,6 +2707,12 @@ more:
2684 process_ack(con); 2707 process_ack(con);
2685 goto more; 2708 goto more;
2686 } 2709 }
2710 if (con->in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) {
2711 ret = read_keepalive_ack(con);
2712 if (ret <= 0)
2713 goto out;
2714 goto more;
2715 }
2687 2716
2688out: 2717out:
2689 dout("try_read done on %p ret %d\n", con, ret); 2718 dout("try_read done on %p ret %d\n", con, ret);
@@ -2799,7 +2828,7 @@ static void con_fault_finish(struct ceph_connection *con)
2799/* 2828/*
2800 * Do some work on a connection. Drop a connection ref when we're done. 2829 * Do some work on a connection. Drop a connection ref when we're done.
2801 */ 2830 */
2802static void con_work(struct work_struct *work) 2831static void ceph_con_workfn(struct work_struct *work)
2803{ 2832{
2804 struct ceph_connection *con = container_of(work, struct ceph_connection, 2833 struct ceph_connection *con = container_of(work, struct ceph_connection,
2805 work.work); 2834 work.work);
@@ -3101,6 +3130,20 @@ void ceph_con_keepalive(struct ceph_connection *con)
3101} 3130}
3102EXPORT_SYMBOL(ceph_con_keepalive); 3131EXPORT_SYMBOL(ceph_con_keepalive);
3103 3132
3133bool ceph_con_keepalive_expired(struct ceph_connection *con,
3134 unsigned long interval)
3135{
3136 if (interval > 0 &&
3137 (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) {
3138 struct timespec now = CURRENT_TIME;
3139 struct timespec ts;
3140 jiffies_to_timespec(interval, &ts);
3141 ts = timespec_add(con->last_keepalive_ack, ts);
3142 return timespec_compare(&now, &ts) >= 0;
3143 }
3144 return false;
3145}
3146
3104static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) 3147static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
3105{ 3148{
3106 struct ceph_msg_data *data; 3149 struct ceph_msg_data *data;
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 9d6ff1215928..edda01626a45 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -149,6 +149,10 @@ static int __open_session(struct ceph_mon_client *monc)
149 CEPH_ENTITY_TYPE_MON, monc->cur_mon, 149 CEPH_ENTITY_TYPE_MON, monc->cur_mon,
150 &monc->monmap->mon_inst[monc->cur_mon].addr); 150 &monc->monmap->mon_inst[monc->cur_mon].addr);
151 151
152 /* send an initial keepalive to ensure our timestamp is
153 * valid by the time we are in an OPENED state */
154 ceph_con_keepalive(&monc->con);
155
152 /* initiatiate authentication handshake */ 156 /* initiatiate authentication handshake */
153 ret = ceph_auth_build_hello(monc->auth, 157 ret = ceph_auth_build_hello(monc->auth,
154 monc->m_auth->front.iov_base, 158 monc->m_auth->front.iov_base,
@@ -170,14 +174,19 @@ static bool __sub_expired(struct ceph_mon_client *monc)
170 */ 174 */
171static void __schedule_delayed(struct ceph_mon_client *monc) 175static void __schedule_delayed(struct ceph_mon_client *monc)
172{ 176{
173 unsigned int delay; 177 struct ceph_options *opt = monc->client->options;
178 unsigned long delay;
174 179
175 if (monc->cur_mon < 0 || __sub_expired(monc)) 180 if (monc->cur_mon < 0 || __sub_expired(monc)) {
176 delay = 10 * HZ; 181 delay = 10 * HZ;
177 else 182 } else {
178 delay = 20 * HZ; 183 delay = 20 * HZ;
179 dout("__schedule_delayed after %u\n", delay); 184 if (opt->monc_ping_timeout > 0)
180 schedule_delayed_work(&monc->delayed_work, delay); 185 delay = min(delay, opt->monc_ping_timeout / 3);
186 }
187 dout("__schedule_delayed after %lu\n", delay);
188 schedule_delayed_work(&monc->delayed_work,
189 round_jiffies_relative(delay));
181} 190}
182 191
183/* 192/*
@@ -743,11 +752,23 @@ static void delayed_work(struct work_struct *work)
743 __close_session(monc); 752 __close_session(monc);
744 __open_session(monc); /* continue hunting */ 753 __open_session(monc); /* continue hunting */
745 } else { 754 } else {
746 ceph_con_keepalive(&monc->con); 755 struct ceph_options *opt = monc->client->options;
756 int is_auth = ceph_auth_is_authenticated(monc->auth);
757 if (ceph_con_keepalive_expired(&monc->con,
758 opt->monc_ping_timeout)) {
759 dout("monc keepalive timeout\n");
760 is_auth = 0;
761 __close_session(monc);
762 monc->hunting = true;
763 __open_session(monc);
764 }
747 765
748 __validate_auth(monc); 766 if (!monc->hunting) {
767 ceph_con_keepalive(&monc->con);
768 __validate_auth(monc);
769 }
749 770
750 if (ceph_auth_is_authenticated(monc->auth)) 771 if (is_auth)
751 __send_subscribe(monc); 772 __send_subscribe(monc);
752 } 773 }
753 __schedule_delayed(monc); 774 __schedule_delayed(monc);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 50033677c0fa..80b94e37c94a 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2817,8 +2817,9 @@ out:
2817} 2817}
2818 2818
2819/* 2819/*
2820 * lookup and return message for incoming reply. set up reply message 2820 * Lookup and return message for incoming reply. Don't try to do
2821 * pages. 2821 * anything about a larger than preallocated data portion of the
2822 * message at the moment - for now, just skip the message.
2822 */ 2823 */
2823static struct ceph_msg *get_reply(struct ceph_connection *con, 2824static struct ceph_msg *get_reply(struct ceph_connection *con,
2824 struct ceph_msg_header *hdr, 2825 struct ceph_msg_header *hdr,
@@ -2836,10 +2837,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2836 mutex_lock(&osdc->request_mutex); 2837 mutex_lock(&osdc->request_mutex);
2837 req = __lookup_request(osdc, tid); 2838 req = __lookup_request(osdc, tid);
2838 if (!req) { 2839 if (!req) {
2839 *skip = 1; 2840 pr_warn("%s osd%d tid %llu unknown, skipping\n",
2841 __func__, osd->o_osd, tid);
2840 m = NULL; 2842 m = NULL;
2841 dout("get_reply unknown tid %llu from osd%d\n", tid, 2843 *skip = 1;
2842 osd->o_osd);
2843 goto out; 2844 goto out;
2844 } 2845 }
2845 2846
@@ -2849,10 +2850,9 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2849 ceph_msg_revoke_incoming(req->r_reply); 2850 ceph_msg_revoke_incoming(req->r_reply);
2850 2851
2851 if (front_len > req->r_reply->front_alloc_len) { 2852 if (front_len > req->r_reply->front_alloc_len) {
2852 pr_warn("get_reply front %d > preallocated %d (%u#%llu)\n", 2853 pr_warn("%s osd%d tid %llu front %d > preallocated %d\n",
2853 front_len, req->r_reply->front_alloc_len, 2854 __func__, osd->o_osd, req->r_tid, front_len,
2854 (unsigned int)con->peer_name.type, 2855 req->r_reply->front_alloc_len);
2855 le64_to_cpu(con->peer_name.num));
2856 m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS, 2856 m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS,
2857 false); 2857 false);
2858 if (!m) 2858 if (!m)
@@ -2860,37 +2860,22 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2860 ceph_msg_put(req->r_reply); 2860 ceph_msg_put(req->r_reply);
2861 req->r_reply = m; 2861 req->r_reply = m;
2862 } 2862 }
2863 m = ceph_msg_get(req->r_reply);
2864
2865 if (data_len > 0) {
2866 struct ceph_osd_data *osd_data;
2867 2863
2868 /* 2864 if (data_len > req->r_reply->data_length) {
2869 * XXX This is assuming there is only one op containing 2865 pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n",
2870 * XXX page data. Probably OK for reads, but this 2866 __func__, osd->o_osd, req->r_tid, data_len,
2871 * XXX ought to be done more generally. 2867 req->r_reply->data_length);
2872 */ 2868 m = NULL;
2873 osd_data = osd_req_op_extent_osd_data(req, 0); 2869 *skip = 1;
2874 if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { 2870 goto out;
2875 if (osd_data->pages &&
2876 unlikely(osd_data->length < data_len)) {
2877
2878 pr_warn("tid %lld reply has %d bytes we had only %llu bytes ready\n",
2879 tid, data_len, osd_data->length);
2880 *skip = 1;
2881 ceph_msg_put(m);
2882 m = NULL;
2883 goto out;
2884 }
2885 }
2886 } 2871 }
2887 *skip = 0; 2872
2873 m = ceph_msg_get(req->r_reply);
2888 dout("get_reply tid %lld %p\n", tid, m); 2874 dout("get_reply tid %lld %p\n", tid, m);
2889 2875
2890out: 2876out:
2891 mutex_unlock(&osdc->request_mutex); 2877 mutex_unlock(&osdc->request_mutex);
2892 return m; 2878 return m;
2893
2894} 2879}
2895 2880
2896static struct ceph_msg *alloc_msg(struct ceph_connection *con, 2881static struct ceph_msg *alloc_msg(struct ceph_connection *con,
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 4a3125836b64..7d8f581d9f1f 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1300,7 +1300,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
1300 ceph_decode_addr(&addr); 1300 ceph_decode_addr(&addr);
1301 pr_info("osd%d up\n", osd); 1301 pr_info("osd%d up\n", osd);
1302 BUG_ON(osd >= map->max_osd); 1302 BUG_ON(osd >= map->max_osd);
1303 map->osd_state[osd] |= CEPH_OSD_UP; 1303 map->osd_state[osd] |= CEPH_OSD_UP | CEPH_OSD_EXISTS;
1304 map->osd_addr[osd] = addr; 1304 map->osd_addr[osd] = addr;
1305 } 1305 }
1306 1306
diff --git a/net/core/dev.c b/net/core/dev.c
index 464c22b6261a..323c04edd779 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4723,6 +4723,8 @@ void napi_disable(struct napi_struct *n)
4723 4723
4724 while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) 4724 while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
4725 msleep(1); 4725 msleep(1);
4726 while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state))
4727 msleep(1);
4726 4728
4727 hrtimer_cancel(&n->timer); 4729 hrtimer_cancel(&n->timer);
4728 4730
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index bf77e3639ce0..365de66436ac 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -631,15 +631,17 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
631{ 631{
632 int idx = 0; 632 int idx = 0;
633 struct fib_rule *rule; 633 struct fib_rule *rule;
634 int err = 0;
634 635
635 rcu_read_lock(); 636 rcu_read_lock();
636 list_for_each_entry_rcu(rule, &ops->rules_list, list) { 637 list_for_each_entry_rcu(rule, &ops->rules_list, list) {
637 if (idx < cb->args[1]) 638 if (idx < cb->args[1])
638 goto skip; 639 goto skip;
639 640
640 if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, 641 err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
641 cb->nlh->nlmsg_seq, RTM_NEWRULE, 642 cb->nlh->nlmsg_seq, RTM_NEWRULE,
642 NLM_F_MULTI, ops) < 0) 643 NLM_F_MULTI, ops);
644 if (err)
643 break; 645 break;
644skip: 646skip:
645 idx++; 647 idx++;
@@ -648,7 +650,7 @@ skip:
648 cb->args[1] = idx; 650 cb->args[1] = idx;
649 rules_ops_put(ops); 651 rules_ops_put(ops);
650 652
651 return skb->len; 653 return err;
652} 654}
653 655
654static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) 656static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
@@ -664,7 +666,9 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
664 if (ops == NULL) 666 if (ops == NULL)
665 return -EAFNOSUPPORT; 667 return -EAFNOSUPPORT;
666 668
667 return dump_rules(skb, cb, ops); 669 dump_rules(skb, cb, ops);
670
671 return skb->len;
668 } 672 }
669 673
670 rcu_read_lock(); 674 rcu_read_lock();
diff --git a/net/core/filter.c b/net/core/filter.c
index 96bd962c292d..60e3fe7c59c0 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -478,9 +478,9 @@ do_pass:
478 bpf_src = BPF_X; 478 bpf_src = BPF_X;
479 } else { 479 } else {
480 insn->dst_reg = BPF_REG_A; 480 insn->dst_reg = BPF_REG_A;
481 insn->src_reg = BPF_REG_X;
482 insn->imm = fp->k; 481 insn->imm = fp->k;
483 bpf_src = BPF_SRC(fp->code); 482 bpf_src = BPF_SRC(fp->code);
483 insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
484 } 484 }
485 485
486 /* Common case where 'jump_false' is next insn. */ 486 /* Common case where 'jump_false' is next insn. */
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 49b599062af1..b4c530065106 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1478,6 +1478,15 @@ static int of_dev_node_match(struct device *dev, const void *data)
1478 return ret == 0 ? dev->of_node == data : ret; 1478 return ret == 0 ? dev->of_node == data : ret;
1479} 1479}
1480 1480
1481/*
1482 * of_find_net_device_by_node - lookup the net device for the device node
1483 * @np: OF device node
1484 *
1485 * Looks up the net_device structure corresponding with the device node.
1486 * If successful, returns a pointer to the net_device with the embedded
1487 * struct device refcount incremented by one, or NULL on failure. The
1488 * refcount must be dropped when done with the net_device.
1489 */
1481struct net_device *of_find_net_device_by_node(struct device_node *np) 1490struct net_device *of_find_net_device_by_node(struct device_node *np)
1482{ 1491{
1483 struct device *dev; 1492 struct device *dev;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 6aa3db8dfc3b..8bdada242a7d 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -142,7 +142,7 @@ static void queue_process(struct work_struct *work)
142 */ 142 */
143static int poll_one_napi(struct napi_struct *napi, int budget) 143static int poll_one_napi(struct napi_struct *napi, int budget)
144{ 144{
145 int work; 145 int work = 0;
146 146
147 /* net_rx_action's ->poll() invocations and our's are 147 /* net_rx_action's ->poll() invocations and our's are
148 * synchronized by this test which is only made while 148 * synchronized by this test which is only made while
@@ -151,7 +151,12 @@ static int poll_one_napi(struct napi_struct *napi, int budget)
151 if (!test_bit(NAPI_STATE_SCHED, &napi->state)) 151 if (!test_bit(NAPI_STATE_SCHED, &napi->state))
152 return budget; 152 return budget;
153 153
154 set_bit(NAPI_STATE_NPSVC, &napi->state); 154 /* If we set this bit but see that it has already been set,
155 * that indicates that napi has been disabled and we need
156 * to abort this operation
157 */
158 if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state))
159 goto out;
155 160
156 work = napi->poll(napi, budget); 161 work = napi->poll(napi, budget);
157 WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll); 162 WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll);
@@ -159,6 +164,7 @@ static int poll_one_napi(struct napi_struct *napi, int budget)
159 164
160 clear_bit(NAPI_STATE_NPSVC, &napi->state); 165 clear_bit(NAPI_STATE_NPSVC, &napi->state);
161 166
167out:
162 return budget - work; 168 return budget - work;
163} 169}
164 170
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e5452296ec2f..474a6da3b51a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3047,6 +3047,7 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
3047 u32 portid = NETLINK_CB(cb->skb).portid; 3047 u32 portid = NETLINK_CB(cb->skb).portid;
3048 u32 seq = cb->nlh->nlmsg_seq; 3048 u32 seq = cb->nlh->nlmsg_seq;
3049 u32 filter_mask = 0; 3049 u32 filter_mask = 0;
3050 int err;
3050 3051
3051 if (nlmsg_len(cb->nlh) > sizeof(struct ifinfomsg)) { 3052 if (nlmsg_len(cb->nlh) > sizeof(struct ifinfomsg)) {
3052 struct nlattr *extfilt; 3053 struct nlattr *extfilt;
@@ -3067,20 +3068,25 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
3067 struct net_device *br_dev = netdev_master_upper_dev_get(dev); 3068 struct net_device *br_dev = netdev_master_upper_dev_get(dev);
3068 3069
3069 if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { 3070 if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
3070 if (idx >= cb->args[0] && 3071 if (idx >= cb->args[0]) {
3071 br_dev->netdev_ops->ndo_bridge_getlink( 3072 err = br_dev->netdev_ops->ndo_bridge_getlink(
3072 skb, portid, seq, dev, filter_mask, 3073 skb, portid, seq, dev,
3073 NLM_F_MULTI) < 0) 3074 filter_mask, NLM_F_MULTI);
3074 break; 3075 if (err < 0 && err != -EOPNOTSUPP)
3076 break;
3077 }
3075 idx++; 3078 idx++;
3076 } 3079 }
3077 3080
3078 if (ops->ndo_bridge_getlink) { 3081 if (ops->ndo_bridge_getlink) {
3079 if (idx >= cb->args[0] && 3082 if (idx >= cb->args[0]) {
3080 ops->ndo_bridge_getlink(skb, portid, seq, dev, 3083 err = ops->ndo_bridge_getlink(skb, portid,
3081 filter_mask, 3084 seq, dev,
3082 NLM_F_MULTI) < 0) 3085 filter_mask,
3083 break; 3086 NLM_F_MULTI);
3087 if (err < 0 && err != -EOPNOTSUPP)
3088 break;
3089 }
3084 idx++; 3090 idx++;
3085 } 3091 }
3086 } 3092 }
diff --git a/net/core/sock.c b/net/core/sock.c
index ca2984afe16e..3307c02244d3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2740,10 +2740,8 @@ static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
2740 return; 2740 return;
2741 kfree(rsk_prot->slab_name); 2741 kfree(rsk_prot->slab_name);
2742 rsk_prot->slab_name = NULL; 2742 rsk_prot->slab_name = NULL;
2743 if (rsk_prot->slab) { 2743 kmem_cache_destroy(rsk_prot->slab);
2744 kmem_cache_destroy(rsk_prot->slab); 2744 rsk_prot->slab = NULL;
2745 rsk_prot->slab = NULL;
2746 }
2747} 2745}
2748 2746
2749static int req_prot_init(const struct proto *prot) 2747static int req_prot_init(const struct proto *prot)
@@ -2828,10 +2826,8 @@ void proto_unregister(struct proto *prot)
2828 list_del(&prot->node); 2826 list_del(&prot->node);
2829 mutex_unlock(&proto_list_mutex); 2827 mutex_unlock(&proto_list_mutex);
2830 2828
2831 if (prot->slab != NULL) { 2829 kmem_cache_destroy(prot->slab);
2832 kmem_cache_destroy(prot->slab); 2830 prot->slab = NULL;
2833 prot->slab = NULL;
2834 }
2835 2831
2836 req_prot_cleanup(prot->rsk_prot); 2832 req_prot_cleanup(prot->rsk_prot);
2837 2833
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index bd9e718c2a20..3de0d0362d7f 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -398,12 +398,8 @@ out_err:
398 398
399void dccp_ackvec_exit(void) 399void dccp_ackvec_exit(void)
400{ 400{
401 if (dccp_ackvec_slab != NULL) { 401 kmem_cache_destroy(dccp_ackvec_slab);
402 kmem_cache_destroy(dccp_ackvec_slab); 402 dccp_ackvec_slab = NULL;
403 dccp_ackvec_slab = NULL; 403 kmem_cache_destroy(dccp_ackvec_record_slab);
404 } 404 dccp_ackvec_record_slab = NULL;
405 if (dccp_ackvec_record_slab != NULL) {
406 kmem_cache_destroy(dccp_ackvec_record_slab);
407 dccp_ackvec_record_slab = NULL;
408 }
409} 405}
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 83498975165f..90f77d08cc37 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -95,8 +95,7 @@ static struct kmem_cache *ccid_kmem_cache_create(int obj_size, char *slab_name_f
95 95
96static void ccid_kmem_cache_destroy(struct kmem_cache *slab) 96static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
97{ 97{
98 if (slab != NULL) 98 kmem_cache_destroy(slab);
99 kmem_cache_destroy(slab);
100} 99}
101 100
102static int __init ccid_activate(struct ccid_operations *ccid_ops) 101static int __init ccid_activate(struct ccid_operations *ccid_ops)
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 30addee2dd03..838f524cf11a 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -48,8 +48,6 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
48 tw->tw_ipv6only = sk->sk_ipv6only; 48 tw->tw_ipv6only = sk->sk_ipv6only;
49 } 49 }
50#endif 50#endif
51 /* Linkage updates. */
52 __inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
53 51
54 /* Get the TIME_WAIT timeout firing. */ 52 /* Get the TIME_WAIT timeout firing. */
55 if (timeo < rto) 53 if (timeo < rto)
@@ -60,6 +58,8 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
60 timeo = DCCP_TIMEWAIT_LEN; 58 timeo = DCCP_TIMEWAIT_LEN;
61 59
62 inet_twsk_schedule(tw, timeo); 60 inet_twsk_schedule(tw, timeo);
61 /* Linkage updates. */
62 __inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
63 inet_twsk_put(tw); 63 inet_twsk_put(tw);
64 } else { 64 } else {
65 /* Sorry, if we're out of memory, just CLOSE this 65 /* Sorry, if we're out of memory, just CLOSE this
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 76e3800765f8..c59fa5d9c22c 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -634,6 +634,10 @@ static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
634 port_index++; 634 port_index++;
635 } 635 }
636 kfree(pd->chip[i].rtable); 636 kfree(pd->chip[i].rtable);
637
638 /* Drop our reference to the MDIO bus device */
639 if (pd->chip[i].host_dev)
640 put_device(pd->chip[i].host_dev);
637 } 641 }
638 kfree(pd->chip); 642 kfree(pd->chip);
639} 643}
@@ -661,16 +665,22 @@ static int dsa_of_probe(struct device *dev)
661 return -EPROBE_DEFER; 665 return -EPROBE_DEFER;
662 666
663 ethernet = of_parse_phandle(np, "dsa,ethernet", 0); 667 ethernet = of_parse_phandle(np, "dsa,ethernet", 0);
664 if (!ethernet) 668 if (!ethernet) {
665 return -EINVAL; 669 ret = -EINVAL;
670 goto out_put_mdio;
671 }
666 672
667 ethernet_dev = of_find_net_device_by_node(ethernet); 673 ethernet_dev = of_find_net_device_by_node(ethernet);
668 if (!ethernet_dev) 674 if (!ethernet_dev) {
669 return -EPROBE_DEFER; 675 ret = -EPROBE_DEFER;
676 goto out_put_mdio;
677 }
670 678
671 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 679 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
672 if (!pd) 680 if (!pd) {
673 return -ENOMEM; 681 ret = -ENOMEM;
682 goto out_put_ethernet;
683 }
674 684
675 dev->platform_data = pd; 685 dev->platform_data = pd;
676 pd->of_netdev = ethernet_dev; 686 pd->of_netdev = ethernet_dev;
@@ -691,7 +701,9 @@ static int dsa_of_probe(struct device *dev)
691 cd = &pd->chip[chip_index]; 701 cd = &pd->chip[chip_index];
692 702
693 cd->of_node = child; 703 cd->of_node = child;
694 cd->host_dev = &mdio_bus->dev; 704
705 /* When assigning the host device, increment its refcount */
706 cd->host_dev = get_device(&mdio_bus->dev);
695 707
696 sw_addr = of_get_property(child, "reg", NULL); 708 sw_addr = of_get_property(child, "reg", NULL);
697 if (!sw_addr) 709 if (!sw_addr)
@@ -711,6 +723,12 @@ static int dsa_of_probe(struct device *dev)
711 ret = -EPROBE_DEFER; 723 ret = -EPROBE_DEFER;
712 goto out_free_chip; 724 goto out_free_chip;
713 } 725 }
726
727 /* Drop the mdio_bus device ref, replacing the host
728 * device with the mdio_bus_switch device, keeping
729 * the refcount from of_mdio_find_bus() above.
730 */
731 put_device(cd->host_dev);
714 cd->host_dev = &mdio_bus_switch->dev; 732 cd->host_dev = &mdio_bus_switch->dev;
715 } 733 }
716 734
@@ -744,6 +762,10 @@ static int dsa_of_probe(struct device *dev)
744 } 762 }
745 } 763 }
746 764
765 /* The individual chips hold their own refcount on the mdio bus,
766 * so drop ours */
767 put_device(&mdio_bus->dev);
768
747 return 0; 769 return 0;
748 770
749out_free_chip: 771out_free_chip:
@@ -751,6 +773,10 @@ out_free_chip:
751out_free: 773out_free:
752 kfree(pd); 774 kfree(pd);
753 dev->platform_data = NULL; 775 dev->platform_data = NULL;
776out_put_ethernet:
777 put_device(&ethernet_dev->dev);
778out_put_mdio:
779 put_device(&mdio_bus->dev);
754 return ret; 780 return ret;
755} 781}
756 782
@@ -762,6 +788,7 @@ static void dsa_of_remove(struct device *dev)
762 return; 788 return;
763 789
764 dsa_of_free_platform_data(pd); 790 dsa_of_free_platform_data(pd);
791 put_device(&pd->of_netdev->dev);
765 kfree(pd); 792 kfree(pd);
766} 793}
767#else 794#else
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index d25efc93d8f1..b6ca0890d018 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -78,7 +78,7 @@ static int trailer_rcv(struct sk_buff *skb, struct net_device *dev,
78 78
79 trailer = skb_tail_pointer(skb) - 4; 79 trailer = skb_tail_pointer(skb) - 4;
80 if (trailer[0] != 0x80 || (trailer[1] & 0xf8) != 0x00 || 80 if (trailer[0] != 0x80 || (trailer[1] & 0xf8) != 0x00 ||
81 (trailer[3] & 0xef) != 0x00 || trailer[3] != 0x00) 81 (trailer[2] & 0xef) != 0x00 || trailer[3] != 0x00)
82 goto out_drop; 82 goto out_drop;
83 83
84 source_port = trailer[1] & 7; 84 source_port = trailer[1] & 7;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 61ff5ea31283..01308e6e6127 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -113,6 +113,8 @@
113#include <net/arp.h> 113#include <net/arp.h>
114#include <net/ax25.h> 114#include <net/ax25.h>
115#include <net/netrom.h> 115#include <net/netrom.h>
116#include <net/dst_metadata.h>
117#include <net/ip_tunnels.h>
116 118
117#include <linux/uaccess.h> 119#include <linux/uaccess.h>
118 120
@@ -296,7 +298,8 @@ static void arp_send_dst(int type, int ptype, __be32 dest_ip,
296 struct net_device *dev, __be32 src_ip, 298 struct net_device *dev, __be32 src_ip,
297 const unsigned char *dest_hw, 299 const unsigned char *dest_hw,
298 const unsigned char *src_hw, 300 const unsigned char *src_hw,
299 const unsigned char *target_hw, struct sk_buff *oskb) 301 const unsigned char *target_hw,
302 struct dst_entry *dst)
300{ 303{
301 struct sk_buff *skb; 304 struct sk_buff *skb;
302 305
@@ -309,9 +312,7 @@ static void arp_send_dst(int type, int ptype, __be32 dest_ip,
309 if (!skb) 312 if (!skb)
310 return; 313 return;
311 314
312 if (oskb) 315 skb_dst_set(skb, dst);
313 skb_dst_copy(skb, oskb);
314
315 arp_xmit(skb); 316 arp_xmit(skb);
316} 317}
317 318
@@ -333,6 +334,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
333 __be32 target = *(__be32 *)neigh->primary_key; 334 __be32 target = *(__be32 *)neigh->primary_key;
334 int probes = atomic_read(&neigh->probes); 335 int probes = atomic_read(&neigh->probes);
335 struct in_device *in_dev; 336 struct in_device *in_dev;
337 struct dst_entry *dst = NULL;
336 338
337 rcu_read_lock(); 339 rcu_read_lock();
338 in_dev = __in_dev_get_rcu(dev); 340 in_dev = __in_dev_get_rcu(dev);
@@ -381,9 +383,10 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
381 } 383 }
382 } 384 }
383 385
386 if (skb && !(dev->priv_flags & IFF_XMIT_DST_RELEASE))
387 dst = dst_clone(skb_dst(skb));
384 arp_send_dst(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, 388 arp_send_dst(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
385 dst_hw, dev->dev_addr, NULL, 389 dst_hw, dev->dev_addr, NULL, dst);
386 dev->priv_flags & IFF_XMIT_DST_RELEASE ? NULL : skb);
387} 390}
388 391
389static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) 392static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
@@ -654,6 +657,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
654 u16 dev_type = dev->type; 657 u16 dev_type = dev->type;
655 int addr_type; 658 int addr_type;
656 struct neighbour *n; 659 struct neighbour *n;
660 struct dst_entry *reply_dst = NULL;
657 bool is_garp = false; 661 bool is_garp = false;
658 662
659 /* arp_rcv below verifies the ARP header and verifies the device 663 /* arp_rcv below verifies the ARP header and verifies the device
@@ -754,13 +758,18 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
754 * cache. 758 * cache.
755 */ 759 */
756 760
761 if (arp->ar_op == htons(ARPOP_REQUEST) && skb_metadata_dst(skb))
762 reply_dst = (struct dst_entry *)
763 iptunnel_metadata_reply(skb_metadata_dst(skb),
764 GFP_ATOMIC);
765
757 /* Special case: IPv4 duplicate address detection packet (RFC2131) */ 766 /* Special case: IPv4 duplicate address detection packet (RFC2131) */
758 if (sip == 0) { 767 if (sip == 0) {
759 if (arp->ar_op == htons(ARPOP_REQUEST) && 768 if (arp->ar_op == htons(ARPOP_REQUEST) &&
760 inet_addr_type_dev_table(net, dev, tip) == RTN_LOCAL && 769 inet_addr_type_dev_table(net, dev, tip) == RTN_LOCAL &&
761 !arp_ignore(in_dev, sip, tip)) 770 !arp_ignore(in_dev, sip, tip))
762 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, 771 arp_send_dst(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip,
763 dev->dev_addr, sha); 772 sha, dev->dev_addr, sha, reply_dst);
764 goto out; 773 goto out;
765 } 774 }
766 775
@@ -779,9 +788,10 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
779 if (!dont_send) { 788 if (!dont_send) {
780 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 789 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
781 if (n) { 790 if (n) {
782 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, 791 arp_send_dst(ARPOP_REPLY, ETH_P_ARP,
783 dev, tip, sha, dev->dev_addr, 792 sip, dev, tip, sha,
784 sha); 793 dev->dev_addr, sha,
794 reply_dst);
785 neigh_release(n); 795 neigh_release(n);
786 } 796 }
787 } 797 }
@@ -799,9 +809,10 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
799 if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || 809 if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
800 skb->pkt_type == PACKET_HOST || 810 skb->pkt_type == PACKET_HOST ||
801 NEIGH_VAR(in_dev->arp_parms, PROXY_DELAY) == 0) { 811 NEIGH_VAR(in_dev->arp_parms, PROXY_DELAY) == 0) {
802 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, 812 arp_send_dst(ARPOP_REPLY, ETH_P_ARP,
803 dev, tip, sha, dev->dev_addr, 813 sip, dev, tip, sha,
804 sha); 814 dev->dev_addr, sha,
815 reply_dst);
805 } else { 816 } else {
806 pneigh_enqueue(&arp_tbl, 817 pneigh_enqueue(&arp_tbl,
807 in_dev->arp_parms, skb); 818 in_dev->arp_parms, skb);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 26d6ffb6d23c..6c2af797f2f9 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1426,7 +1426,7 @@ found:
1426 nh->nh_flags & RTNH_F_LINKDOWN && 1426 nh->nh_flags & RTNH_F_LINKDOWN &&
1427 !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) 1427 !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
1428 continue; 1428 continue;
1429 if (!(flp->flowi4_flags & FLOWI_FLAG_VRFSRC)) { 1429 if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
1430 if (flp->flowi4_oif && 1430 if (flp->flowi4_oif &&
1431 flp->flowi4_oif != nh->nh_oif) 1431 flp->flowi4_oif != nh->nh_oif)
1432 continue; 1432 continue;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 79fe05befcae..e5eb8ac4089d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -427,7 +427,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
427 fl4.flowi4_mark = mark; 427 fl4.flowi4_mark = mark;
428 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); 428 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
429 fl4.flowi4_proto = IPPROTO_ICMP; 429 fl4.flowi4_proto = IPPROTO_ICMP;
430 fl4.flowi4_oif = vrf_master_ifindex(skb->dev) ? : skb->dev->ifindex; 430 fl4.flowi4_oif = vrf_master_ifindex(skb->dev);
431 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); 431 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
432 rt = ip_route_output_key(net, &fl4); 432 rt = ip_route_output_key(net, &fl4);
433 if (IS_ERR(rt)) 433 if (IS_ERR(rt))
@@ -461,7 +461,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
461 fl4->flowi4_proto = IPPROTO_ICMP; 461 fl4->flowi4_proto = IPPROTO_ICMP;
462 fl4->fl4_icmp_type = type; 462 fl4->fl4_icmp_type = type;
463 fl4->fl4_icmp_code = code; 463 fl4->fl4_icmp_code = code;
464 fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev) ? : skb_in->dev->ifindex; 464 fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev);
465 465
466 security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); 466 security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
467 rt = __ip_route_output_key(net, fl4); 467 rt = __ip_route_output_key(net, fl4);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index bac205136e1c..ba2f90d90cb5 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -685,20 +685,20 @@ void reqsk_queue_hash_req(struct request_sock_queue *queue,
685 req->num_timeout = 0; 685 req->num_timeout = 0;
686 req->sk = NULL; 686 req->sk = NULL;
687 687
688 setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req);
689 mod_timer_pinned(&req->rsk_timer, jiffies + timeout);
690 req->rsk_hash = hash;
691
688 /* before letting lookups find us, make sure all req fields 692 /* before letting lookups find us, make sure all req fields
689 * are committed to memory and refcnt initialized. 693 * are committed to memory and refcnt initialized.
690 */ 694 */
691 smp_wmb(); 695 smp_wmb();
692 atomic_set(&req->rsk_refcnt, 2); 696 atomic_set(&req->rsk_refcnt, 2);
693 setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req);
694 req->rsk_hash = hash;
695 697
696 spin_lock(&queue->syn_wait_lock); 698 spin_lock(&queue->syn_wait_lock);
697 req->dl_next = lopt->syn_table[hash]; 699 req->dl_next = lopt->syn_table[hash];
698 lopt->syn_table[hash] = req; 700 lopt->syn_table[hash] = req;
699 spin_unlock(&queue->syn_wait_lock); 701 spin_unlock(&queue->syn_wait_lock);
700
701 mod_timer_pinned(&req->rsk_timer, jiffies + timeout);
702} 702}
703EXPORT_SYMBOL(reqsk_queue_hash_req); 703EXPORT_SYMBOL(reqsk_queue_hash_req);
704 704
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index ae22cc24fbe8..c67f9bd7699c 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -123,13 +123,15 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
123 /* 123 /*
124 * Step 2: Hash TW into tcp ehash chain. 124 * Step 2: Hash TW into tcp ehash chain.
125 * Notes : 125 * Notes :
126 * - tw_refcnt is set to 3 because : 126 * - tw_refcnt is set to 4 because :
127 * - We have one reference from bhash chain. 127 * - We have one reference from bhash chain.
128 * - We have one reference from ehash chain. 128 * - We have one reference from ehash chain.
129 * - We have one reference from timer.
130 * - One reference for ourself (our caller will release it).
129 * We can use atomic_set() because prior spin_lock()/spin_unlock() 131 * We can use atomic_set() because prior spin_lock()/spin_unlock()
130 * committed into memory all tw fields. 132 * committed into memory all tw fields.
131 */ 133 */
132 atomic_set(&tw->tw_refcnt, 1 + 1 + 1); 134 atomic_set(&tw->tw_refcnt, 4);
133 inet_twsk_add_node_rcu(tw, &ehead->chain); 135 inet_twsk_add_node_rcu(tw, &ehead->chain);
134 136
135 /* Step 3: Remove SK from hash chain */ 137 /* Step 3: Remove SK from hash chain */
@@ -217,7 +219,7 @@ void inet_twsk_deschedule_put(struct inet_timewait_sock *tw)
217} 219}
218EXPORT_SYMBOL(inet_twsk_deschedule_put); 220EXPORT_SYMBOL(inet_twsk_deschedule_put);
219 221
220void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo) 222void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
221{ 223{
222 /* timeout := RTO * 3.5 224 /* timeout := RTO * 3.5
223 * 225 *
@@ -245,12 +247,14 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo)
245 */ 247 */
246 248
247 tw->tw_kill = timeo <= 4*HZ; 249 tw->tw_kill = timeo <= 4*HZ;
248 if (!mod_timer_pinned(&tw->tw_timer, jiffies + timeo)) { 250 if (!rearm) {
249 atomic_inc(&tw->tw_refcnt); 251 BUG_ON(mod_timer_pinned(&tw->tw_timer, jiffies + timeo));
250 atomic_inc(&tw->tw_dr->tw_count); 252 atomic_inc(&tw->tw_dr->tw_count);
253 } else {
254 mod_timer_pending(&tw->tw_timer, jiffies + timeo);
251 } 255 }
252} 256}
253EXPORT_SYMBOL_GPL(inet_twsk_schedule); 257EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
254 258
255void inet_twsk_purge(struct inet_hashinfo *hashinfo, 259void inet_twsk_purge(struct inet_hashinfo *hashinfo,
256 struct inet_timewait_death_row *twdr, int family) 260 struct inet_timewait_death_row *twdr, int family)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 29ed6c5a5185..84dce6a92f93 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -46,12 +46,13 @@
46#include <net/net_namespace.h> 46#include <net/net_namespace.h>
47#include <net/netns/generic.h> 47#include <net/netns/generic.h>
48#include <net/rtnetlink.h> 48#include <net/rtnetlink.h>
49#include <net/dst_metadata.h>
49 50
50int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, 51int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
51 __be32 src, __be32 dst, __u8 proto, 52 __be32 src, __be32 dst, __u8 proto,
52 __u8 tos, __u8 ttl, __be16 df, bool xnet) 53 __u8 tos, __u8 ttl, __be16 df, bool xnet)
53{ 54{
54 int pkt_len = skb->len; 55 int pkt_len = skb->len - skb_inner_network_offset(skb);
55 struct iphdr *iph; 56 struct iphdr *iph;
56 int err; 57 int err;
57 58
@@ -119,6 +120,33 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
119} 120}
120EXPORT_SYMBOL_GPL(iptunnel_pull_header); 121EXPORT_SYMBOL_GPL(iptunnel_pull_header);
121 122
123struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
124 gfp_t flags)
125{
126 struct metadata_dst *res;
127 struct ip_tunnel_info *dst, *src;
128
129 if (!md || md->u.tun_info.mode & IP_TUNNEL_INFO_TX)
130 return NULL;
131
132 res = metadata_dst_alloc(0, flags);
133 if (!res)
134 return NULL;
135
136 dst = &res->u.tun_info;
137 src = &md->u.tun_info;
138 dst->key.tun_id = src->key.tun_id;
139 if (src->mode & IP_TUNNEL_INFO_IPV6)
140 memcpy(&dst->key.u.ipv6.dst, &src->key.u.ipv6.src,
141 sizeof(struct in6_addr));
142 else
143 dst->key.u.ipv4.dst = src->key.u.ipv4.src;
144 dst->mode = src->mode | IP_TUNNEL_INFO_TX;
145
146 return res;
147}
148EXPORT_SYMBOL_GPL(iptunnel_metadata_reply);
149
122struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, 150struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
123 bool csum_help, 151 bool csum_help,
124 int gso_type_mask) 152 int gso_type_mask)
@@ -198,8 +226,6 @@ static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = {
198 [LWTUNNEL_IP_SRC] = { .type = NLA_U32 }, 226 [LWTUNNEL_IP_SRC] = { .type = NLA_U32 },
199 [LWTUNNEL_IP_TTL] = { .type = NLA_U8 }, 227 [LWTUNNEL_IP_TTL] = { .type = NLA_U8 },
200 [LWTUNNEL_IP_TOS] = { .type = NLA_U8 }, 228 [LWTUNNEL_IP_TOS] = { .type = NLA_U8 },
201 [LWTUNNEL_IP_SPORT] = { .type = NLA_U16 },
202 [LWTUNNEL_IP_DPORT] = { .type = NLA_U16 },
203 [LWTUNNEL_IP_FLAGS] = { .type = NLA_U16 }, 229 [LWTUNNEL_IP_FLAGS] = { .type = NLA_U16 },
204}; 230};
205 231
@@ -239,12 +265,6 @@ static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr,
239 if (tb[LWTUNNEL_IP_TOS]) 265 if (tb[LWTUNNEL_IP_TOS])
240 tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]); 266 tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]);
241 267
242 if (tb[LWTUNNEL_IP_SPORT])
243 tun_info->key.tp_src = nla_get_be16(tb[LWTUNNEL_IP_SPORT]);
244
245 if (tb[LWTUNNEL_IP_DPORT])
246 tun_info->key.tp_dst = nla_get_be16(tb[LWTUNNEL_IP_DPORT]);
247
248 if (tb[LWTUNNEL_IP_FLAGS]) 268 if (tb[LWTUNNEL_IP_FLAGS])
249 tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP_FLAGS]); 269 tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP_FLAGS]);
250 270
@@ -266,8 +286,6 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb,
266 nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) || 286 nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) ||
267 nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) || 287 nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) ||
268 nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) || 288 nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) ||
269 nla_put_u16(skb, LWTUNNEL_IP_SPORT, tun_info->key.tp_src) ||
270 nla_put_u16(skb, LWTUNNEL_IP_DPORT, tun_info->key.tp_dst) ||
271 nla_put_u16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags)) 289 nla_put_u16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags))
272 return -ENOMEM; 290 return -ENOMEM;
273 291
@@ -281,8 +299,6 @@ static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
281 + nla_total_size(4) /* LWTUNNEL_IP_SRC */ 299 + nla_total_size(4) /* LWTUNNEL_IP_SRC */
282 + nla_total_size(1) /* LWTUNNEL_IP_TOS */ 300 + nla_total_size(1) /* LWTUNNEL_IP_TOS */
283 + nla_total_size(1) /* LWTUNNEL_IP_TTL */ 301 + nla_total_size(1) /* LWTUNNEL_IP_TTL */
284 + nla_total_size(2) /* LWTUNNEL_IP_SPORT */
285 + nla_total_size(2) /* LWTUNNEL_IP_DPORT */
286 + nla_total_size(2); /* LWTUNNEL_IP_FLAGS */ 302 + nla_total_size(2); /* LWTUNNEL_IP_FLAGS */
287} 303}
288 304
@@ -305,8 +321,6 @@ static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = {
305 [LWTUNNEL_IP6_SRC] = { .len = sizeof(struct in6_addr) }, 321 [LWTUNNEL_IP6_SRC] = { .len = sizeof(struct in6_addr) },
306 [LWTUNNEL_IP6_HOPLIMIT] = { .type = NLA_U8 }, 322 [LWTUNNEL_IP6_HOPLIMIT] = { .type = NLA_U8 },
307 [LWTUNNEL_IP6_TC] = { .type = NLA_U8 }, 323 [LWTUNNEL_IP6_TC] = { .type = NLA_U8 },
308 [LWTUNNEL_IP6_SPORT] = { .type = NLA_U16 },
309 [LWTUNNEL_IP6_DPORT] = { .type = NLA_U16 },
310 [LWTUNNEL_IP6_FLAGS] = { .type = NLA_U16 }, 324 [LWTUNNEL_IP6_FLAGS] = { .type = NLA_U16 },
311}; 325};
312 326
@@ -346,12 +360,6 @@ static int ip6_tun_build_state(struct net_device *dev, struct nlattr *attr,
346 if (tb[LWTUNNEL_IP6_TC]) 360 if (tb[LWTUNNEL_IP6_TC])
347 tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]); 361 tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]);
348 362
349 if (tb[LWTUNNEL_IP6_SPORT])
350 tun_info->key.tp_src = nla_get_be16(tb[LWTUNNEL_IP6_SPORT]);
351
352 if (tb[LWTUNNEL_IP6_DPORT])
353 tun_info->key.tp_dst = nla_get_be16(tb[LWTUNNEL_IP6_DPORT]);
354
355 if (tb[LWTUNNEL_IP6_FLAGS]) 363 if (tb[LWTUNNEL_IP6_FLAGS])
356 tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP6_FLAGS]); 364 tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP6_FLAGS]);
357 365
@@ -373,8 +381,6 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb,
373 nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) || 381 nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) ||
374 nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.tos) || 382 nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.tos) ||
375 nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.ttl) || 383 nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.ttl) ||
376 nla_put_u16(skb, LWTUNNEL_IP6_SPORT, tun_info->key.tp_src) ||
377 nla_put_u16(skb, LWTUNNEL_IP6_DPORT, tun_info->key.tp_dst) ||
378 nla_put_u16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags)) 384 nla_put_u16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags))
379 return -ENOMEM; 385 return -ENOMEM;
380 386
@@ -388,8 +394,6 @@ static int ip6_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
388 + nla_total_size(16) /* LWTUNNEL_IP6_SRC */ 394 + nla_total_size(16) /* LWTUNNEL_IP6_SRC */
389 + nla_total_size(1) /* LWTUNNEL_IP6_HOPLIMIT */ 395 + nla_total_size(1) /* LWTUNNEL_IP6_HOPLIMIT */
390 + nla_total_size(1) /* LWTUNNEL_IP6_TC */ 396 + nla_total_size(1) /* LWTUNNEL_IP6_TC */
391 + nla_total_size(2) /* LWTUNNEL_IP6_SPORT */
392 + nla_total_size(2) /* LWTUNNEL_IP6_DPORT */
393 + nla_total_size(2); /* LWTUNNEL_IP6_FLAGS */ 397 + nla_total_size(2); /* LWTUNNEL_IP6_FLAGS */
394} 398}
395 399
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f7afcba8b1a1..6bab84503cd9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2036,6 +2036,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
2036 struct fib_result res; 2036 struct fib_result res;
2037 struct rtable *rth; 2037 struct rtable *rth;
2038 int orig_oif; 2038 int orig_oif;
2039 int err = -ENETUNREACH;
2039 2040
2040 res.tclassid = 0; 2041 res.tclassid = 0;
2041 res.fi = NULL; 2042 res.fi = NULL;
@@ -2144,7 +2145,8 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
2144 goto make_route; 2145 goto make_route;
2145 } 2146 }
2146 2147
2147 if (fib_lookup(net, fl4, &res, 0)) { 2148 err = fib_lookup(net, fl4, &res, 0);
2149 if (err) {
2148 res.fi = NULL; 2150 res.fi = NULL;
2149 res.table = NULL; 2151 res.table = NULL;
2150 if (fl4->flowi4_oif) { 2152 if (fl4->flowi4_oif) {
@@ -2172,7 +2174,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
2172 res.type = RTN_UNICAST; 2174 res.type = RTN_UNICAST;
2173 goto make_route; 2175 goto make_route;
2174 } 2176 }
2175 rth = ERR_PTR(-ENETUNREACH); 2177 rth = ERR_PTR(err);
2176 goto out; 2178 goto out;
2177 } 2179 }
2178 2180
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index c6ded6b2a79f..448c2615fece 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -154,14 +154,20 @@ static void bictcp_init(struct sock *sk)
154static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) 154static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
155{ 155{
156 if (event == CA_EVENT_TX_START) { 156 if (event == CA_EVENT_TX_START) {
157 s32 delta = tcp_time_stamp - tcp_sk(sk)->lsndtime;
158 struct bictcp *ca = inet_csk_ca(sk); 157 struct bictcp *ca = inet_csk_ca(sk);
158 u32 now = tcp_time_stamp;
159 s32 delta;
160
161 delta = now - tcp_sk(sk)->lsndtime;
159 162
160 /* We were application limited (idle) for a while. 163 /* We were application limited (idle) for a while.
161 * Shift epoch_start to keep cwnd growth to cubic curve. 164 * Shift epoch_start to keep cwnd growth to cubic curve.
162 */ 165 */
163 if (ca->epoch_start && delta > 0) 166 if (ca->epoch_start && delta > 0) {
164 ca->epoch_start += delta; 167 ca->epoch_start += delta;
168 if (after(ca->epoch_start, now))
169 ca->epoch_start = now;
170 }
165 return; 171 return;
166 } 172 }
167} 173}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e0a87c238882..e4fe62b6b106 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -162,9 +162,9 @@ kill_with_rst:
162 if (tcp_death_row.sysctl_tw_recycle && 162 if (tcp_death_row.sysctl_tw_recycle &&
163 tcptw->tw_ts_recent_stamp && 163 tcptw->tw_ts_recent_stamp &&
164 tcp_tw_remember_stamp(tw)) 164 tcp_tw_remember_stamp(tw))
165 inet_twsk_schedule(tw, tw->tw_timeout); 165 inet_twsk_reschedule(tw, tw->tw_timeout);
166 else 166 else
167 inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); 167 inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
168 return TCP_TW_ACK; 168 return TCP_TW_ACK;
169 } 169 }
170 170
@@ -201,7 +201,7 @@ kill:
201 return TCP_TW_SUCCESS; 201 return TCP_TW_SUCCESS;
202 } 202 }
203 } 203 }
204 inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); 204 inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
205 205
206 if (tmp_opt.saw_tstamp) { 206 if (tmp_opt.saw_tstamp) {
207 tcptw->tw_ts_recent = tmp_opt.rcv_tsval; 207 tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
@@ -251,7 +251,7 @@ kill:
251 * Do not reschedule in the last case. 251 * Do not reschedule in the last case.
252 */ 252 */
253 if (paws_reject || th->ack) 253 if (paws_reject || th->ack)
254 inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); 254 inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
255 255
256 return tcp_timewait_check_oow_rate_limit( 256 return tcp_timewait_check_oow_rate_limit(
257 tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); 257 tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT);
@@ -322,9 +322,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
322 } while (0); 322 } while (0);
323#endif 323#endif
324 324
325 /* Linkage updates. */
326 __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
327
328 /* Get the TIME_WAIT timeout firing. */ 325 /* Get the TIME_WAIT timeout firing. */
329 if (timeo < rto) 326 if (timeo < rto)
330 timeo = rto; 327 timeo = rto;
@@ -338,6 +335,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
338 } 335 }
339 336
340 inet_twsk_schedule(tw, timeo); 337 inet_twsk_schedule(tw, timeo);
338 /* Linkage updates. */
339 __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
341 inet_twsk_put(tw); 340 inet_twsk_put(tw);
342 } else { 341 } else {
343 /* Sorry, if we're out of memory, just CLOSE this 342 /* Sorry, if we're out of memory, just CLOSE this
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 53ce6cf55598..9e53dd9bfcad 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2893,6 +2893,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2893 skb_reserve(skb, MAX_TCP_HEADER); 2893 skb_reserve(skb, MAX_TCP_HEADER);
2894 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), 2894 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2895 TCPHDR_ACK | TCPHDR_RST); 2895 TCPHDR_ACK | TCPHDR_RST);
2896 skb_mstamp_get(&skb->skb_mstamp);
2896 /* Send it off. */ 2897 /* Send it off. */
2897 if (tcp_transmit_skb(sk, skb, 0, priority)) 2898 if (tcp_transmit_skb(sk, skb, 0, priority))
2898 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); 2899 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c0a15e7f359f..f7d1d5e19e95 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1024,7 +1024,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
1024 if (netif_index_is_vrf(net, ipc.oif)) { 1024 if (netif_index_is_vrf(net, ipc.oif)) {
1025 flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, 1025 flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
1026 RT_SCOPE_UNIVERSE, sk->sk_protocol, 1026 RT_SCOPE_UNIVERSE, sk->sk_protocol,
1027 (flow_flags | FLOWI_FLAG_VRFSRC), 1027 (flow_flags | FLOWI_FLAG_VRFSRC |
1028 FLOWI_FLAG_SKIP_NH_OIF),
1028 faddr, saddr, dport, 1029 faddr, saddr, dport,
1029 inet->inet_sport); 1030 inet->inet_sport);
1030 1031
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 671011055ad5..0304d1680ca2 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -33,6 +33,8 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
33 if (saddr) 33 if (saddr)
34 fl4->saddr = saddr->a4; 34 fl4->saddr = saddr->a4;
35 35
36 fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF;
37
36 rt = __ip_route_output_key(net, fl4); 38 rt = __ip_route_output_key(net, fl4);
37 if (!IS_ERR(rt)) 39 if (!IS_ERR(rt))
38 return &rt->dst; 40 return &rt->dst;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 759d28ad16b7..c8380f1876f1 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5132,13 +5132,12 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
5132 5132
5133 rt = addrconf_get_prefix_route(&ifp->peer_addr, 128, 5133 rt = addrconf_get_prefix_route(&ifp->peer_addr, 128,
5134 ifp->idev->dev, 0, 0); 5134 ifp->idev->dev, 0, 0);
5135 if (rt && ip6_del_rt(rt)) 5135 if (rt)
5136 dst_free(&rt->dst); 5136 ip6_del_rt(rt);
5137 } 5137 }
5138 dst_hold(&ifp->rt->dst); 5138 dst_hold(&ifp->rt->dst);
5139 5139
5140 if (ip6_del_rt(ifp->rt)) 5140 ip6_del_rt(ifp->rt);
5141 dst_free(&ifp->rt->dst);
5142 5141
5143 rt_genid_bump_ipv6(net); 5142 rt_genid_bump_ipv6(net);
5144 break; 5143 break;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 418d9823692b..7d2e0023c72d 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -155,6 +155,11 @@ static void node_free(struct fib6_node *fn)
155 kmem_cache_free(fib6_node_kmem, fn); 155 kmem_cache_free(fib6_node_kmem, fn);
156} 156}
157 157
158static void rt6_rcu_free(struct rt6_info *rt)
159{
160 call_rcu(&rt->dst.rcu_head, dst_rcu_free);
161}
162
158static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) 163static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
159{ 164{
160 int cpu; 165 int cpu;
@@ -169,7 +174,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
169 ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu); 174 ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu);
170 pcpu_rt = *ppcpu_rt; 175 pcpu_rt = *ppcpu_rt;
171 if (pcpu_rt) { 176 if (pcpu_rt) {
172 dst_free(&pcpu_rt->dst); 177 rt6_rcu_free(pcpu_rt);
173 *ppcpu_rt = NULL; 178 *ppcpu_rt = NULL;
174 } 179 }
175 } 180 }
@@ -181,7 +186,7 @@ static void rt6_release(struct rt6_info *rt)
181{ 186{
182 if (atomic_dec_and_test(&rt->rt6i_ref)) { 187 if (atomic_dec_and_test(&rt->rt6i_ref)) {
183 rt6_free_pcpu(rt); 188 rt6_free_pcpu(rt);
184 dst_free(&rt->dst); 189 rt6_rcu_free(rt);
185 } 190 }
186} 191}
187 192
@@ -846,7 +851,7 @@ add:
846 *ins = rt; 851 *ins = rt;
847 rt->rt6i_node = fn; 852 rt->rt6i_node = fn;
848 atomic_inc(&rt->rt6i_ref); 853 atomic_inc(&rt->rt6i_ref);
849 inet6_rt_notify(RTM_NEWROUTE, rt, info); 854 inet6_rt_notify(RTM_NEWROUTE, rt, info, 0);
850 info->nl_net->ipv6.rt6_stats->fib_rt_entries++; 855 info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
851 856
852 if (!(fn->fn_flags & RTN_RTINFO)) { 857 if (!(fn->fn_flags & RTN_RTINFO)) {
@@ -872,7 +877,7 @@ add:
872 rt->rt6i_node = fn; 877 rt->rt6i_node = fn;
873 rt->dst.rt6_next = iter->dst.rt6_next; 878 rt->dst.rt6_next = iter->dst.rt6_next;
874 atomic_inc(&rt->rt6i_ref); 879 atomic_inc(&rt->rt6i_ref);
875 inet6_rt_notify(RTM_NEWROUTE, rt, info); 880 inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
876 if (!(fn->fn_flags & RTN_RTINFO)) { 881 if (!(fn->fn_flags & RTN_RTINFO)) {
877 info->nl_net->ipv6.rt6_stats->fib_route_nodes++; 882 info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
878 fn->fn_flags |= RTN_RTINFO; 883 fn->fn_flags |= RTN_RTINFO;
@@ -933,6 +938,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
933 int replace_required = 0; 938 int replace_required = 0;
934 int sernum = fib6_new_sernum(info->nl_net); 939 int sernum = fib6_new_sernum(info->nl_net);
935 940
941 if (WARN_ON_ONCE((rt->dst.flags & DST_NOCACHE) &&
942 !atomic_read(&rt->dst.__refcnt)))
943 return -EINVAL;
944
936 if (info->nlh) { 945 if (info->nlh) {
937 if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) 946 if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
938 allow_create = 0; 947 allow_create = 0;
@@ -1025,6 +1034,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
1025 fib6_start_gc(info->nl_net, rt); 1034 fib6_start_gc(info->nl_net, rt);
1026 if (!(rt->rt6i_flags & RTF_CACHE)) 1035 if (!(rt->rt6i_flags & RTF_CACHE))
1027 fib6_prune_clones(info->nl_net, pn); 1036 fib6_prune_clones(info->nl_net, pn);
1037 rt->dst.flags &= ~DST_NOCACHE;
1028 } 1038 }
1029 1039
1030out: 1040out:
@@ -1049,7 +1059,8 @@ out:
1049 atomic_inc(&pn->leaf->rt6i_ref); 1059 atomic_inc(&pn->leaf->rt6i_ref);
1050 } 1060 }
1051#endif 1061#endif
1052 dst_free(&rt->dst); 1062 if (!(rt->dst.flags & DST_NOCACHE))
1063 dst_free(&rt->dst);
1053 } 1064 }
1054 return err; 1065 return err;
1055 1066
@@ -1060,7 +1071,8 @@ out:
1060st_failure: 1071st_failure:
1061 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) 1072 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
1062 fib6_repair_tree(info->nl_net, fn); 1073 fib6_repair_tree(info->nl_net, fn);
1063 dst_free(&rt->dst); 1074 if (!(rt->dst.flags & DST_NOCACHE))
1075 dst_free(&rt->dst);
1064 return err; 1076 return err;
1065#endif 1077#endif
1066} 1078}
@@ -1410,7 +1422,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1410 1422
1411 fib6_purge_rt(rt, fn, net); 1423 fib6_purge_rt(rt, fn, net);
1412 1424
1413 inet6_rt_notify(RTM_DELROUTE, rt, info); 1425 inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
1414 rt6_release(rt); 1426 rt6_release(rt);
1415} 1427}
1416 1428
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4038c694ec03..3c7b9310b33f 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -404,13 +404,13 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
404 struct ipv6_tlv_tnl_enc_lim *tel; 404 struct ipv6_tlv_tnl_enc_lim *tel;
405 __u32 mtu; 405 __u32 mtu;
406 case ICMPV6_DEST_UNREACH: 406 case ICMPV6_DEST_UNREACH:
407 net_warn_ratelimited("%s: Path to destination invalid or inactive!\n", 407 net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
408 t->parms.name); 408 t->parms.name);
409 break; 409 break;
410 case ICMPV6_TIME_EXCEED: 410 case ICMPV6_TIME_EXCEED:
411 if (code == ICMPV6_EXC_HOPLIMIT) { 411 if (code == ICMPV6_EXC_HOPLIMIT) {
412 net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", 412 net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
413 t->parms.name); 413 t->parms.name);
414 } 414 }
415 break; 415 break;
416 case ICMPV6_PARAMPROB: 416 case ICMPV6_PARAMPROB:
@@ -421,12 +421,12 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
421 if (teli && teli == be32_to_cpu(info) - 2) { 421 if (teli && teli == be32_to_cpu(info) - 2) {
422 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; 422 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
423 if (tel->encap_limit == 0) { 423 if (tel->encap_limit == 0) {
424 net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", 424 net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
425 t->parms.name); 425 t->parms.name);
426 } 426 }
427 } else { 427 } else {
428 net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n", 428 net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
429 t->parms.name); 429 t->parms.name);
430 } 430 }
431 break; 431 break;
432 case ICMPV6_PKT_TOOBIG: 432 case ICMPV6_PKT_TOOBIG:
@@ -634,20 +634,20 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
634 } 634 }
635 635
636 if (!fl6->flowi6_mark) 636 if (!fl6->flowi6_mark)
637 dst = ip6_tnl_dst_check(tunnel); 637 dst = ip6_tnl_dst_get(tunnel);
638 638
639 if (!dst) { 639 if (!dst) {
640 ndst = ip6_route_output(net, NULL, fl6); 640 dst = ip6_route_output(net, NULL, fl6);
641 641
642 if (ndst->error) 642 if (dst->error)
643 goto tx_err_link_failure; 643 goto tx_err_link_failure;
644 ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); 644 dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
645 if (IS_ERR(ndst)) { 645 if (IS_ERR(dst)) {
646 err = PTR_ERR(ndst); 646 err = PTR_ERR(dst);
647 ndst = NULL; 647 dst = NULL;
648 goto tx_err_link_failure; 648 goto tx_err_link_failure;
649 } 649 }
650 dst = ndst; 650 ndst = dst;
651 } 651 }
652 652
653 tdev = dst->dev; 653 tdev = dst->dev;
@@ -702,12 +702,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
702 skb = new_skb; 702 skb = new_skb;
703 } 703 }
704 704
705 if (fl6->flowi6_mark) { 705 if (!fl6->flowi6_mark && ndst)
706 skb_dst_set(skb, dst); 706 ip6_tnl_dst_set(tunnel, ndst);
707 ndst = NULL; 707 skb_dst_set(skb, dst);
708 } else {
709 skb_dst_set_noref(skb, dst);
710 }
711 708
712 proto = NEXTHDR_GRE; 709 proto = NEXTHDR_GRE;
713 if (encap_limit >= 0) { 710 if (encap_limit >= 0) {
@@ -762,14 +759,12 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
762 skb_set_inner_protocol(skb, protocol); 759 skb_set_inner_protocol(skb, protocol);
763 760
764 ip6tunnel_xmit(NULL, skb, dev); 761 ip6tunnel_xmit(NULL, skb, dev);
765 if (ndst)
766 ip6_tnl_dst_store(tunnel, ndst);
767 return 0; 762 return 0;
768tx_err_link_failure: 763tx_err_link_failure:
769 stats->tx_carrier_errors++; 764 stats->tx_carrier_errors++;
770 dst_link_failure(skb); 765 dst_link_failure(skb);
771tx_err_dst_release: 766tx_err_dst_release:
772 dst_release(ndst); 767 dst_release(dst);
773 return err; 768 return err;
774} 769}
775 770
@@ -1223,6 +1218,9 @@ static const struct net_device_ops ip6gre_netdev_ops = {
1223 1218
1224static void ip6gre_dev_free(struct net_device *dev) 1219static void ip6gre_dev_free(struct net_device *dev)
1225{ 1220{
1221 struct ip6_tnl *t = netdev_priv(dev);
1222
1223 ip6_tnl_dst_destroy(t);
1226 free_percpu(dev->tstats); 1224 free_percpu(dev->tstats);
1227 free_netdev(dev); 1225 free_netdev(dev);
1228} 1226}
@@ -1245,9 +1243,10 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
1245 netif_keep_dst(dev); 1243 netif_keep_dst(dev);
1246} 1244}
1247 1245
1248static int ip6gre_tunnel_init(struct net_device *dev) 1246static int ip6gre_tunnel_init_common(struct net_device *dev)
1249{ 1247{
1250 struct ip6_tnl *tunnel; 1248 struct ip6_tnl *tunnel;
1249 int ret;
1251 1250
1252 tunnel = netdev_priv(dev); 1251 tunnel = netdev_priv(dev);
1253 1252
@@ -1255,16 +1254,37 @@ static int ip6gre_tunnel_init(struct net_device *dev)
1255 tunnel->net = dev_net(dev); 1254 tunnel->net = dev_net(dev);
1256 strcpy(tunnel->parms.name, dev->name); 1255 strcpy(tunnel->parms.name, dev->name);
1257 1256
1257 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1258 if (!dev->tstats)
1259 return -ENOMEM;
1260
1261 ret = ip6_tnl_dst_init(tunnel);
1262 if (ret) {
1263 free_percpu(dev->tstats);
1264 dev->tstats = NULL;
1265 return ret;
1266 }
1267
1268 return 0;
1269}
1270
1271static int ip6gre_tunnel_init(struct net_device *dev)
1272{
1273 struct ip6_tnl *tunnel;
1274 int ret;
1275
1276 ret = ip6gre_tunnel_init_common(dev);
1277 if (ret)
1278 return ret;
1279
1280 tunnel = netdev_priv(dev);
1281
1258 memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); 1282 memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
1259 memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr)); 1283 memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
1260 1284
1261 if (ipv6_addr_any(&tunnel->parms.raddr)) 1285 if (ipv6_addr_any(&tunnel->parms.raddr))
1262 dev->header_ops = &ip6gre_header_ops; 1286 dev->header_ops = &ip6gre_header_ops;
1263 1287
1264 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1265 if (!dev->tstats)
1266 return -ENOMEM;
1267
1268 return 0; 1288 return 0;
1269} 1289}
1270 1290
@@ -1460,19 +1480,16 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
1460static int ip6gre_tap_init(struct net_device *dev) 1480static int ip6gre_tap_init(struct net_device *dev)
1461{ 1481{
1462 struct ip6_tnl *tunnel; 1482 struct ip6_tnl *tunnel;
1483 int ret;
1463 1484
1464 tunnel = netdev_priv(dev); 1485 ret = ip6gre_tunnel_init_common(dev);
1486 if (ret)
1487 return ret;
1465 1488
1466 tunnel->dev = dev; 1489 tunnel = netdev_priv(dev);
1467 tunnel->net = dev_net(dev);
1468 strcpy(tunnel->parms.name, dev->name);
1469 1490
1470 ip6gre_tnl_link_config(tunnel, 1); 1491 ip6gre_tnl_link_config(tunnel, 1);
1471 1492
1472 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1473 if (!dev->tstats)
1474 return -ENOMEM;
1475
1476 return 0; 1493 return 0;
1477} 1494}
1478 1495
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 60c565309d0a..a598fe2c0849 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -600,20 +600,22 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
600 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, 600 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
601 &ipv6_hdr(skb)->saddr); 601 &ipv6_hdr(skb)->saddr);
602 602
603 hroom = LL_RESERVED_SPACE(rt->dst.dev);
603 if (skb_has_frag_list(skb)) { 604 if (skb_has_frag_list(skb)) {
604 int first_len = skb_pagelen(skb); 605 int first_len = skb_pagelen(skb);
605 struct sk_buff *frag2; 606 struct sk_buff *frag2;
606 607
607 if (first_len - hlen > mtu || 608 if (first_len - hlen > mtu ||
608 ((first_len - hlen) & 7) || 609 ((first_len - hlen) & 7) ||
609 skb_cloned(skb)) 610 skb_cloned(skb) ||
611 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
610 goto slow_path; 612 goto slow_path;
611 613
612 skb_walk_frags(skb, frag) { 614 skb_walk_frags(skb, frag) {
613 /* Correct geometry. */ 615 /* Correct geometry. */
614 if (frag->len > mtu || 616 if (frag->len > mtu ||
615 ((frag->len & 7) && frag->next) || 617 ((frag->len & 7) && frag->next) ||
616 skb_headroom(frag) < hlen) 618 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
617 goto slow_path_clean; 619 goto slow_path_clean;
618 620
619 /* Partially cloned skb? */ 621 /* Partially cloned skb? */
@@ -630,8 +632,6 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
630 632
631 err = 0; 633 err = 0;
632 offset = 0; 634 offset = 0;
633 frag = skb_shinfo(skb)->frag_list;
634 skb_frag_list_init(skb);
635 /* BUILD HEADER */ 635 /* BUILD HEADER */
636 636
637 *prevhdr = NEXTHDR_FRAGMENT; 637 *prevhdr = NEXTHDR_FRAGMENT;
@@ -639,8 +639,11 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
639 if (!tmp_hdr) { 639 if (!tmp_hdr) {
640 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 640 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
641 IPSTATS_MIB_FRAGFAILS); 641 IPSTATS_MIB_FRAGFAILS);
642 return -ENOMEM; 642 err = -ENOMEM;
643 goto fail;
643 } 644 }
645 frag = skb_shinfo(skb)->frag_list;
646 skb_frag_list_init(skb);
644 647
645 __skb_pull(skb, hlen); 648 __skb_pull(skb, hlen);
646 fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr)); 649 fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
@@ -737,7 +740,6 @@ slow_path:
737 */ 740 */
738 741
739 *prevhdr = NEXTHDR_FRAGMENT; 742 *prevhdr = NEXTHDR_FRAGMENT;
740 hroom = LL_RESERVED_SPACE(rt->dst.dev);
741 troom = rt->dst.dev->needed_tailroom; 743 troom = rt->dst.dev->needed_tailroom;
742 744
743 /* 745 /*
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index b0ab420612bc..eabffbb89795 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -126,36 +126,92 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
126 * Locking : hash tables are protected by RCU and RTNL 126 * Locking : hash tables are protected by RCU and RTNL
127 */ 127 */
128 128
129struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) 129static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst,
130 struct dst_entry *dst)
130{ 131{
131 struct dst_entry *dst = t->dst_cache; 132 write_seqlock_bh(&idst->lock);
133 dst_release(rcu_dereference_protected(
134 idst->dst,
135 lockdep_is_held(&idst->lock.lock)));
136 if (dst) {
137 dst_hold(dst);
138 idst->cookie = rt6_get_cookie((struct rt6_info *)dst);
139 } else {
140 idst->cookie = 0;
141 }
142 rcu_assign_pointer(idst->dst, dst);
143 write_sequnlock_bh(&idst->lock);
144}
145
146struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t)
147{
148 struct ip6_tnl_dst *idst;
149 struct dst_entry *dst;
150 unsigned int seq;
151 u32 cookie;
132 152
133 if (dst && dst->obsolete && 153 idst = raw_cpu_ptr(t->dst_cache);
134 !dst->ops->check(dst, t->dst_cookie)) { 154
135 t->dst_cache = NULL; 155 rcu_read_lock();
156 do {
157 seq = read_seqbegin(&idst->lock);
158 dst = rcu_dereference(idst->dst);
159 cookie = idst->cookie;
160 } while (read_seqretry(&idst->lock, seq));
161
162 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
163 dst = NULL;
164 rcu_read_unlock();
165
166 if (dst && dst->obsolete && !dst->ops->check(dst, cookie)) {
167 ip6_tnl_per_cpu_dst_set(idst, NULL);
136 dst_release(dst); 168 dst_release(dst);
137 return NULL; 169 dst = NULL;
138 } 170 }
139
140 return dst; 171 return dst;
141} 172}
142EXPORT_SYMBOL_GPL(ip6_tnl_dst_check); 173EXPORT_SYMBOL_GPL(ip6_tnl_dst_get);
143 174
144void ip6_tnl_dst_reset(struct ip6_tnl *t) 175void ip6_tnl_dst_reset(struct ip6_tnl *t)
145{ 176{
146 dst_release(t->dst_cache); 177 int i;
147 t->dst_cache = NULL; 178
179 for_each_possible_cpu(i)
180 ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL);
148} 181}
149EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); 182EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
150 183
151void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) 184void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst)
185{
186 ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst);
187
188}
189EXPORT_SYMBOL_GPL(ip6_tnl_dst_set);
190
191void ip6_tnl_dst_destroy(struct ip6_tnl *t)
152{ 192{
153 struct rt6_info *rt = (struct rt6_info *) dst; 193 if (!t->dst_cache)
154 t->dst_cookie = rt6_get_cookie(rt); 194 return;
155 dst_release(t->dst_cache); 195
156 t->dst_cache = dst; 196 ip6_tnl_dst_reset(t);
197 free_percpu(t->dst_cache);
157} 198}
158EXPORT_SYMBOL_GPL(ip6_tnl_dst_store); 199EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy);
200
201int ip6_tnl_dst_init(struct ip6_tnl *t)
202{
203 int i;
204
205 t->dst_cache = alloc_percpu(struct ip6_tnl_dst);
206 if (!t->dst_cache)
207 return -ENOMEM;
208
209 for_each_possible_cpu(i)
210 seqlock_init(&per_cpu_ptr(t->dst_cache, i)->lock);
211
212 return 0;
213}
214EXPORT_SYMBOL_GPL(ip6_tnl_dst_init);
159 215
160/** 216/**
161 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses 217 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
@@ -271,6 +327,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
271 327
272static void ip6_dev_free(struct net_device *dev) 328static void ip6_dev_free(struct net_device *dev)
273{ 329{
330 struct ip6_tnl *t = netdev_priv(dev);
331
332 ip6_tnl_dst_destroy(t);
274 free_percpu(dev->tstats); 333 free_percpu(dev->tstats);
275 free_netdev(dev); 334 free_netdev(dev);
276} 335}
@@ -510,14 +569,14 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
510 struct ipv6_tlv_tnl_enc_lim *tel; 569 struct ipv6_tlv_tnl_enc_lim *tel;
511 __u32 mtu; 570 __u32 mtu;
512 case ICMPV6_DEST_UNREACH: 571 case ICMPV6_DEST_UNREACH:
513 net_warn_ratelimited("%s: Path to destination invalid or inactive!\n", 572 net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
514 t->parms.name); 573 t->parms.name);
515 rel_msg = 1; 574 rel_msg = 1;
516 break; 575 break;
517 case ICMPV6_TIME_EXCEED: 576 case ICMPV6_TIME_EXCEED:
518 if ((*code) == ICMPV6_EXC_HOPLIMIT) { 577 if ((*code) == ICMPV6_EXC_HOPLIMIT) {
519 net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", 578 net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
520 t->parms.name); 579 t->parms.name);
521 rel_msg = 1; 580 rel_msg = 1;
522 } 581 }
523 break; 582 break;
@@ -529,13 +588,13 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
529 if (teli && teli == *info - 2) { 588 if (teli && teli == *info - 2) {
530 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; 589 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
531 if (tel->encap_limit == 0) { 590 if (tel->encap_limit == 0) {
532 net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", 591 net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
533 t->parms.name); 592 t->parms.name);
534 rel_msg = 1; 593 rel_msg = 1;
535 } 594 }
536 } else { 595 } else {
537 net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n", 596 net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
538 t->parms.name); 597 t->parms.name);
539 } 598 }
540 break; 599 break;
541 case ICMPV6_PKT_TOOBIG: 600 case ICMPV6_PKT_TOOBIG:
@@ -1010,23 +1069,23 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
1010 memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); 1069 memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
1011 neigh_release(neigh); 1070 neigh_release(neigh);
1012 } else if (!fl6->flowi6_mark) 1071 } else if (!fl6->flowi6_mark)
1013 dst = ip6_tnl_dst_check(t); 1072 dst = ip6_tnl_dst_get(t);
1014 1073
1015 if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) 1074 if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
1016 goto tx_err_link_failure; 1075 goto tx_err_link_failure;
1017 1076
1018 if (!dst) { 1077 if (!dst) {
1019 ndst = ip6_route_output(net, NULL, fl6); 1078 dst = ip6_route_output(net, NULL, fl6);
1020 1079
1021 if (ndst->error) 1080 if (dst->error)
1022 goto tx_err_link_failure; 1081 goto tx_err_link_failure;
1023 ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); 1082 dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
1024 if (IS_ERR(ndst)) { 1083 if (IS_ERR(dst)) {
1025 err = PTR_ERR(ndst); 1084 err = PTR_ERR(dst);
1026 ndst = NULL; 1085 dst = NULL;
1027 goto tx_err_link_failure; 1086 goto tx_err_link_failure;
1028 } 1087 }
1029 dst = ndst; 1088 ndst = dst;
1030 } 1089 }
1031 1090
1032 tdev = dst->dev; 1091 tdev = dst->dev;
@@ -1072,12 +1131,11 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
1072 consume_skb(skb); 1131 consume_skb(skb);
1073 skb = new_skb; 1132 skb = new_skb;
1074 } 1133 }
1075 if (fl6->flowi6_mark) { 1134
1076 skb_dst_set(skb, dst); 1135 if (!fl6->flowi6_mark && ndst)
1077 ndst = NULL; 1136 ip6_tnl_dst_set(t, ndst);
1078 } else { 1137 skb_dst_set(skb, dst);
1079 skb_dst_set_noref(skb, dst); 1138
1080 }
1081 skb->transport_header = skb->network_header; 1139 skb->transport_header = skb->network_header;
1082 1140
1083 proto = fl6->flowi6_proto; 1141 proto = fl6->flowi6_proto;
@@ -1101,14 +1159,12 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
1101 ipv6h->saddr = fl6->saddr; 1159 ipv6h->saddr = fl6->saddr;
1102 ipv6h->daddr = fl6->daddr; 1160 ipv6h->daddr = fl6->daddr;
1103 ip6tunnel_xmit(NULL, skb, dev); 1161 ip6tunnel_xmit(NULL, skb, dev);
1104 if (ndst)
1105 ip6_tnl_dst_store(t, ndst);
1106 return 0; 1162 return 0;
1107tx_err_link_failure: 1163tx_err_link_failure:
1108 stats->tx_carrier_errors++; 1164 stats->tx_carrier_errors++;
1109 dst_link_failure(skb); 1165 dst_link_failure(skb);
1110tx_err_dst_release: 1166tx_err_dst_release:
1111 dst_release(ndst); 1167 dst_release(dst);
1112 return err; 1168 return err;
1113} 1169}
1114 1170
@@ -1573,12 +1629,21 @@ static inline int
1573ip6_tnl_dev_init_gen(struct net_device *dev) 1629ip6_tnl_dev_init_gen(struct net_device *dev)
1574{ 1630{
1575 struct ip6_tnl *t = netdev_priv(dev); 1631 struct ip6_tnl *t = netdev_priv(dev);
1632 int ret;
1576 1633
1577 t->dev = dev; 1634 t->dev = dev;
1578 t->net = dev_net(dev); 1635 t->net = dev_net(dev);
1579 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); 1636 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1580 if (!dev->tstats) 1637 if (!dev->tstats)
1581 return -ENOMEM; 1638 return -ENOMEM;
1639
1640 ret = ip6_tnl_dst_init(t);
1641 if (ret) {
1642 free_percpu(dev->tstats);
1643 dev->tstats = NULL;
1644 return ret;
1645 }
1646
1582 return 0; 1647 return 0;
1583} 1648}
1584 1649
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 118f8fa1a809..6fbf6fdde7e7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1298,8 +1298,7 @@ static void ip6_link_failure(struct sk_buff *skb)
1298 if (rt) { 1298 if (rt) {
1299 if (rt->rt6i_flags & RTF_CACHE) { 1299 if (rt->rt6i_flags & RTF_CACHE) {
1300 dst_hold(&rt->dst); 1300 dst_hold(&rt->dst);
1301 if (ip6_del_rt(rt)) 1301 ip6_del_rt(rt);
1302 dst_free(&rt->dst);
1303 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) { 1302 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1304 rt->rt6i_node->fn_sernum = -1; 1303 rt->rt6i_node->fn_sernum = -1;
1305 } 1304 }
@@ -1862,9 +1861,11 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
1862 rt->dst.input = ip6_pkt_prohibit; 1861 rt->dst.input = ip6_pkt_prohibit;
1863 break; 1862 break;
1864 case RTN_THROW: 1863 case RTN_THROW:
1864 case RTN_UNREACHABLE:
1865 default: 1865 default:
1866 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN 1866 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1867 : -ENETUNREACH; 1867 : (cfg->fc_type == RTN_UNREACHABLE)
1868 ? -EHOSTUNREACH : -ENETUNREACH;
1868 rt->dst.output = ip6_pkt_discard_out; 1869 rt->dst.output = ip6_pkt_discard_out;
1869 rt->dst.input = ip6_pkt_discard; 1870 rt->dst.input = ip6_pkt_discard;
1870 break; 1871 break;
@@ -2004,7 +2005,8 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2004 struct fib6_table *table; 2005 struct fib6_table *table;
2005 struct net *net = dev_net(rt->dst.dev); 2006 struct net *net = dev_net(rt->dst.dev);
2006 2007
2007 if (rt == net->ipv6.ip6_null_entry) { 2008 if (rt == net->ipv6.ip6_null_entry ||
2009 rt->dst.flags & DST_NOCACHE) {
2008 err = -ENOENT; 2010 err = -ENOENT;
2009 goto out; 2011 goto out;
2010 } 2012 }
@@ -2491,6 +2493,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2491 rt->rt6i_dst.addr = *addr; 2493 rt->rt6i_dst.addr = *addr;
2492 rt->rt6i_dst.plen = 128; 2494 rt->rt6i_dst.plen = 128;
2493 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); 2495 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2496 rt->dst.flags |= DST_NOCACHE;
2494 2497
2495 atomic_set(&rt->dst.__refcnt, 1); 2498 atomic_set(&rt->dst.__refcnt, 1);
2496 2499
@@ -3279,7 +3282,8 @@ errout:
3279 return err; 3282 return err;
3280} 3283}
3281 3284
3282void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) 3285void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3286 unsigned int nlm_flags)
3283{ 3287{
3284 struct sk_buff *skb; 3288 struct sk_buff *skb;
3285 struct net *net = info->nl_net; 3289 struct net *net = info->nl_net;
@@ -3294,7 +3298,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
3294 goto errout; 3298 goto errout;
3295 3299
3296 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 3300 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3297 event, info->portid, seq, 0, 0, 0); 3301 event, info->portid, seq, 0, 0, nlm_flags);
3298 if (err < 0) { 3302 if (err < 0) {
3299 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 3303 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3300 WARN_ON(err == -EMSGSIZE); 3304 WARN_ON(err == -EMSGSIZE);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 17b1fe961c5d..7a77a1470f25 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2474,6 +2474,7 @@ static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy,
2474 2474
2475 bss_conf->cqm_rssi_thold = rssi_thold; 2475 bss_conf->cqm_rssi_thold = rssi_thold;
2476 bss_conf->cqm_rssi_hyst = rssi_hyst; 2476 bss_conf->cqm_rssi_hyst = rssi_hyst;
2477 sdata->u.mgd.last_cqm_event_signal = 0;
2477 2478
2478 /* tell the driver upon association, unless already associated */ 2479 /* tell the driver upon association, unless already associated */
2479 if (sdata->u.mgd.associated && 2480 if (sdata->u.mgd.associated &&
@@ -2518,15 +2519,17 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
2518 continue; 2519 continue;
2519 2520
2520 for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) { 2521 for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) {
2521 if (~sdata->rc_rateidx_mcs_mask[i][j]) 2522 if (~sdata->rc_rateidx_mcs_mask[i][j]) {
2522 sdata->rc_has_mcs_mask[i] = true; 2523 sdata->rc_has_mcs_mask[i] = true;
2524 break;
2525 }
2526 }
2523 2527
2524 if (~sdata->rc_rateidx_vht_mcs_mask[i][j]) 2528 for (j = 0; j < NL80211_VHT_NSS_MAX; j++) {
2529 if (~sdata->rc_rateidx_vht_mcs_mask[i][j]) {
2525 sdata->rc_has_vht_mcs_mask[i] = true; 2530 sdata->rc_has_vht_mcs_mask[i] = true;
2526
2527 if (sdata->rc_has_mcs_mask[i] &&
2528 sdata->rc_has_vht_mcs_mask[i])
2529 break; 2531 break;
2532 }
2530 } 2533 }
2531 } 2534 }
2532 2535
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 675d12c69e32..a5d41dfa9f05 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -107,12 +107,17 @@ EXPORT_SYMBOL(nf_log_register);
107 107
108void nf_log_unregister(struct nf_logger *logger) 108void nf_log_unregister(struct nf_logger *logger)
109{ 109{
110 const struct nf_logger *log;
110 int i; 111 int i;
111 112
112 mutex_lock(&nf_log_mutex); 113 mutex_lock(&nf_log_mutex);
113 for (i = 0; i < NFPROTO_NUMPROTO; i++) 114 for (i = 0; i < NFPROTO_NUMPROTO; i++) {
114 RCU_INIT_POINTER(loggers[i][logger->type], NULL); 115 log = nft_log_dereference(loggers[i][logger->type]);
116 if (log == logger)
117 RCU_INIT_POINTER(loggers[i][logger->type], NULL);
118 }
115 mutex_unlock(&nf_log_mutex); 119 mutex_unlock(&nf_log_mutex);
120 synchronize_rcu();
116} 121}
117EXPORT_SYMBOL(nf_log_unregister); 122EXPORT_SYMBOL(nf_log_unregister);
118 123
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 66def315eb56..9c8fab00164b 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -619,6 +619,13 @@ struct nft_xt {
619 619
620static struct nft_expr_type nft_match_type; 620static struct nft_expr_type nft_match_type;
621 621
622static bool nft_match_cmp(const struct xt_match *match,
623 const char *name, u32 rev, u32 family)
624{
625 return strcmp(match->name, name) == 0 && match->revision == rev &&
626 (match->family == NFPROTO_UNSPEC || match->family == family);
627}
628
622static const struct nft_expr_ops * 629static const struct nft_expr_ops *
623nft_match_select_ops(const struct nft_ctx *ctx, 630nft_match_select_ops(const struct nft_ctx *ctx,
624 const struct nlattr * const tb[]) 631 const struct nlattr * const tb[])
@@ -626,7 +633,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
626 struct nft_xt *nft_match; 633 struct nft_xt *nft_match;
627 struct xt_match *match; 634 struct xt_match *match;
628 char *mt_name; 635 char *mt_name;
629 __u32 rev, family; 636 u32 rev, family;
630 637
631 if (tb[NFTA_MATCH_NAME] == NULL || 638 if (tb[NFTA_MATCH_NAME] == NULL ||
632 tb[NFTA_MATCH_REV] == NULL || 639 tb[NFTA_MATCH_REV] == NULL ||
@@ -641,8 +648,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
641 list_for_each_entry(nft_match, &nft_match_list, head) { 648 list_for_each_entry(nft_match, &nft_match_list, head) {
642 struct xt_match *match = nft_match->ops.data; 649 struct xt_match *match = nft_match->ops.data;
643 650
644 if (strcmp(match->name, mt_name) == 0 && 651 if (nft_match_cmp(match, mt_name, rev, family)) {
645 match->revision == rev && match->family == family) {
646 if (!try_module_get(match->me)) 652 if (!try_module_get(match->me))
647 return ERR_PTR(-ENOENT); 653 return ERR_PTR(-ENOENT);
648 654
@@ -693,6 +699,13 @@ static LIST_HEAD(nft_target_list);
693 699
694static struct nft_expr_type nft_target_type; 700static struct nft_expr_type nft_target_type;
695 701
702static bool nft_target_cmp(const struct xt_target *tg,
703 const char *name, u32 rev, u32 family)
704{
705 return strcmp(tg->name, name) == 0 && tg->revision == rev &&
706 (tg->family == NFPROTO_UNSPEC || tg->family == family);
707}
708
696static const struct nft_expr_ops * 709static const struct nft_expr_ops *
697nft_target_select_ops(const struct nft_ctx *ctx, 710nft_target_select_ops(const struct nft_ctx *ctx,
698 const struct nlattr * const tb[]) 711 const struct nlattr * const tb[])
@@ -700,7 +713,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
700 struct nft_xt *nft_target; 713 struct nft_xt *nft_target;
701 struct xt_target *target; 714 struct xt_target *target;
702 char *tg_name; 715 char *tg_name;
703 __u32 rev, family; 716 u32 rev, family;
704 717
705 if (tb[NFTA_TARGET_NAME] == NULL || 718 if (tb[NFTA_TARGET_NAME] == NULL ||
706 tb[NFTA_TARGET_REV] == NULL || 719 tb[NFTA_TARGET_REV] == NULL ||
@@ -715,8 +728,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
715 list_for_each_entry(nft_target, &nft_target_list, head) { 728 list_for_each_entry(nft_target, &nft_target_list, head) {
716 struct xt_target *target = nft_target->ops.data; 729 struct xt_target *target = nft_target->ops.data;
717 730
718 if (strcmp(target->name, tg_name) == 0 && 731 if (nft_target_cmp(target, tg_name, rev, family)) {
719 target->revision == rev && target->family == family) {
720 if (!try_module_get(target->me)) 732 if (!try_module_get(target->me))
721 return ERR_PTR(-ENOENT); 733 return ERR_PTR(-ENOENT);
722 734
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7f86d3b55060..8f060d7f9a0e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -125,6 +125,24 @@ static inline u32 netlink_group_mask(u32 group)
125 return group ? 1 << (group - 1) : 0; 125 return group ? 1 << (group - 1) : 0;
126} 126}
127 127
128static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
129 gfp_t gfp_mask)
130{
131 unsigned int len = skb_end_offset(skb);
132 struct sk_buff *new;
133
134 new = alloc_skb(len, gfp_mask);
135 if (new == NULL)
136 return NULL;
137
138 NETLINK_CB(new).portid = NETLINK_CB(skb).portid;
139 NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group;
140 NETLINK_CB(new).creds = NETLINK_CB(skb).creds;
141
142 memcpy(skb_put(new, len), skb->data, len);
143 return new;
144}
145
128int netlink_add_tap(struct netlink_tap *nt) 146int netlink_add_tap(struct netlink_tap *nt)
129{ 147{
130 if (unlikely(nt->dev->type != ARPHRD_NETLINK)) 148 if (unlikely(nt->dev->type != ARPHRD_NETLINK))
@@ -206,7 +224,11 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
206 int ret = -ENOMEM; 224 int ret = -ENOMEM;
207 225
208 dev_hold(dev); 226 dev_hold(dev);
209 nskb = skb_clone(skb, GFP_ATOMIC); 227
228 if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head))
229 nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
230 else
231 nskb = skb_clone(skb, GFP_ATOMIC);
210 if (nskb) { 232 if (nskb) {
211 nskb->dev = dev; 233 nskb->dev = dev;
212 nskb->protocol = htons((u16) sk->sk_protocol); 234 nskb->protocol = htons((u16) sk->sk_protocol);
@@ -279,11 +301,6 @@ static void netlink_rcv_wake(struct sock *sk)
279} 301}
280 302
281#ifdef CONFIG_NETLINK_MMAP 303#ifdef CONFIG_NETLINK_MMAP
282static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
283{
284 return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
285}
286
287static bool netlink_rx_is_mmaped(struct sock *sk) 304static bool netlink_rx_is_mmaped(struct sock *sk)
288{ 305{
289 return nlk_sk(sk)->rx_ring.pg_vec != NULL; 306 return nlk_sk(sk)->rx_ring.pg_vec != NULL;
@@ -846,7 +863,6 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
846} 863}
847 864
848#else /* CONFIG_NETLINK_MMAP */ 865#else /* CONFIG_NETLINK_MMAP */
849#define netlink_skb_is_mmaped(skb) false
850#define netlink_rx_is_mmaped(sk) false 866#define netlink_rx_is_mmaped(sk) false
851#define netlink_tx_is_mmaped(sk) false 867#define netlink_tx_is_mmaped(sk) false
852#define netlink_mmap sock_no_mmap 868#define netlink_mmap sock_no_mmap
@@ -1094,8 +1110,8 @@ static int netlink_insert(struct sock *sk, u32 portid)
1094 1110
1095 lock_sock(sk); 1111 lock_sock(sk);
1096 1112
1097 err = -EBUSY; 1113 err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY;
1098 if (nlk_sk(sk)->portid) 1114 if (nlk_sk(sk)->bound)
1099 goto err; 1115 goto err;
1100 1116
1101 err = -ENOMEM; 1117 err = -ENOMEM;
@@ -1115,10 +1131,14 @@ static int netlink_insert(struct sock *sk, u32 portid)
1115 err = -EOVERFLOW; 1131 err = -EOVERFLOW;
1116 if (err == -EEXIST) 1132 if (err == -EEXIST)
1117 err = -EADDRINUSE; 1133 err = -EADDRINUSE;
1118 nlk_sk(sk)->portid = 0;
1119 sock_put(sk); 1134 sock_put(sk);
1135 goto err;
1120 } 1136 }
1121 1137
1138 /* We need to ensure that the socket is hashed and visible. */
1139 smp_wmb();
1140 nlk_sk(sk)->bound = portid;
1141
1122err: 1142err:
1123 release_sock(sk); 1143 release_sock(sk);
1124 return err; 1144 return err;
@@ -1503,6 +1523,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
1503 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; 1523 struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
1504 int err; 1524 int err;
1505 long unsigned int groups = nladdr->nl_groups; 1525 long unsigned int groups = nladdr->nl_groups;
1526 bool bound;
1506 1527
1507 if (addr_len < sizeof(struct sockaddr_nl)) 1528 if (addr_len < sizeof(struct sockaddr_nl))
1508 return -EINVAL; 1529 return -EINVAL;
@@ -1519,9 +1540,14 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
1519 return err; 1540 return err;
1520 } 1541 }
1521 1542
1522 if (nlk->portid) 1543 bound = nlk->bound;
1544 if (bound) {
1545 /* Ensure nlk->portid is up-to-date. */
1546 smp_rmb();
1547
1523 if (nladdr->nl_pid != nlk->portid) 1548 if (nladdr->nl_pid != nlk->portid)
1524 return -EINVAL; 1549 return -EINVAL;
1550 }
1525 1551
1526 if (nlk->netlink_bind && groups) { 1552 if (nlk->netlink_bind && groups) {
1527 int group; 1553 int group;
@@ -1537,7 +1563,10 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
1537 } 1563 }
1538 } 1564 }
1539 1565
1540 if (!nlk->portid) { 1566 /* No need for barriers here as we return to user-space without
1567 * using any of the bound attributes.
1568 */
1569 if (!bound) {
1541 err = nladdr->nl_pid ? 1570 err = nladdr->nl_pid ?
1542 netlink_insert(sk, nladdr->nl_pid) : 1571 netlink_insert(sk, nladdr->nl_pid) :
1543 netlink_autobind(sock); 1572 netlink_autobind(sock);
@@ -1585,7 +1614,10 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
1585 !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) 1614 !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
1586 return -EPERM; 1615 return -EPERM;
1587 1616
1588 if (!nlk->portid) 1617 /* No need for barriers here as we return to user-space without
1618 * using any of the bound attributes.
1619 */
1620 if (!nlk->bound)
1589 err = netlink_autobind(sock); 1621 err = netlink_autobind(sock);
1590 1622
1591 if (err == 0) { 1623 if (err == 0) {
@@ -2426,10 +2458,13 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
2426 dst_group = nlk->dst_group; 2458 dst_group = nlk->dst_group;
2427 } 2459 }
2428 2460
2429 if (!nlk->portid) { 2461 if (!nlk->bound) {
2430 err = netlink_autobind(sock); 2462 err = netlink_autobind(sock);
2431 if (err) 2463 if (err)
2432 goto out; 2464 goto out;
2465 } else {
2466 /* Ensure nlk is hashed and visible. */
2467 smp_rmb();
2433 } 2468 }
2434 2469
2435 /* It's a really convoluted way for userland to ask for mmaped 2470 /* It's a really convoluted way for userland to ask for mmaped
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 89008405d6b4..14437d9b1965 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -35,6 +35,7 @@ struct netlink_sock {
35 unsigned long state; 35 unsigned long state;
36 size_t max_recvmsg_len; 36 size_t max_recvmsg_len;
37 wait_queue_head_t wait; 37 wait_queue_head_t wait;
38 bool bound;
38 bool cb_running; 39 bool cb_running;
39 struct netlink_callback cb; 40 struct netlink_callback cb;
40 struct mutex *cb_mutex; 41 struct mutex *cb_mutex;
@@ -59,6 +60,15 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk)
59 return container_of(sk, struct netlink_sock, sk); 60 return container_of(sk, struct netlink_sock, sk);
60} 61}
61 62
63static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb)
64{
65#ifdef CONFIG_NETLINK_MMAP
66 return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
67#else
68 return false;
69#endif /* CONFIG_NETLINK_MMAP */
70}
71
62struct netlink_table { 72struct netlink_table {
63 struct rhashtable hash; 73 struct rhashtable hash;
64 struct hlist_head mc_list; 74 struct hlist_head mc_list;
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 2a071f470d57..d143aa9f6654 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -5,7 +5,8 @@
5config OPENVSWITCH 5config OPENVSWITCH
6 tristate "Open vSwitch" 6 tristate "Open vSwitch"
7 depends on INET 7 depends on INET
8 depends on (!NF_CONNTRACK || NF_CONNTRACK) 8 depends on !NF_CONNTRACK || \
9 (NF_CONNTRACK && (!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6))
9 select LIBCRC32C 10 select LIBCRC32C
10 select MPLS 11 select MPLS
11 select NET_MPLS_GSO 12 select NET_MPLS_GSO
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index aaf5cbd6d9ae..eb759e3a88ca 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -275,13 +275,15 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
275 case NFPROTO_IPV6: { 275 case NFPROTO_IPV6: {
276 u8 nexthdr = ipv6_hdr(skb)->nexthdr; 276 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
277 __be16 frag_off; 277 __be16 frag_off;
278 int ofs;
278 279
279 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), 280 ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
280 &nexthdr, &frag_off); 281 &frag_off);
281 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 282 if (ofs < 0 || (frag_off & htons(~0x7)) != 0) {
282 pr_debug("proto header not found\n"); 283 pr_debug("proto header not found\n");
283 return NF_ACCEPT; 284 return NF_ACCEPT;
284 } 285 }
286 protoff = ofs;
285 break; 287 break;
286 } 288 }
287 default: 289 default:
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 2913594c5123..a75828091e21 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -951,7 +951,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
951 if (error) 951 if (error)
952 goto err_kfree_flow; 952 goto err_kfree_flow;
953 953
954 ovs_flow_mask_key(&new_flow->key, &key, &mask); 954 ovs_flow_mask_key(&new_flow->key, &key, true, &mask);
955 955
956 /* Extract flow identifier. */ 956 /* Extract flow identifier. */
957 error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], 957 error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
@@ -1079,7 +1079,7 @@ static struct sw_flow_actions *get_flow_actions(struct net *net,
1079 struct sw_flow_key masked_key; 1079 struct sw_flow_key masked_key;
1080 int error; 1080 int error;
1081 1081
1082 ovs_flow_mask_key(&masked_key, key, mask); 1082 ovs_flow_mask_key(&masked_key, key, true, mask);
1083 error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log); 1083 error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
1084 if (error) { 1084 if (error) {
1085 OVS_NLERR(log, 1085 OVS_NLERR(log,
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index c92d6a262bc5..5c030a4d7338 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -57,6 +57,7 @@ struct ovs_len_tbl {
57}; 57};
58 58
59#define OVS_ATTR_NESTED -1 59#define OVS_ATTR_NESTED -1
60#define OVS_ATTR_VARIABLE -2
60 61
61static void update_range(struct sw_flow_match *match, 62static void update_range(struct sw_flow_match *match,
62 size_t offset, size_t size, bool is_mask) 63 size_t offset, size_t size, bool is_mask)
@@ -304,6 +305,10 @@ size_t ovs_key_attr_size(void)
304 + nla_total_size(28); /* OVS_KEY_ATTR_ND */ 305 + nla_total_size(28); /* OVS_KEY_ATTR_ND */
305} 306}
306 307
308static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = {
309 [OVS_VXLAN_EXT_GBP] = { .len = sizeof(u32) },
310};
311
307static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { 312static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
308 [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) }, 313 [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) },
309 [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) }, 314 [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) },
@@ -315,8 +320,9 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
315 [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) }, 320 [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) },
316 [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, 321 [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) },
317 [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, 322 [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 },
318 [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED }, 323 [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_VARIABLE },
319 [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED }, 324 [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED,
325 .next = ovs_vxlan_ext_key_lens },
320}; 326};
321 327
322/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ 328/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
@@ -349,6 +355,13 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
349 [OVS_KEY_ATTR_CT_LABEL] = { .len = sizeof(struct ovs_key_ct_label) }, 355 [OVS_KEY_ATTR_CT_LABEL] = { .len = sizeof(struct ovs_key_ct_label) },
350}; 356};
351 357
358static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
359{
360 return expected_len == attr_len ||
361 expected_len == OVS_ATTR_NESTED ||
362 expected_len == OVS_ATTR_VARIABLE;
363}
364
352static bool is_all_zero(const u8 *fp, size_t size) 365static bool is_all_zero(const u8 *fp, size_t size)
353{ 366{
354 int i; 367 int i;
@@ -388,7 +401,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
388 } 401 }
389 402
390 expected_len = ovs_key_lens[type].len; 403 expected_len = ovs_key_lens[type].len;
391 if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) { 404 if (!check_attr_len(nla_len(nla), expected_len)) {
392 OVS_NLERR(log, "Key %d has unexpected len %d expected %d", 405 OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
393 type, nla_len(nla), expected_len); 406 type, nla_len(nla), expected_len);
394 return -EINVAL; 407 return -EINVAL;
@@ -473,29 +486,50 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a,
473 return 0; 486 return 0;
474} 487}
475 488
476static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = { 489static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
477 [OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 },
478};
479
480static int vxlan_tun_opt_from_nlattr(const struct nlattr *a,
481 struct sw_flow_match *match, bool is_mask, 490 struct sw_flow_match *match, bool is_mask,
482 bool log) 491 bool log)
483{ 492{
484 struct nlattr *tb[OVS_VXLAN_EXT_MAX+1]; 493 struct nlattr *a;
494 int rem;
485 unsigned long opt_key_offset; 495 unsigned long opt_key_offset;
486 struct vxlan_metadata opts; 496 struct vxlan_metadata opts;
487 int err;
488 497
489 BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); 498 BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
490 499
491 err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy);
492 if (err < 0)
493 return err;
494
495 memset(&opts, 0, sizeof(opts)); 500 memset(&opts, 0, sizeof(opts));
501 nla_for_each_nested(a, attr, rem) {
502 int type = nla_type(a);
496 503
497 if (tb[OVS_VXLAN_EXT_GBP]) 504 if (type > OVS_VXLAN_EXT_MAX) {
498 opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]); 505 OVS_NLERR(log, "VXLAN extension %d out of range max %d",
506 type, OVS_VXLAN_EXT_MAX);
507 return -EINVAL;
508 }
509
510 if (!check_attr_len(nla_len(a),
511 ovs_vxlan_ext_key_lens[type].len)) {
512 OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d",
513 type, nla_len(a),
514 ovs_vxlan_ext_key_lens[type].len);
515 return -EINVAL;
516 }
517
518 switch (type) {
519 case OVS_VXLAN_EXT_GBP:
520 opts.gbp = nla_get_u32(a);
521 break;
522 default:
523 OVS_NLERR(log, "Unknown VXLAN extension attribute %d",
524 type);
525 return -EINVAL;
526 }
527 }
528 if (rem) {
529 OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.",
530 rem);
531 return -EINVAL;
532 }
499 533
500 if (!is_mask) 534 if (!is_mask)
501 SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false); 535 SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
@@ -528,8 +562,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
528 return -EINVAL; 562 return -EINVAL;
529 } 563 }
530 564
531 if (ovs_tunnel_key_lens[type].len != nla_len(a) && 565 if (!check_attr_len(nla_len(a),
532 ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) { 566 ovs_tunnel_key_lens[type].len)) {
533 OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", 567 OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
534 type, nla_len(a), ovs_tunnel_key_lens[type].len); 568 type, nla_len(a), ovs_tunnel_key_lens[type].len);
535 return -EINVAL; 569 return -EINVAL;
@@ -1052,10 +1086,13 @@ static void nlattr_set(struct nlattr *attr, u8 val,
1052 1086
1053 /* The nlattr stream should already have been validated */ 1087 /* The nlattr stream should already have been validated */
1054 nla_for_each_nested(nla, attr, rem) { 1088 nla_for_each_nested(nla, attr, rem) {
1055 if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED) 1089 if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) {
1056 nlattr_set(nla, val, tbl[nla_type(nla)].next); 1090 if (tbl[nla_type(nla)].next)
1057 else 1091 tbl = tbl[nla_type(nla)].next;
1092 nlattr_set(nla, val, tbl);
1093 } else {
1058 memset(nla_data(nla), val, nla_len(nla)); 1094 memset(nla_data(nla), val, nla_len(nla));
1095 }
1059 } 1096 }
1060} 1097}
1061 1098
@@ -1922,8 +1959,7 @@ static int validate_set(const struct nlattr *a,
1922 key_len /= 2; 1959 key_len /= 2;
1923 1960
1924 if (key_type > OVS_KEY_ATTR_MAX || 1961 if (key_type > OVS_KEY_ATTR_MAX ||
1925 (ovs_key_lens[key_type].len != key_len && 1962 !check_attr_len(key_len, ovs_key_lens[key_type].len))
1926 ovs_key_lens[key_type].len != OVS_ATTR_NESTED))
1927 return -EINVAL; 1963 return -EINVAL;
1928 1964
1929 if (masked && !validate_masked(nla_data(ovs_key), key_len)) 1965 if (masked && !validate_masked(nla_data(ovs_key), key_len))
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index d22d8e948d0f..f2ea83ba4763 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -57,20 +57,21 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range)
57} 57}
58 58
59void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, 59void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
60 const struct sw_flow_mask *mask) 60 bool full, const struct sw_flow_mask *mask)
61{ 61{
62 const long *m = (const long *)((const u8 *)&mask->key + 62 int start = full ? 0 : mask->range.start;
63 mask->range.start); 63 int len = full ? sizeof *dst : range_n_bytes(&mask->range);
64 const long *s = (const long *)((const u8 *)src + 64 const long *m = (const long *)((const u8 *)&mask->key + start);
65 mask->range.start); 65 const long *s = (const long *)((const u8 *)src + start);
66 long *d = (long *)((u8 *)dst + mask->range.start); 66 long *d = (long *)((u8 *)dst + start);
67 int i; 67 int i;
68 68
69 /* The memory outside of the 'mask->range' are not set since 69 /* If 'full' is true then all of 'dst' is fully initialized. Otherwise,
70 * further operations on 'dst' only uses contents within 70 * if 'full' is false the memory outside of the 'mask->range' is left
71 * 'mask->range'. 71 * uninitialized. This can be used as an optimization when further
72 * operations on 'dst' only use contents within 'mask->range'.
72 */ 73 */
73 for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) 74 for (i = 0; i < len; i += sizeof(long))
74 *d++ = *s++ & *m++; 75 *d++ = *s++ & *m++;
75} 76}
76 77
@@ -475,7 +476,7 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
475 u32 hash; 476 u32 hash;
476 struct sw_flow_key masked_key; 477 struct sw_flow_key masked_key;
477 478
478 ovs_flow_mask_key(&masked_key, unmasked, mask); 479 ovs_flow_mask_key(&masked_key, unmasked, false, mask);
479 hash = flow_hash(&masked_key, &mask->range); 480 hash = flow_hash(&masked_key, &mask->range);
480 head = find_bucket(ti, hash); 481 head = find_bucket(ti, hash);
481 hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { 482 hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 616eda10d955..2dd9900f533d 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -86,5 +86,5 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *,
86bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *); 86bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *);
87 87
88void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, 88void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
89 const struct sw_flow_mask *mask); 89 bool full, const struct sw_flow_mask *mask);
90#endif /* flow_table.h */ 90#endif /* flow_table.h */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 7b8e39a22387..aa4b15c35884 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -230,6 +230,8 @@ struct packet_skb_cb {
230 } sa; 230 } sa;
231}; 231};
232 232
233#define vio_le() virtio_legacy_is_little_endian()
234
233#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) 235#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
234 236
235#define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc)) 237#define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
@@ -2680,15 +2682,15 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2680 goto out_unlock; 2682 goto out_unlock;
2681 2683
2682 if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && 2684 if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
2683 (__virtio16_to_cpu(false, vnet_hdr.csum_start) + 2685 (__virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) +
2684 __virtio16_to_cpu(false, vnet_hdr.csum_offset) + 2 > 2686 __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2 >
2685 __virtio16_to_cpu(false, vnet_hdr.hdr_len))) 2687 __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len)))
2686 vnet_hdr.hdr_len = __cpu_to_virtio16(false, 2688 vnet_hdr.hdr_len = __cpu_to_virtio16(vio_le(),
2687 __virtio16_to_cpu(false, vnet_hdr.csum_start) + 2689 __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) +
2688 __virtio16_to_cpu(false, vnet_hdr.csum_offset) + 2); 2690 __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2);
2689 2691
2690 err = -EINVAL; 2692 err = -EINVAL;
2691 if (__virtio16_to_cpu(false, vnet_hdr.hdr_len) > len) 2693 if (__virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len) > len)
2692 goto out_unlock; 2694 goto out_unlock;
2693 2695
2694 if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { 2696 if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
@@ -2731,7 +2733,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2731 hlen = LL_RESERVED_SPACE(dev); 2733 hlen = LL_RESERVED_SPACE(dev);
2732 tlen = dev->needed_tailroom; 2734 tlen = dev->needed_tailroom;
2733 skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, 2735 skb = packet_alloc_skb(sk, hlen + tlen, hlen, len,
2734 __virtio16_to_cpu(false, vnet_hdr.hdr_len), 2736 __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len),
2735 msg->msg_flags & MSG_DONTWAIT, &err); 2737 msg->msg_flags & MSG_DONTWAIT, &err);
2736 if (skb == NULL) 2738 if (skb == NULL)
2737 goto out_unlock; 2739 goto out_unlock;
@@ -2778,8 +2780,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2778 2780
2779 if (po->has_vnet_hdr) { 2781 if (po->has_vnet_hdr) {
2780 if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2782 if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
2781 u16 s = __virtio16_to_cpu(false, vnet_hdr.csum_start); 2783 u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start);
2782 u16 o = __virtio16_to_cpu(false, vnet_hdr.csum_offset); 2784 u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset);
2783 if (!skb_partial_csum_set(skb, s, o)) { 2785 if (!skb_partial_csum_set(skb, s, o)) {
2784 err = -EINVAL; 2786 err = -EINVAL;
2785 goto out_free; 2787 goto out_free;
@@ -2787,7 +2789,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2787 } 2789 }
2788 2790
2789 skb_shinfo(skb)->gso_size = 2791 skb_shinfo(skb)->gso_size =
2790 __virtio16_to_cpu(false, vnet_hdr.gso_size); 2792 __virtio16_to_cpu(vio_le(), vnet_hdr.gso_size);
2791 skb_shinfo(skb)->gso_type = gso_type; 2793 skb_shinfo(skb)->gso_type = gso_type;
2792 2794
2793 /* Header must be checked, and gso_segs computed. */ 2795 /* Header must be checked, and gso_segs computed. */
@@ -3161,9 +3163,9 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
3161 3163
3162 /* This is a hint as to how much should be linear. */ 3164 /* This is a hint as to how much should be linear. */
3163 vnet_hdr.hdr_len = 3165 vnet_hdr.hdr_len =
3164 __cpu_to_virtio16(false, skb_headlen(skb)); 3166 __cpu_to_virtio16(vio_le(), skb_headlen(skb));
3165 vnet_hdr.gso_size = 3167 vnet_hdr.gso_size =
3166 __cpu_to_virtio16(false, sinfo->gso_size); 3168 __cpu_to_virtio16(vio_le(), sinfo->gso_size);
3167 if (sinfo->gso_type & SKB_GSO_TCPV4) 3169 if (sinfo->gso_type & SKB_GSO_TCPV4)
3168 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 3170 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
3169 else if (sinfo->gso_type & SKB_GSO_TCPV6) 3171 else if (sinfo->gso_type & SKB_GSO_TCPV6)
@@ -3181,9 +3183,9 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
3181 3183
3182 if (skb->ip_summed == CHECKSUM_PARTIAL) { 3184 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3183 vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 3185 vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
3184 vnet_hdr.csum_start = __cpu_to_virtio16(false, 3186 vnet_hdr.csum_start = __cpu_to_virtio16(vio_le(),
3185 skb_checksum_start_offset(skb)); 3187 skb_checksum_start_offset(skb));
3186 vnet_hdr.csum_offset = __cpu_to_virtio16(false, 3188 vnet_hdr.csum_offset = __cpu_to_virtio16(vio_le(),
3187 skb->csum_offset); 3189 skb->csum_offset);
3188 } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { 3190 } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
3189 vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID; 3191 vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 715e01e5910a..f23a3b68bba6 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -33,7 +33,6 @@
33 33
34struct fw_head { 34struct fw_head {
35 u32 mask; 35 u32 mask;
36 bool mask_set;
37 struct fw_filter __rcu *ht[HTSIZE]; 36 struct fw_filter __rcu *ht[HTSIZE];
38 struct rcu_head rcu; 37 struct rcu_head rcu;
39}; 38};
@@ -84,7 +83,7 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
84 } 83 }
85 } 84 }
86 } else { 85 } else {
87 /* old method */ 86 /* Old method: classify the packet using its skb mark. */
88 if (id && (TC_H_MAJ(id) == 0 || 87 if (id && (TC_H_MAJ(id) == 0 ||
89 !(TC_H_MAJ(id ^ tp->q->handle)))) { 88 !(TC_H_MAJ(id ^ tp->q->handle)))) {
90 res->classid = id; 89 res->classid = id;
@@ -114,14 +113,9 @@ static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
114 113
115static int fw_init(struct tcf_proto *tp) 114static int fw_init(struct tcf_proto *tp)
116{ 115{
117 struct fw_head *head; 116 /* We don't allocate fw_head here, because in the old method
118 117 * we don't need it at all.
119 head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); 118 */
120 if (head == NULL)
121 return -ENOBUFS;
122
123 head->mask_set = false;
124 rcu_assign_pointer(tp->root, head);
125 return 0; 119 return 0;
126} 120}
127 121
@@ -252,7 +246,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
252 int err; 246 int err;
253 247
254 if (!opt) 248 if (!opt)
255 return handle ? -EINVAL : 0; 249 return handle ? -EINVAL : 0; /* Succeed if it is old method. */
256 250
257 err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy); 251 err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy);
258 if (err < 0) 252 if (err < 0)
@@ -302,11 +296,17 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
302 if (!handle) 296 if (!handle)
303 return -EINVAL; 297 return -EINVAL;
304 298
305 if (!head->mask_set) { 299 if (!head) {
306 head->mask = 0xFFFFFFFF; 300 u32 mask = 0xFFFFFFFF;
307 if (tb[TCA_FW_MASK]) 301 if (tb[TCA_FW_MASK])
308 head->mask = nla_get_u32(tb[TCA_FW_MASK]); 302 mask = nla_get_u32(tb[TCA_FW_MASK]);
309 head->mask_set = true; 303
304 head = kzalloc(sizeof(*head), GFP_KERNEL);
305 if (!head)
306 return -ENOBUFS;
307 head->mask = mask;
308
309 rcu_assign_pointer(tp->root, head);
310 } 310 }
311 311
312 f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL); 312 f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index b7143337e4fa..3d9ea9a48289 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1186,7 +1186,7 @@ static void sctp_v4_del_protocol(void)
1186 unregister_inetaddr_notifier(&sctp_inetaddr_notifier); 1186 unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
1187} 1187}
1188 1188
1189static int __net_init sctp_net_init(struct net *net) 1189static int __net_init sctp_defaults_init(struct net *net)
1190{ 1190{
1191 int status; 1191 int status;
1192 1192
@@ -1279,12 +1279,6 @@ static int __net_init sctp_net_init(struct net *net)
1279 1279
1280 sctp_dbg_objcnt_init(net); 1280 sctp_dbg_objcnt_init(net);
1281 1281
1282 /* Initialize the control inode/socket for handling OOTB packets. */
1283 if ((status = sctp_ctl_sock_init(net))) {
1284 pr_err("Failed to initialize the SCTP control sock\n");
1285 goto err_ctl_sock_init;
1286 }
1287
1288 /* Initialize the local address list. */ 1282 /* Initialize the local address list. */
1289 INIT_LIST_HEAD(&net->sctp.local_addr_list); 1283 INIT_LIST_HEAD(&net->sctp.local_addr_list);
1290 spin_lock_init(&net->sctp.local_addr_lock); 1284 spin_lock_init(&net->sctp.local_addr_lock);
@@ -1300,9 +1294,6 @@ static int __net_init sctp_net_init(struct net *net)
1300 1294
1301 return 0; 1295 return 0;
1302 1296
1303err_ctl_sock_init:
1304 sctp_dbg_objcnt_exit(net);
1305 sctp_proc_exit(net);
1306err_init_proc: 1297err_init_proc:
1307 cleanup_sctp_mibs(net); 1298 cleanup_sctp_mibs(net);
1308err_init_mibs: 1299err_init_mibs:
@@ -1311,15 +1302,12 @@ err_sysctl_register:
1311 return status; 1302 return status;
1312} 1303}
1313 1304
1314static void __net_exit sctp_net_exit(struct net *net) 1305static void __net_exit sctp_defaults_exit(struct net *net)
1315{ 1306{
1316 /* Free the local address list */ 1307 /* Free the local address list */
1317 sctp_free_addr_wq(net); 1308 sctp_free_addr_wq(net);
1318 sctp_free_local_addr_list(net); 1309 sctp_free_local_addr_list(net);
1319 1310
1320 /* Free the control endpoint. */
1321 inet_ctl_sock_destroy(net->sctp.ctl_sock);
1322
1323 sctp_dbg_objcnt_exit(net); 1311 sctp_dbg_objcnt_exit(net);
1324 1312
1325 sctp_proc_exit(net); 1313 sctp_proc_exit(net);
@@ -1327,9 +1315,32 @@ static void __net_exit sctp_net_exit(struct net *net)
1327 sctp_sysctl_net_unregister(net); 1315 sctp_sysctl_net_unregister(net);
1328} 1316}
1329 1317
1330static struct pernet_operations sctp_net_ops = { 1318static struct pernet_operations sctp_defaults_ops = {
1331 .init = sctp_net_init, 1319 .init = sctp_defaults_init,
1332 .exit = sctp_net_exit, 1320 .exit = sctp_defaults_exit,
1321};
1322
1323static int __net_init sctp_ctrlsock_init(struct net *net)
1324{
1325 int status;
1326
1327 /* Initialize the control inode/socket for handling OOTB packets. */
1328 status = sctp_ctl_sock_init(net);
1329 if (status)
1330 pr_err("Failed to initialize the SCTP control sock\n");
1331
1332 return status;
1333}
1334
1335static void __net_init sctp_ctrlsock_exit(struct net *net)
1336{
1337 /* Free the control endpoint. */
1338 inet_ctl_sock_destroy(net->sctp.ctl_sock);
1339}
1340
1341static struct pernet_operations sctp_ctrlsock_ops = {
1342 .init = sctp_ctrlsock_init,
1343 .exit = sctp_ctrlsock_exit,
1333}; 1344};
1334 1345
1335/* Initialize the universe into something sensible. */ 1346/* Initialize the universe into something sensible. */
@@ -1462,8 +1473,11 @@ static __init int sctp_init(void)
1462 sctp_v4_pf_init(); 1473 sctp_v4_pf_init();
1463 sctp_v6_pf_init(); 1474 sctp_v6_pf_init();
1464 1475
1465 status = sctp_v4_protosw_init(); 1476 status = register_pernet_subsys(&sctp_defaults_ops);
1477 if (status)
1478 goto err_register_defaults;
1466 1479
1480 status = sctp_v4_protosw_init();
1467 if (status) 1481 if (status)
1468 goto err_protosw_init; 1482 goto err_protosw_init;
1469 1483
@@ -1471,9 +1485,9 @@ static __init int sctp_init(void)
1471 if (status) 1485 if (status)
1472 goto err_v6_protosw_init; 1486 goto err_v6_protosw_init;
1473 1487
1474 status = register_pernet_subsys(&sctp_net_ops); 1488 status = register_pernet_subsys(&sctp_ctrlsock_ops);
1475 if (status) 1489 if (status)
1476 goto err_register_pernet_subsys; 1490 goto err_register_ctrlsock;
1477 1491
1478 status = sctp_v4_add_protocol(); 1492 status = sctp_v4_add_protocol();
1479 if (status) 1493 if (status)
@@ -1489,12 +1503,14 @@ out:
1489err_v6_add_protocol: 1503err_v6_add_protocol:
1490 sctp_v4_del_protocol(); 1504 sctp_v4_del_protocol();
1491err_add_protocol: 1505err_add_protocol:
1492 unregister_pernet_subsys(&sctp_net_ops); 1506 unregister_pernet_subsys(&sctp_ctrlsock_ops);
1493err_register_pernet_subsys: 1507err_register_ctrlsock:
1494 sctp_v6_protosw_exit(); 1508 sctp_v6_protosw_exit();
1495err_v6_protosw_init: 1509err_v6_protosw_init:
1496 sctp_v4_protosw_exit(); 1510 sctp_v4_protosw_exit();
1497err_protosw_init: 1511err_protosw_init:
1512 unregister_pernet_subsys(&sctp_defaults_ops);
1513err_register_defaults:
1498 sctp_v4_pf_exit(); 1514 sctp_v4_pf_exit();
1499 sctp_v6_pf_exit(); 1515 sctp_v6_pf_exit();
1500 sctp_sysctl_unregister(); 1516 sctp_sysctl_unregister();
@@ -1527,12 +1543,14 @@ static __exit void sctp_exit(void)
1527 sctp_v6_del_protocol(); 1543 sctp_v6_del_protocol();
1528 sctp_v4_del_protocol(); 1544 sctp_v4_del_protocol();
1529 1545
1530 unregister_pernet_subsys(&sctp_net_ops); 1546 unregister_pernet_subsys(&sctp_ctrlsock_ops);
1531 1547
1532 /* Free protosw registrations */ 1548 /* Free protosw registrations */
1533 sctp_v6_protosw_exit(); 1549 sctp_v6_protosw_exit();
1534 sctp_v4_protosw_exit(); 1550 sctp_v4_protosw_exit();
1535 1551
1552 unregister_pernet_subsys(&sctp_defaults_ops);
1553
1536 /* Unregister with socket layer. */ 1554 /* Unregister with socket layer. */
1537 sctp_v6_pf_exit(); 1555 sctp_v6_pf_exit();
1538 sctp_v4_pf_exit(); 1556 sctp_v4_pf_exit();
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index b140c092d226..f14f24ee9983 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -297,7 +297,7 @@ static int rpc_complete_task(struct rpc_task *task)
297 clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); 297 clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
298 ret = atomic_dec_and_test(&task->tk_count); 298 ret = atomic_dec_and_test(&task->tk_count);
299 if (waitqueue_active(wq)) 299 if (waitqueue_active(wq))
300 __wake_up_locked_key(wq, TASK_NORMAL, 1, &k); 300 __wake_up_locked_key(wq, TASK_NORMAL, &k);
301 spin_unlock_irqrestore(&wq->lock, flags); 301 spin_unlock_irqrestore(&wq->lock, flags);
302 return ret; 302 return ret;
303} 303}
@@ -1092,14 +1092,10 @@ void
1092rpc_destroy_mempool(void) 1092rpc_destroy_mempool(void)
1093{ 1093{
1094 rpciod_stop(); 1094 rpciod_stop();
1095 if (rpc_buffer_mempool) 1095 mempool_destroy(rpc_buffer_mempool);
1096 mempool_destroy(rpc_buffer_mempool); 1096 mempool_destroy(rpc_task_mempool);
1097 if (rpc_task_mempool) 1097 kmem_cache_destroy(rpc_task_slabp);
1098 mempool_destroy(rpc_task_mempool); 1098 kmem_cache_destroy(rpc_buffer_slabp);
1099 if (rpc_task_slabp)
1100 kmem_cache_destroy(rpc_task_slabp);
1101 if (rpc_buffer_slabp)
1102 kmem_cache_destroy(rpc_buffer_slabp);
1103 rpc_destroy_wait_queue(&delay_queue); 1099 rpc_destroy_wait_queue(&delay_queue);
1104} 1100}
1105 1101
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ab5dd621ae0c..2e98f4a243e5 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -614,6 +614,7 @@ static void xprt_autoclose(struct work_struct *work)
614 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 614 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
615 xprt->ops->close(xprt); 615 xprt->ops->close(xprt);
616 xprt_release_write(xprt, NULL); 616 xprt_release_write(xprt, NULL);
617 wake_up_bit(&xprt->state, XPRT_LOCKED);
617} 618}
618 619
619/** 620/**
@@ -723,6 +724,7 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
723 xprt->ops->release_xprt(xprt, NULL); 724 xprt->ops->release_xprt(xprt, NULL);
724out: 725out:
725 spin_unlock_bh(&xprt->transport_lock); 726 spin_unlock_bh(&xprt->transport_lock);
727 wake_up_bit(&xprt->state, XPRT_LOCKED);
726} 728}
727 729
728/** 730/**
@@ -1394,6 +1396,10 @@ out:
1394static void xprt_destroy(struct rpc_xprt *xprt) 1396static void xprt_destroy(struct rpc_xprt *xprt)
1395{ 1397{
1396 dprintk("RPC: destroying transport %p\n", xprt); 1398 dprintk("RPC: destroying transport %p\n", xprt);
1399
1400 /* Exclude transport connect/disconnect handlers */
1401 wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE);
1402
1397 del_timer_sync(&xprt->timer); 1403 del_timer_sync(&xprt->timer);
1398 1404
1399 rpc_xprt_debugfs_unregister(xprt); 1405 rpc_xprt_debugfs_unregister(xprt);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 7be90bc1a7c2..1a85e0ed0b48 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -777,7 +777,6 @@ static void xs_sock_mark_closed(struct rpc_xprt *xprt)
777 xs_sock_reset_connection_flags(xprt); 777 xs_sock_reset_connection_flags(xprt);
778 /* Mark transport as closed and wake up all pending tasks */ 778 /* Mark transport as closed and wake up all pending tasks */
779 xprt_disconnect_done(xprt); 779 xprt_disconnect_done(xprt);
780 xprt_force_disconnect(xprt);
781} 780}
782 781
783/** 782/**
@@ -881,8 +880,11 @@ static void xs_xprt_free(struct rpc_xprt *xprt)
881 */ 880 */
882static void xs_destroy(struct rpc_xprt *xprt) 881static void xs_destroy(struct rpc_xprt *xprt)
883{ 882{
883 struct sock_xprt *transport = container_of(xprt,
884 struct sock_xprt, xprt);
884 dprintk("RPC: xs_destroy xprt %p\n", xprt); 885 dprintk("RPC: xs_destroy xprt %p\n", xprt);
885 886
887 cancel_delayed_work_sync(&transport->connect_worker);
886 xs_close(xprt); 888 xs_close(xprt);
887 xs_xprt_free(xprt); 889 xs_xprt_free(xprt);
888 module_put(THIS_MODULE); 890 module_put(THIS_MODULE);
@@ -1435,6 +1437,7 @@ out:
1435static void xs_tcp_state_change(struct sock *sk) 1437static void xs_tcp_state_change(struct sock *sk)
1436{ 1438{
1437 struct rpc_xprt *xprt; 1439 struct rpc_xprt *xprt;
1440 struct sock_xprt *transport;
1438 1441
1439 read_lock_bh(&sk->sk_callback_lock); 1442 read_lock_bh(&sk->sk_callback_lock);
1440 if (!(xprt = xprt_from_sock(sk))) 1443 if (!(xprt = xprt_from_sock(sk)))
@@ -1446,13 +1449,12 @@ static void xs_tcp_state_change(struct sock *sk)
1446 sock_flag(sk, SOCK_ZAPPED), 1449 sock_flag(sk, SOCK_ZAPPED),
1447 sk->sk_shutdown); 1450 sk->sk_shutdown);
1448 1451
1452 transport = container_of(xprt, struct sock_xprt, xprt);
1449 trace_rpc_socket_state_change(xprt, sk->sk_socket); 1453 trace_rpc_socket_state_change(xprt, sk->sk_socket);
1450 switch (sk->sk_state) { 1454 switch (sk->sk_state) {
1451 case TCP_ESTABLISHED: 1455 case TCP_ESTABLISHED:
1452 spin_lock(&xprt->transport_lock); 1456 spin_lock(&xprt->transport_lock);
1453 if (!xprt_test_and_set_connected(xprt)) { 1457 if (!xprt_test_and_set_connected(xprt)) {
1454 struct sock_xprt *transport = container_of(xprt,
1455 struct sock_xprt, xprt);
1456 1458
1457 /* Reset TCP record info */ 1459 /* Reset TCP record info */
1458 transport->tcp_offset = 0; 1460 transport->tcp_offset = 0;
@@ -1461,6 +1463,8 @@ static void xs_tcp_state_change(struct sock *sk)
1461 transport->tcp_flags = 1463 transport->tcp_flags =
1462 TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; 1464 TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
1463 xprt->connect_cookie++; 1465 xprt->connect_cookie++;
1466 clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
1467 xprt_clear_connecting(xprt);
1464 1468
1465 xprt_wake_pending_tasks(xprt, -EAGAIN); 1469 xprt_wake_pending_tasks(xprt, -EAGAIN);
1466 } 1470 }
@@ -1496,6 +1500,9 @@ static void xs_tcp_state_change(struct sock *sk)
1496 smp_mb__after_atomic(); 1500 smp_mb__after_atomic();
1497 break; 1501 break;
1498 case TCP_CLOSE: 1502 case TCP_CLOSE:
1503 if (test_and_clear_bit(XPRT_SOCK_CONNECTING,
1504 &transport->sock_state))
1505 xprt_clear_connecting(xprt);
1499 xs_sock_mark_closed(xprt); 1506 xs_sock_mark_closed(xprt);
1500 } 1507 }
1501 out: 1508 out:
@@ -2179,6 +2186,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
2179 /* Tell the socket layer to start connecting... */ 2186 /* Tell the socket layer to start connecting... */
2180 xprt->stat.connect_count++; 2187 xprt->stat.connect_count++;
2181 xprt->stat.connect_start = jiffies; 2188 xprt->stat.connect_start = jiffies;
2189 set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
2182 ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); 2190 ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
2183 switch (ret) { 2191 switch (ret) {
2184 case 0: 2192 case 0:
@@ -2240,7 +2248,6 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2240 case -EINPROGRESS: 2248 case -EINPROGRESS:
2241 case -EALREADY: 2249 case -EALREADY:
2242 xprt_unlock_connect(xprt, transport); 2250 xprt_unlock_connect(xprt, transport);
2243 xprt_clear_connecting(xprt);
2244 return; 2251 return;
2245 case -EINVAL: 2252 case -EINVAL:
2246 /* Happens, for instance, if the user specified a link 2253 /* Happens, for instance, if the user specified a link
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 562c926a51cc..c5ac436235e0 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -539,6 +539,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
539 *err = -TIPC_ERR_NO_NAME; 539 *err = -TIPC_ERR_NO_NAME;
540 if (skb_linearize(skb)) 540 if (skb_linearize(skb))
541 return false; 541 return false;
542 msg = buf_msg(skb);
542 if (msg_reroute_cnt(msg)) 543 if (msg_reroute_cnt(msg))
543 return false; 544 return false;
544 dnode = addr_domain(net, msg_lookup_scope(msg)); 545 dnode = addr_domain(net, msg_lookup_scope(msg));